In [1]:
# Relative location for result files. No cell shown in this notebook reads it;
# presumably it is consumed elsewhere -- TODO confirm or remove.
RESULTS_PATH = '../results/'

In [2]:
import numpy as np
import inputs
import classification
import itertools
from sklearn.preprocessing import StandardScaler, MinMaxScaler

Select the parameters and models for training, validation and testing

In [3]:
# task_list = [inputs.binary, inputs.six_transient, inputs.seven_transient, inputs.seven_class, inputs.eight_class]
# min_obs_list = [5,10]
# num_features_list = [31, 27, 21]
# oversample_list = [True, False]
# model_list = [classification.svc, classification.rf, classification.mlp]
# scaler_list = [StandardScaler, MinMaxScaler]

In [97]:
# Parameter grid for this run: each list holds the candidate values for one
# parameter, and the loop below runs once per element of their Cartesian product.
# With one value per list, exactly one combination is trained and evaluated.
task_list = [inputs.eight_class]       # callables that build the train/test split
min_obs_list = [10]                    # passed to the task as `min_obs` (presumably minimum observations per object -- confirm in `inputs`)
num_features_list = [27]               # passed to the task as `num_features`
oversample_list = [False]              # passed to the task as `oversample`
model_list = [classification.rf]       # callables that train a classifier and return it
scaler_list = [StandardScaler]         # scaler classes; instantiated and fitted inside the loop

For every combination of parameters selected, perform pre-processing and classification

In [98]:
# Train and evaluate one classifier per parameter combination.
# The names bound in the final iteration (task, y_test, y_pred, ID_test,
# correct, incorrect) are read by the sampling cells further down, so only
# the last combination is available for that analysis.
for combination in itertools.product(task_list, min_obs_list, num_features_list, oversample_list, model_list, scaler_list):
    task, min_obs, num_features, oversample, model, scaler = combination
    print('STARTING TASK: ', task.__name__, min_obs, num_features, oversample, model.__name__, scaler.__name__)
    # Obtain inputs
    X_train, X_test, y_train, y_test = task(min_obs, num_features, oversample=oversample)
    # Scale inputs: fit on the training split only, then apply to both splits.
    # From this line on, `scaler` is the fitted instance rather than the class.
    scaler = scaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    # Perform Classification
    clf = model(X_train, y_train, X_test, y_test, min_obs, num_features, oversample, task, scaler)
    y_pred = clf.predict(X_test)
    # Call the task a second time with the IDs kept; column 0 of the second
    # returned element is taken as the ID of each test object.
    ID_test = task(min_obs, num_features, oversample=oversample, remove_ids=False)[1][:,0]
    # The IDs come from an independent call, so they are only meaningful if
    # that call reproduces the same test split. Equal length is a necessary
    # condition; fail loudly here instead of mislabelling objects later.
    assert len(ID_test) == len(y_test), (
        'ID_test has %d entries but y_test has %d; the two task() calls '
        'returned different test splits' % (len(ID_test), len(y_test)))
    # Index tuples (as returned by np.where) of wrong and right predictions.
    incorrect = np.where(y_pred != y_test)
    correct = np.where(y_pred == y_test)
    print('Finished Task\n')

STARTING TASK:  eight_class 10 27 False rf StandardScaler
Train Shapes (X, y):  (3200, 27), (3200,)
Test Shapes (X, y):  (1577, 27), (1577,)
Unique classes: ['AGN' 'Blazar' 'CV' 'Flare' 'HPM' 'Non-Transient' 'Other' 'SN']
Unique count: [285 156 524 125 268 703 436 703]
Best Params: {'max_features': 'auto', 'n_estimators': 700}
Validation Accuracy: 0.6609
Test Accuracy: 0.6823
Report:
                precision    recall  f1-score   support

          AGN     0.7153    0.7305    0.7228       141
       Blazar     0.6667    0.3421    0.4522        76
           CV     0.7636    0.7636    0.7636       258
        Flare     0.6571    0.3710    0.4742        62
          HPM     0.9291    0.8939    0.9112       132
Non-Transient     0.6725    0.8934    0.7673       347
        Other     0.4800    0.3364    0.3956       214
           SN     0.6253    0.6542    0.6394       347

  avg / total     0.6753    0.6823    0.6695      1577
Confusion Matrix:
 [[103   4   1   0   0   9  20   4]
 [  9 

In [99]:
# Number of object IDs to draw at random for each class in the sampling cells below.
num_objects = 4

Obtain correctly classified objects

In [100]:
# Draw a random sample of correctly classified object IDs for every predicted
# class, print it, and write the whole mapping to 'correct.txt'.
# The seed is fixed so the same objects are selected on every run.
np.random.seed(42)

dict_correct = { 'task': task.__name__ }

for target in np.unique(y_pred):
    # Positions, within the correctly classified subset, whose prediction is `target`.
    correct_target_indexes = np.where(y_pred[correct] == target)[0]
    num_target_objects = correct_target_indexes.shape[0]
    # Sampling without replacement raises ValueError when more items are
    # requested than exist, so never ask for more than are available.
    sample_size = min(num_objects, num_target_objects)
    if sample_size > 0:
        rand_indexes = np.random.choice(num_target_objects, sample_size, replace=False)
        sampled_ids = ID_test[correct][correct_target_indexes][rand_indexes].tolist()
    else:
        # A class can be predicted without ever being predicted correctly.
        sampled_ids = []
    # Use a native Python key: NumPy scalars are rendered as e.g. np.str_('AGN')
    # by NumPy >= 2, which would end up in the text file written below.
    key = target.item() if isinstance(target, np.generic) else target
    dict_correct[key] = sampled_ids
    print(target)
    print(sampled_ids)
with open('correct.txt','w') as f:
    f.write(str(dict_correct))

AGN
['TranID1511190090264135161', 'TranID1602271320384111174', 'TranID1104010150534111525', 'TranID1105061460454102381']
Blazar
['TranID1511081150254112077', 'TranID1109180041254129754', 'TranID1006040091104115912', 'TranID1009151090134116882']
CV
['TranID1004121380694101469', 'TranID809221210074124820', 'TranID1004080180844150528', 'TranID1201191570254149183']
Flare
['TranID812020070254115621', 'TranID1105261210664103986', 'TranID1607041211134127847', 'TranID1301141010054127807']
HPM
['TranID1511081570324139313', 'TranID1506191071194103098', 'TranID1607061380624103568', 'TranID1504011460534132127']
Non-Transient
['CataID2103025023457', 'CataID2008186028039', 'CataID1015107048794', 'CataID1121020060511']
Other
['TranID1306201260664112997', 'TranID1004100040684115432', 'TranID1212051210064142482', 'TranID1103091230694138108']
SN
['TranID1212071120514101732', 'TranID1102231350604110103', 'TranID901021180674115321', 'TranID1610051290094112842']


Obtain incorrectly classified objects

In [101]:
# Draw a random sample of misclassified object IDs for every true class,
# print it, and write the whole mapping to 'incorrect.txt'.
# The seed is fixed so the same objects are selected on every run.
np.random.seed(42)

dict_incorrect = { 'task': task.__name__ }

# Objects are grouped by their TRUE label (y_test), so iterate over the true
# classes. Iterating over the predicted classes would silently skip any class
# that the model never predicts.
for target in np.unique(y_test):
    # Positions, within the misclassified subset, whose true label is `target`.
    incorrect_target_indexes = (np.where(y_test[incorrect] == target))[0]
    num_target_objects = incorrect_target_indexes.shape[0]
    # Sampling without replacement raises ValueError when more items are
    # requested than exist, so never ask for more than are available.
    sample_size = min(num_objects, num_target_objects)
    if sample_size > 0:
        rand_indexes = np.random.choice(num_target_objects, sample_size, replace=False)
        sampled_ids = ID_test[incorrect][incorrect_target_indexes][rand_indexes].tolist()
    else:
        # Every object of this class was classified correctly.
        sampled_ids = []
    # Use a native Python key: NumPy scalars are rendered as e.g. np.str_('AGN')
    # by NumPy >= 2, which would end up in the text file written below.
    key = target.item() if isinstance(target, np.generic) else target
    dict_incorrect[key] = sampled_ids
    print(target)
    print(sampled_ids)
with open('incorrect.txt','w') as f:
    f.write(str(dict_incorrect))

AGN
['TranID1202011430404119121', 'TranID1503131380534110321', 'TranID1212201070564109636', 'TranID1304151460494144378']
Blazar
['TranID1309161261134112078', 'TranID1004060010564103401', 'TranID1001150010144126826', 'TranID905171230654131642']
CV
['TranID1411180090094108372', 'TranID904031350204105547', 'TranID1605310070594119120', 'TranID909111210064128226']
Flare
['TranID1512161181174138560', 'TranID1012061010234104635', 'TranID1610211260104103466', 'TranID1505241120834142545']
HPM
['TranID1111031520324144272', 'TranID1504211350674136634', 'TranID1610241600204126524', 'TranID1204011230584135800']
Non-Transient
['CataID1138012047066', 'CataID1115057015149', 'CataID2116014019996', 'CataID1007113019532']
Other
['TranID1111230010184112537', 'TranID1303171290584119068', 'TranID1510121400034160762', 'TranID1503261490484103204']
SN
['TranID804011150764118274', 'TranID1510161150164123038', 'TranID1104131150874170907', 'TranID1010101351004150592']


In [79]:
# ID_test[correct][:15], y_test[correct][:15], y_pred[correct][:15]

In [34]:
# ID_test[incorrect][:15], y_test[incorrect][:15], y_pred[incorrect][:15]