In [None]:
import numpy as np
import pandas
import scipy
from sklearn.ensemble import RandomForestClassifier, ExtraTreesRegressor
import os
from sklearn.metrics import matthews_corrcoef, recall_score, roc_auc_score, balanced_accuracy_score, roc_curve, auc, confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import KNNImputer, IterativeImputer
from sklearn import svm
import xgboost
import seaborn as sns
from sklearn.pipeline import Pipeline
from pytorch_tabnet.tab_model import  TabNetClassifier
import torch


In [None]:
# Read the input table; the path is relative to the notebook's working directory.
df = pandas.read_csv(filepath_or_buffer='data/data2.csv')

In [None]:
# Label-based (inclusive) column range holding the magnitude columns.
mag_columns = slice('PS1imag', 'W4mag')

# Feature table and target vector.
X = df.loc[:, mag_columns]
y = df['label']

# Second selection of the same range; later cells only read `cols.columns`.
cols = df.loc[:, mag_columns]

There are two training options: with the optical magnitudes (PS1imag, PS1zmag, PS1ymag) or without them.

In [None]:
# Left-most column of the feature range sliced later as `first_mag:'W1_W2'`.
# 'PS1imag' keeps the optical PS1 magnitudes in the feature set.
first_mag = 'PS1imag'
# Alternative start column: uncomment to begin the feature range at 'Jmag'
# (presumably this drops the PS1 magnitudes -- depends on the CSV column order, confirm).
#first_mag = 'Jmag'

In [None]:
# Colour indices to append, as  name -> (minuend column, subtrahend column).
# Insertion order matters: 'W1_W2' must be the last column because the
# feature range below is sliced up to and including it.
colour_definitions = {
    'i_z': ('PS1imag', 'PS1zmag'),
    'i_y': ('PS1imag', 'PS1ymag'),
    'z_y': ('PS1zmag', 'PS1ymag'),
    'z_J': ('PS1zmag', 'Jmag'),
    'y_J': ('PS1ymag', 'Jmag'),
    'J_H': ('Jmag', 'Hmag'),
    'H_Ks': ('Hmag', 'Ksmag'),
    'W1_W2': ('W1mag', 'W2mag'),
}

# Work on a separate frame so the new columns are not added to `X` itself.
dx = pandas.DataFrame(data=X, columns=cols.columns)
for colour, (first_band, second_band) in colour_definitions.items():
    dx[colour] = dx[first_band] - dx[second_band]

# Final feature range: from the chosen first magnitude through the last colour.
cols = dx.loc[:, first_mag:'W1_W2']
X_ = np.array(cols)

In [None]:
# Iterative (multivariate) imputation of missing feature values, with an
# extra-trees regressor as the per-feature estimator.
imputer = IterativeImputer(
    estimator=ExtraTreesRegressor(n_estimators=30, max_features=14, max_depth=20, min_samples_split=15),
    max_iter=50,
    random_state=123,
)

# Fit the imputer on the training rows only, so that validation/test rows do
# not influence the imputation model (avoids preprocessing data leakage).
# train_test_split picks rows from the number of samples and random_state
# alone, so splitting the row indices with the same test_size/random_state as
# the 60/40 split of `X` in the following cell selects exactly the rows that
# end up in X_train. Keep these two arguments in sync with that cell.
train_rows = train_test_split(np.arange(X_.shape[0]), test_size=0.4, random_state=123)[0]
imputer.fit(X_[train_rows])

# Impute every row with the train-fitted model; row order is unchanged, so the
# later split of `X` still lines up with `y`.
X = imputer.transform(X_)

In [None]:
# Stage 1: hold out 40% of the rows; the remaining 60% is the training set.
X_train, x_temp, y_train, y_temp = train_test_split(
    X, y,
    test_size=0.4,
    random_state=123,
)

# Stage 2: cut the hold-out in half -> 20% validation and 20% test overall.
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp,
    test_size=0.5,
    random_state=123,
)

Test of the selection rules from Carnero Rosell et al. (2019) and Burningham et al. (2013) on the L&T test sample.

In [None]:
# Re-attach column names to the (numpy) test features so the literature
# cuts can be written in terms of named magnitudes and colours.
pos = pandas.DataFrame(data=x_test, columns=cols.columns)

# Carnero Rosell et al. (2019) rule. It gets its own mask and score: previously
# this mask was overwritten by the next rule before it was ever evaluated.
mask_cr = (pos['z_y'] > 0.15) & (pos['i_z'] > 1.2) & (pos['y_J'] > 1.6)
corr_cr = matthews_corrcoef(y_test, mask_cr)

# Burningham et al. (2013) rule. `mask` / `corr_dr` keep the values they had
# before (the Burningham result), so anything reading them is unaffected.
mask = (pos['z_J'] > 2.5) & (pos['Jmag'] < 18.8)
corr_dr = matthews_corrcoef(y_test, mask)

print('Carnero Rosell et al. (2019):', corr_cr)
print('Burningham et al. (2013):', corr_dr)

Scaling

In [None]:
# Learn the per-feature mean and standard deviation from the training set only...
scaler = StandardScaler().fit(X_train)

# ...then apply that same transformation to each split.
X_train2, x_val2, x_test2 = (
    scaler.transform(split) for split in (X_train, x_val, x_test)
)

Random Forest, hyperparameters from optuna:

In [None]:
# Random forest with the tuned hyperparameters.
# random_state is pinned so that bootstrap sampling and per-split feature
# sub-sampling are repeatable; without it every run of this cell yields a
# different forest and a different score.
rf_model = RandomForestClassifier(
    n_estimators=500,
    max_depth=15,
    min_samples_leaf=10,
    max_features=2,
    random_state=123,
)
rf_model.fit(X_train2, y_train)

# Evaluate on the held-out test split; the bare last expression displays the MCC.
y_pred_rf = rf_model.predict(x_test2)
matthews_corrcoef(y_test, y_pred_rf)

SVM, hyperparameters from optuna:

In [None]:
# RBF-kernel support vector classifier with the tuned hyperparameters.
# NOTE(review): per the scikit-learn SVC documentation, coef0 is only
# significant for the 'poly' and 'sigmoid' kernels, so it should have no effect
# here; it is kept so the call mirrors the tuned parameter set.
svc_model = svm.SVC(
    kernel='rbf',
    C=1.1785578339058878,
    gamma='scale',
    coef0=4.37386719156764,
    class_weight='balanced',
    decision_function_shape='ovr',
    probability=True,
    random_state=123,
).fit(X_train2, y_train)

# Evaluate on the held-out test split; the bare last expression displays the MCC.
y_pred_svc = svc_model.predict(x_test2)
matthews_corrcoef(y_test, y_pred_svc)

XGBoost, hyperparameters from optuna:

In [None]:
# Gradient-boosted trees with the tuned hyperparameters.
xgb_clf = xgboost.XGBClassifier(
    booster='gbtree',
    n_estimators=500,
    max_depth=24,
    learning_rate=0.7643065952721012,
    gamma=0.60562070482283363,
    subsample=0.53759136820407905,
    n_jobs=2,
    random_state=1,
    verbosity=0,
)
xgb_clf.fit(X_train2, y_train)

# Evaluate on the held-out test split. Arguments follow the documented
# (y_true, y_pred) order, matching the random-forest and SVM cells; MCC is
# symmetric in its arguments, so the value is the same as before.
y_pred_xgb = xgb_clf.predict(x_test2)
matthews_corrcoef(y_test, y_pred_xgb)

Tabnet:


In [None]:
# TabNet architecture and optimisation settings.
tbn_clf = TabNetClassifier(
    n_d=32,
    n_a=32,
    n_steps=3,
    n_shared=3,
    gamma=1,
    mask_type='entmax',
    optimizer_fn=torch.optim.Adam,
    optimizer_params={'lr': 2e-2},
    # StepLR: multiply the learning rate by 0.9 every 3 scheduler steps.
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    scheduler_params={'step_size': 3, 'gamma': 0.9},
)

# Train while tracking balanced accuracy on both the training and the
# validation split.
# NOTE(review): weights=1 presumably enables pytorch-tabnet's automatic
# inverse-class-frequency sampling -- confirm against the library docs.
tbn_clf.fit(
    X_train2, y_train,
    eval_set=[(X_train2, y_train), (x_val2, y_val)],
    eval_name=['train', 'valid'],
    eval_metric=['balanced_accuracy'],
    max_epochs=92,
    patience=50,
    batch_size=256,
    virtual_batch_size=128,
    num_workers=0,
    weights=1,
    drop_last=False,
)

In [None]:
# Score TabNet on the held-out test split. Arguments follow the documented
# (y_true, y_pred) order, matching the random-forest and SVM cells; MCC is
# symmetric in its arguments, so the value is the same as before.
y_pred_tbn = tbn_clf.predict(x_test2)
corr_tbn = matthews_corrcoef(y_test, y_pred_tbn)
print(corr_tbn)