In [1]:
import numpy as np
import pandas as pd

import glob
import math
import time
import pickle
import joblib
import warnings
from sktime.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix
import Measurements as measurements
from sklearn.metrics import make_scorer
from sklearn.model_selection import GroupKFold

from sktime.classification.interval_based import CanonicalIntervalForest

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import Feature_rank_lib as lib

In [2]:
# This method extract the necessary features from the destination folder
def data_reader(loadPath, partitions, flare_label):
    cols = ['USFLUX','TOTUSJZ','TOTUSJH','ABSNJZH','SAVNCPP','TOTPOT','TOTBSQ','TOTFZ','MEANPOT','EPSZ','MEANSHR','SHRGT45','MEANGAM','MEANGBT','MEANGBZ','MEANGBH','MEANJZH','TOTFY','MEANJZD','MEANALP','TOTFX','EPSY','EPSX','R_VALUE']
    # Read files from the define path
    all_files = glob.glob(str(loadPath) + partitions + "/" + flare_label + "/*.csv")

    li = []
    for filename in all_files:
        # Read the file and extract necessary features
        df = pd.read_csv(filename, index_col=None, header=0, sep='\t')
        df.interpolate(method='linear', axis=0, limit_direction='both', inplace = True)
#         # Extract label info
#         INFO = filename.split('/')[-1]
        # Define Label value based on the file name
    
        if flare_label == 'NF':
            LABEL = 'CBN'
        else:
            LABEL = 'XM'
            
        col_list = []
        for col in cols:
            if not df[col].isnull().values.all(axis=0):
                col_list.append(df[col])
        if len(col_list) == 24:
            li.append(col_list + [LABEL])

    # Create and return the dataframe build on the extracted features
    partition_frame = pd.DataFrame(li, columns= cols + ['LABEL'])
    return partition_frame

def evaluation(x_test, y_test, y_pred, clf):
    scores = confusion_matrix(y_test, y_pred, labels=['CBN', 'XM']).ravel()
    tn, fp, fn, tp = scores

    results_DF = pd.DataFrame(columns = ['Confusion_Matrix(tn, fp, fn, tp)', 'Accur', 'TSS', 'HSS', 'GSS', 'TPR', 'TNR', 'CBNPr', 'XMPr', 'FAR', 'POFD', 'f1XM', 'f1CBN'], index = [0])

    #Confusion Matrix
    results_DF['Confusion_Matrix(tn, fp, fn, tp)'] = [scores]
    
    #Accuracy
    scoreTest = clf.score(x_test, y_test)
    #print("Train Accuracy: " + str(round(scoreTrain, 4)))
    #print("Test Accuracy: " + str(round(scoreTest, 4)))
    results_DF['Accur'] = scoreTest

    # TSS
    tss = measurements.TSS(scores)
    results_DF['TSS'] = tss

    # HSS2 Definition 2
    hss2 = measurements.HSS2(scores)
    results_DF['HSS'] = hss2

    # GSS
    gss = measurements.GSS(scores)
    results_DF['GSS'] = gss

    # TPR
    tpr = measurements.TPR(scores)
    results_DF['TPR'] = tpr
    
    # TNR
    tnr = measurements.TNR(scores)
    results_DF['TNR'] = tnr

    # Precision Negative
    negPrecision = measurements.precisionNeg(scores)
    results_DF['CBNPr'] = negPrecision
    
    # Precision Positive
    posPrecision = measurements.precisionPos(scores)
    results_DF['XMPr'] = posPrecision

    # FAR
    far = measurements.FAR(scores)
    results_DF['FAR'] = far

    # POFD
    pofd = measurements.POFD(scores)
    results_DF['POFD'] = pofd

    # F1(XM)
    f1XM = measurements.F1Pos(scores)
    results_DF['f1XM'] = f1XM

    # F1(CBN)
    f1CBN = measurements.F1Neg(scores)
    results_DF['f1CBN'] = f1CBN

    # Return the result measurement dataframe
    return results_DF

In [3]:
datapath = '/data/SHARPS/BERKAY/v0.7/new-data-folds/instances_O12L0P24/'

In [4]:
partition1_FL = data_reader(datapath,'partition1','FL')
partition1_NF = data_reader(datapath,'partition1','NF')

partition2_FL = data_reader(datapath,'partition2','FL')
partition2_NF = data_reader(datapath,'partition2','NF')

partition3_FL = data_reader(datapath,'partition3','FL')
partition3_NF = data_reader(datapath,'partition3','NF')

partition4_FL = data_reader(datapath,'partition4','FL')
partition4_NF = data_reader(datapath,'partition4','NF')

partition5_FL = data_reader(datapath,'partition5','FL')
partition5_NF = data_reader(datapath,'partition5','NF')

In [5]:
partition1 = pd.concat([partition1_FL, partition1_NF], ignore_index=True)
partition2 = pd.concat([partition2_FL, partition2_NF], ignore_index=True)
partition3 = pd.concat([partition3_FL, partition3_NF], ignore_index=True)
partition4 = pd.concat([partition4_FL, partition4_NF], ignore_index=True)
partition5 = pd.concat([partition5_FL, partition5_NF], ignore_index=True)

In [6]:
X_train_TEp5 = pd.concat([partition1, partition2, partition3, partition4], ignore_index=True)
X_train_TEp4 = pd.concat([partition1, partition2, partition3, partition5], ignore_index=True)
X_train_TEp3 = pd.concat([partition1, partition2, partition4, partition5], ignore_index=True)
X_train_TEp2 = pd.concat([partition1, partition3, partition4, partition5], ignore_index=True)
X_train_TEp1 = pd.concat([partition2, partition3, partition4, partition5], ignore_index=True)

y_train_TEp5 = X_train_TEp5['LABEL']
X_train_TEp5 = X_train_TEp5.loc[:, X_train_TEp5.columns != 'LABEL']
y_train_TEp4 = X_train_TEp4['LABEL']
X_train_TEp4 = X_train_TEp4.loc[:, X_train_TEp4.columns != 'LABEL']
y_train_TEp3 = X_train_TEp3['LABEL']
X_train_TEp3 = X_train_TEp3.loc[:, X_train_TEp3.columns != 'LABEL']
y_train_TEp2 = X_train_TEp2['LABEL']
X_train_TEp2 = X_train_TEp2.loc[:, X_train_TEp2.columns != 'LABEL']
y_train_TEp1 = X_train_TEp1['LABEL']
X_train_TEp1 = X_train_TEp1.loc[:, X_train_TEp1.columns != 'LABEL']

In [7]:
X_test_TEp5 = partition5.loc[:, partition5.columns != 'LABEL']
y_test_TEp5 = partition5['LABEL']
X_test_TEp4 = partition4.loc[:, partition4.columns != 'LABEL']
y_test_TEp4 = partition4['LABEL']
X_test_TEp3 = partition3.loc[:, partition3.columns != 'LABEL']
y_test_TEp3 = partition3['LABEL']
X_test_TEp2 = partition2.loc[:, partition2.columns != 'LABEL']
y_test_TEp2 = partition2['LABEL']
X_test_TEp1 = partition1.loc[:, partition1.columns != 'LABEL']
y_test_TEp1 = partition1['LABEL']

In [8]:
import CIF as cif

In [9]:
def train_cif(X_train,y_train,X_test,y_test,test_label='p5'):
    result_DF = pd.DataFrame(columns = ['Confusion_Matrix(tn, fp, fn, tp)', 'Accur', 'TSS', 'HSS', 'GSS', 'TPR', 'TNR', 'CBNPr', 'XMPr', 'FAR', 'POFD', 'f1XM', 'f1CBN', 'Experiments'])

    cws=[5,6,7,8,9,10]
    for cw in cws:
        clf = cif.CanonicalIntervalForest(n_estimators=100, n_intervals=8, att_subsample_size=2, base_estimator='DTC', cw=cw, n_jobs=15) 

        print('Testing '+test_label+', class_weights: {CBN:1, XM:'+str(cw)+'}')
        t0=time.time()
        clf.fit(X_train, y_train)
#         joblib.dump(multivar_rocket_, './models/Rocket/TE'+test_label+'_cw'+str(cw)+'.sav')
        print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

        t1=time.time()
        y_pred = clf.predict(X_test)
        print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

        t2=time.time()
        result = evaluation(X_test, y_test, y_pred, clf)
        result['Experiments'] = ['TE'+test_label+'_cw'+str(cw)]
        result_DF = pd.concat([result_DF,result])
        print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

    #     results = ridge.cv_results_
    #     allFoldResults_DF = pd.DataFrame.from_dict(results)

    result_DF = result_DF.reset_index(drop=True)
    return result_DF

In [10]:
result_TEp5 = train_cif(X_train_TEp5,y_train_TEp5,X_test_TEp5,y_test_TEp5)
result_TEp5.to_csv('./results/CIF/TEp5_cw.csv')

Testing p5, class_weights: {CBN:1, XM:5}
	Training time: 2236.94 s


  transformed_x = transformed_x.round(8)


	Predicting time: 637.41 s


  transformed_x.round(8)


	Evaluation time: 634.44 s
Testing p5, class_weights: {CBN:1, XM:6}


  transformed_x.round(8)


	Training time: 2247.39 s
	Predicting time: 652.96 s
	Evaluation time: 654.14 s
Testing p5, class_weights: {CBN:1, XM:7}
	Training time: 2214.95 s
	Predicting time: 632.35 s
	Evaluation time: 638.66 s
Testing p5, class_weights: {CBN:1, XM:8}
	Training time: 2199.58 s
	Predicting time: 639.82 s
	Evaluation time: 642.97 s
Testing p5, class_weights: {CBN:1, XM:9}
	Training time: 2212.14 s
	Predicting time: 635.37 s


  precisionPos = TP / float(TP + FP)
  FAR = FP / float(TP + FP)
  precision = TP / float(TP + FP)


	Evaluation time: 641.36 s
Testing p5, class_weights: {CBN:1, XM:10}
	Training time: 2158.17 s
	Evaluation time: 524.54 s


In [30]:
result_TEp4[['TSS','HSS','CBNPr','XMPr','FAR','TPR']].mean(axis=0)

TSS      0.017243
HSS      0.031407
CBNPr    0.977638
XMPr     0.256531
FAR      0.743469
TPR      0.018169
dtype: float64

In [14]:
result_TEp4 = train_cif(X_train_TEp4,y_train_TEp4,X_test_TEp4,y_test_TEp4,test_label='p4')
result_TEp4.to_csv('./results/CIF/TEp4_cw.csv')

Testing p4, class_weights: {CBN:1, XM:5}
	Training time: 1958.9 s
	Predicting time: 363.08 s
	Evaluation time: 365.61 s
Testing p4, class_weights: {CBN:1, XM:6}
	Training time: 1996.63 s
	Predicting time: 350.21 s
	Evaluation time: 354.68 s
Testing p4, class_weights: {CBN:1, XM:7}
	Training time: 1966.33 s
	Predicting time: 350.86 s
	Evaluation time: 355.71 s
Testing p4, class_weights: {CBN:1, XM:8}
	Training time: 1962.84 s
	Predicting time: 357.68 s


  f1 = 2 * ((precision * recall) / (precision + recall))


	Evaluation time: 356.17 s
Testing p4, class_weights: {CBN:1, XM:9}
	Training time: 1941.44 s
	Predicting time: 353.84 s
	Evaluation time: 352.84 s
Testing p4, class_weights: {CBN:1, XM:10}
	Training time: 2047.4 s
	Predicting time: 362.84 s
	Evaluation time: 364.06 s


In [16]:
result_TEp3 = train_cif(X_train_TEp3,y_train_TEp3,X_test_TEp3,y_test_TEp3,test_label='p3')
result_TEp3.to_csv('./results/CIF/TEp3_cw.csv')

Testing p3, class_weights: {CBN:1, XM:5}
	Training time: 2022.09 s
	Predicting time: 302.59 s


  precisionPos = TP / float(TP + FP)
  FAR = FP / float(TP + FP)
  precision = TP / float(TP + FP)


	Evaluation time: 284.39 s
Testing p3, class_weights: {CBN:1, XM:6}
	Training time: 2043.95 s
	Predicting time: 311.96 s


  precisionPos = TP / float(TP + FP)
  FAR = FP / float(TP + FP)
  precision = TP / float(TP + FP)


	Evaluation time: 296.44 s
Testing p3, class_weights: {CBN:1, XM:7}
	Training time: 2082.5 s
	Predicting time: 309.33 s


  f1 = 2 * ((precision * recall) / (precision + recall))


	Evaluation time: 293.23 s
Testing p3, class_weights: {CBN:1, XM:8}
	Training time: 2011.88 s


  transformed_x = transformed_x.round(8)


	Predicting time: 303.65 s
	Evaluation time: 282.54 s
Testing p3, class_weights: {CBN:1, XM:9}


  transformed_x.round(8)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  transformed_x.round(8)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


	Training time: 2049.15 s
	Predicting time: 308.57 s


  f1 = 2 * ((precision * recall) / (precision + recall))


	Evaluation time: 290.68 s
Testing p3, class_weights: {CBN:1, XM:10}
	Training time: 2053.22 s
	Predicting time: 309.01 s
	Evaluation time: 290.39 s


In [17]:
result_TEp2 = train_cif(X_train_TEp2,y_train_TEp2,X_test_TEp2,y_test_TEp2,test_label='p2')
result_TEp2.to_csv('./results/CIF/TEp2_cw.csv')

Testing p2, class_weights: {CBN:1, XM:5}
	Training time: 1735.62 s
	Predicting time: 618.54 s
	Evaluation time: 707.52 s
Testing p2, class_weights: {CBN:1, XM:6}
	Training time: 1744.36 s
	Predicting time: 628.49 s
	Evaluation time: 634.93 s
Testing p2, class_weights: {CBN:1, XM:7}
	Training time: 1769.18 s
	Predicting time: 625.67 s
	Evaluation time: 648.64 s
Testing p2, class_weights: {CBN:1, XM:8}
	Training time: 1735.51 s
	Predicting time: 628.16 s
	Evaluation time: 627.1 s
Testing p2, class_weights: {CBN:1, XM:9}
	Training time: 1735.82 s
	Predicting time: 623.07 s
	Evaluation time: 625.25 s
Testing p2, class_weights: {CBN:1, XM:10}
	Training time: 1722.49 s
	Predicting time: 624.83 s
	Evaluation time: 624.74 s


In [25]:
result_TEp1 = train_cif(X_train_TEp1,y_train_TEp1,X_test_TEp1,y_test_TEp1,test_label='p1')
result_TEp1.to_csv('./results/CIF/TEp1_cw.csv')

Testing p1, class_weights: {CBN:1, XM:5}
	Training time: 1838.95 s
	Predicting time: 640.97 s


  f1 = 2 * ((precision * recall) / (precision + recall))


	Evaluation time: 525.83 s
Testing p1, class_weights: {CBN:1, XM:6}
	Training time: 1807.43 s
	Predicting time: 516.07 s
	Evaluation time: 523.95 s
Testing p1, class_weights: {CBN:1, XM:7}
	Training time: 1834.05 s
	Predicting time: 525.07 s
	Training time: 1798.74 s
	Predicting time: 515.17 s
	Evaluation time: 515.82 s
Testing p1, class_weights: {CBN:1, XM:9}
	Training time: 1792.7 s
	Predicting time: 516.01 s
	Evaluation time: 521.91 s
Testing p1, class_weights: {CBN:1, XM:10}
	Training time: 1809.35 s


  transformed_x = transformed_x.round(8)


	Predicting time: 520.07 s
	Evaluation time: 529.91 s


  precisionPos = TP / float(TP + FP)
  FAR = FP / float(TP + FP)
  precision = TP / float(TP + FP)
