In [1]:
import numpy as np
import pandas as pd

import glob
import math
import time
import pickle
import joblib
import warnings
from sktime.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix
import Measurements as measurements
from sklearn.metrics import make_scorer
from sklearn.model_selection import GroupKFold

from aeon.classification.dictionary_based import BOSSEnsemble

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import Feature_rank_lib as lib

In [2]:
# This method extract the necessary features from the destination folder
def data_reader(loadPath, partitions, flare_label):
    cols = ['USFLUX','TOTUSJZ','TOTUSJH','ABSNJZH','SAVNCPP','TOTPOT','TOTBSQ','TOTFZ','MEANPOT','EPSZ','MEANSHR','SHRGT45','MEANGAM','MEANGBT','MEANGBZ','MEANGBH','MEANJZH','TOTFY','MEANJZD','MEANALP','TOTFX','EPSY','EPSX','R_VALUE']
    # Read files from the define path
    all_files = glob.glob(str(loadPath) + partitions + "/" + flare_label + "/*.csv")

    li = []
    for filename in all_files:
        # Read the file and extract necessary features
        df = pd.read_csv(filename, index_col=None, header=0, sep='\t')
        df.interpolate(method='linear', axis=0, limit_direction='both', inplace = True)
#         # Extract label info
#         INFO = filename.split('/')[-1]
        # Define Label value based on the file name
    
        if flare_label == 'NF':
            LABEL = 'CBN'
        else:
            LABEL = 'XM'
            
        col_list = []
        for col in cols:
            if not df[col].isnull().values.all(axis=0):
                col_list.append(df[col])
        if len(col_list) == 24:
            li.append(col_list + [LABEL])

    # Create and return the dataframe build on the extracted features
    partition_frame = pd.DataFrame(li, columns= cols + ['LABEL'])
    return partition_frame

def evaluation(x_test, y_test, y_pred, clf):
    scores = confusion_matrix(y_test, y_pred, labels=['CBN', 'XM']).ravel()
    tn, fp, fn, tp = scores

    results_DF = pd.DataFrame(columns = ['Confusion_Matrix(tn, fp, fn, tp)', 'Accur', 'TSS', 'HSS', 'GSS', 'TPR', 'TNR', 'CBNPr', 'XMPr', 'FAR', 'POFD', 'f1XM', 'f1CBN'], index = [0])

    #Confusion Matrix
    results_DF['Confusion_Matrix(tn, fp, fn, tp)'] = [scores]
    
    #Accuracy
    scoreTest = clf.score(x_test, y_test)
    #print("Train Accuracy: " + str(round(scoreTrain, 4)))
    #print("Test Accuracy: " + str(round(scoreTest, 4)))
    results_DF['Accur'] = scoreTest

    # TSS
    tss = measurements.TSS(scores)
    results_DF['TSS'] = tss

    # HSS2 Definition 2
    hss2 = measurements.HSS2(scores)
    results_DF['HSS'] = hss2

    # GSS
    gss = measurements.GSS(scores)
    results_DF['GSS'] = gss

    # TPR
    tpr = measurements.TPR(scores)
    results_DF['TPR'] = tpr
    
    # TNR
    tnr = measurements.TNR(scores)
    results_DF['TNR'] = tnr

    # Precision Negative
    negPrecision = measurements.precisionNeg(scores)
    results_DF['CBNPr'] = negPrecision
    
    # Precision Positive
    posPrecision = measurements.precisionPos(scores)
    results_DF['XMPr'] = posPrecision

    # FAR
    far = measurements.FAR(scores)
    results_DF['FAR'] = far

    # POFD
    pofd = measurements.POFD(scores)
    results_DF['POFD'] = pofd

    # F1(XM)
    f1XM = measurements.F1Pos(scores)
    results_DF['f1XM'] = f1XM

    # F1(CBN)
    f1CBN = measurements.F1Neg(scores)
    results_DF['f1CBN'] = f1CBN

    # Return the result measurement dataframe
    return results_DF

In [3]:
datapath = '/data/SHARPS/BERKAY/v0.7/new-data-folds/instances_O12L0P24/'

In [4]:
partition1_FL = data_reader(datapath,'partition1','FL')
partition1_NF = data_reader(datapath,'partition1','NF')

partition2_FL = data_reader(datapath,'partition2','FL')
partition2_NF = data_reader(datapath,'partition2','NF')

partition3_FL = data_reader(datapath,'partition3','FL')
partition3_NF = data_reader(datapath,'partition3','NF')

partition4_FL = data_reader(datapath,'partition4','FL')
partition4_NF = data_reader(datapath,'partition4','NF')

partition5_FL = data_reader(datapath,'partition5','FL')
partition5_NF = data_reader(datapath,'partition5','NF')

In [5]:
partition1 = pd.concat([partition1_FL, partition1_NF], ignore_index=True)
partition2 = pd.concat([partition2_FL, partition2_NF], ignore_index=True)
partition3 = pd.concat([partition3_FL, partition3_NF], ignore_index=True)
partition4 = pd.concat([partition4_FL, partition4_NF], ignore_index=True)
partition5 = pd.concat([partition5_FL, partition5_NF], ignore_index=True)

In [None]:
# partition1 = np.concatenate((partition1_FL, partition1_NF))
# partition2 = np.concatenate((partition2_FL, partition2_NF))
# partition3 = np.concatenate((partition3_FL, partition3_NF))
# partition4 = np.concatenate((partition4_FL, partition4_NF))
# partition5 = np.concatenate((partition5_FL, partition5_NF))

# partition1_labels = np.concatenate((partition1_FL_labels, partition1_NF_labels))
# partition2_labels = np.concatenate((partition2_FL_labels, partition2_NF_labels))
# partition3_labels = np.concatenate((partition3_FL_labels, partition3_NF_labels))
# partition4_labels = np.concatenate((partition4_FL_labels, partition4_NF_labels))
# partition5_labels = np.concatenate((partition5_FL_labels, partition5_NF_labels))

In [6]:
# from sktime.classification.dictionary_based import BOSSEnsemble
# from sktime.transformations.panel.rocket import Rocket
from sktime.classification.kernel_based import RocketClassifier

In [7]:
X_train_TEp5 = pd.concat([partition1, partition2, partition3, partition4], ignore_index=True)
X_train_TEp4 = pd.concat([partition1, partition2, partition3, partition5], ignore_index=True)
X_train_TEp3 = pd.concat([partition1, partition2, partition4, partition5], ignore_index=True)
X_train_TEp2 = pd.concat([partition1, partition3, partition4, partition5], ignore_index=True)
X_train_TEp1 = pd.concat([partition2, partition3, partition4, partition5], ignore_index=True)

y_train_TEp5 = X_train_TEp5['LABEL']
X_train_TEp5 = X_train_TEp5.loc[:, X_train_TEp5.columns != 'LABEL']
y_train_TEp4 = X_train_TEp4['LABEL']
X_train_TEp4 = X_train_TEp4.loc[:, X_train_TEp4.columns != 'LABEL']
y_train_TEp3 = X_train_TEp3['LABEL']
X_train_TEp3 = X_train_TEp3.loc[:, X_train_TEp3.columns != 'LABEL']
y_train_TEp2 = X_train_TEp2['LABEL']
X_train_TEp2 = X_train_TEp2.loc[:, X_train_TEp2.columns != 'LABEL']
y_train_TEp1 = X_train_TEp1['LABEL']
X_train_TEp1 = X_train_TEp1.loc[:, X_train_TEp1.columns != 'LABEL']

In [8]:
X_test_TEp5 = partition5.loc[:, partition5.columns != 'LABEL']
y_test_TEp5 = partition5['LABEL']
X_test_TEp4 = partition4.loc[:, partition4.columns != 'LABEL']
y_test_TEp4 = partition4['LABEL']
X_test_TEp3 = partition3.loc[:, partition3.columns != 'LABEL']
y_test_TEp3 = partition3['LABEL']
X_test_TEp2 = partition2.loc[:, partition2.columns != 'LABEL']
y_test_TEp2 = partition2['LABEL']
X_test_TEp1 = partition1.loc[:, partition1.columns != 'LABEL']
y_test_TEp1 = partition1['LABEL']

# Result for non-optimized class weight Rocket

In [None]:
print('Testing p5')
t0=time.time()
rocket_TEp5 = RocketClassifier(n_jobs=10)
rocket_TEp5.fit(X_train_TEp5, y_train_TEp5)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
t1=time.time()
y_pred_TEp5 = rocket_TEp5.predict(X_test_TEp5)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

np.save('./results/Rocket/y_pred_TEp5.npy', y_pred_TEp5)

In [10]:
print('Testing p4')
t0=time.time()
rocket_TEp4 = RocketClassifier(n_jobs=10)
rocket_TEp4.fit(X_train_TEp4, y_train_TEp4)
t1=time.time()
y_pred_TEp4 = rocket_TEp4.predict(X_test_TEp4)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

np.save('./results/Rocket/y_pred_TEp4.npy', y_pred_TEp4)

Testing p4
	Predicting time: 1537.04 s


In [11]:
print('Testing p3')
t0=time.time()
rocket_TEp3 = RocketClassifier(n_jobs=10)
rocket_TEp3.fit(X_train_TEp3, y_train_TEp3)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
t1=time.time()
y_pred_TEp3 = rocket_TEp3.predict(X_test_TEp3)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

np.save('./results/Rocket/y_pred_TEp3.npy', y_pred_TEp3)

Testing p3
	Training time: 18113.63 s
	Predicting time: 1244.69 s


In [12]:
print('Testing p2')
t0=time.time()
rocket_TEp2 = RocketClassifier(n_jobs=10)
rocket_TEp2.fit(X_train_TEp2, y_train_TEp2)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
t1=time.time()
y_pred_TEp2 = rocket_TEp2.predict(X_test_TEp2)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

np.save('./results/Rocket/y_pred_TEp2.npy', y_pred_TEp2)

Testing p2
	Training time: 17380.49 s
	Predicting time: 2624.53 s


In [13]:
print('Testing p1')
t0=time.time()
rocket_TEp1 = RocketClassifier(n_jobs=10)
rocket_TEp1.fit(X_train_TEp1, y_train_TEp1)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
t1=time.time()
y_pred_TEp1 = rocket_TEp1.predict(X_test_TEp1)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

np.save('./results/Rocket/y_pred_TEp1.npy', y_pred_TEp1)

Testing p1
	Training time: 19717.11 s
	Predicting time: 2185.55 s


# Optimize class weight within RidgetCV

In [9]:
from sklearn.linear_model import RidgeClassifierCV
from sklearn.preprocessing import StandardScaler

from sktime.classification._delegate import _DelegatedClassifier
from sktime.pipeline import make_pipeline
from sktime.transformations.panel.rocket import (
    MiniRocket,
    MiniRocketMultivariate,
    MultiRocket,
    MultiRocketMultivariate,
    Rocket,
)

In [10]:
common_params = {
            "num_kernels": 10000,
            "random_state": None,
            "n_jobs": 10,
        }

result_DF = pd.DataFrame(columns = ['Confusion_Matrix(tn, fp, fn, tp)', 'Accur', 'TSS', 'HSS', 'GSS', 'TPR', 'TNR', 'CBNPr', 'XMPr', 'FAR', 'POFD', 'f1XM', 'f1CBN', 'Experiments'])

In [14]:
cw=5

multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )
print('Testing p5, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp5, y_train_TEp5)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp5)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp5, y_test_TEp5, y_pred_TEp5, multivar_rocket_)
result['Experiments'] = ['TEp5_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

Testing p5, class_weights:{CBN:1, XM:5}
	Training time: 5234.51 s
	Predicting time: 704.83 s


NameError: name 'result_DF' is not defined

In [18]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p4, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp4, y_train_TEp4)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp4 = multivar_rocket_.predict(X_test_TEp4)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp4, y_test_TEp4, y_pred_TEp4, multivar_rocket_)
result['Experiments'] = ['TEp4_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

Testing p4, class_weights:{CBN:1, XM:5}
	Training time: 5788.42 s
	Predicting time: 469.45 s
	Evaluation time: 466.41 s


In [19]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p3, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp3, y_train_TEp3)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp3 = multivar_rocket_.predict(X_test_TEp3)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp3, y_test_TEp3, y_pred_TEp3, multivar_rocket_)
result['Experiments'] = ['TEp3_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

Testing p3, class_weights:{CBN:1, XM:5}
	Training time: 5737.4 s
	Predicting time: 389.61 s
	Evaluation time: 388.91 s


In [20]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p2, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp2, y_train_TEp2)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp2 = multivar_rocket_.predict(X_test_TEp2)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp2, y_test_TEp2, y_pred_TEp2, multivar_rocket_)
result['Experiments'] = ['TEp2_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

Testing p2, class_weights:{CBN:1, XM:5}
	Training time: 4954.15 s
	Predicting time: 800.56 s
	Evaluation time: 825.55 s


In [21]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': [5:10]}),
            )

print('Testing p1, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp1, y_train_TEp1)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp1 = multivar_rocket_.predict(X_test_TEp1)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp1, y_test_TEp1, y_pred_TEp1, multivar_rocket_)
result['Experiments'] = ['TEp1_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds


Testing p1, class_weights:{CBN:1, XM:5}
	Training time: 5304.27 s
	Predicting time: 674.79 s
	Evaluation time: 699.52 s


In [23]:
result_DF

Unnamed: 0,"Confusion_Matrix(tn, fp, fn, tp)",Accur,TSS,HSS,GSS,TPR,TNR,CBNPr,XMPr,FAR,POFD,f1XM,f1CBN,Experiments
0,"[73840, 462, 829, 161]",0.982853,0.156408,0.191415,0.105837,0.162626,0.993782,0.988898,0.258427,0.741573,0.006218,0.199628,0.991334,TEp5_cw1:5
0,"[49664, 390, 954, 211]",0.97376,0.173324,0.226991,0.128026,0.181116,0.992208,0.981153,0.351082,0.648918,0.007792,0.238958,0.98665,TEp4_cw1:5
0,"[40615, 443, 1109, 315]",0.963467,0.210418,0.271766,0.157251,0.221208,0.98921,0.973421,0.415567,0.584433,0.01079,0.288726,0.981252,TEp3_cw1:5
0,"[85684, 599, 1171, 230]",0.979814,0.157226,0.196736,0.1091,0.164168,0.993058,0.986518,0.277443,0.722557,0.006942,0.206278,0.989777,TEp2_cw1:5
0,"[71849, 389, 1121, 134]",0.979454,0.101388,0.142113,0.076492,0.106773,0.994615,0.984638,0.256214,0.743786,0.005385,0.150731,0.989601,TEp1_cw1:5


# Customized scoring and cv function

In [11]:
def train_rocket(X_train,y_train,X_test,y_test,len_cv,test_label='p5'):
    groups = np.array([0] * len_cv[0] + [1] * len_cv[1] + [2] * len_cv[2] + [3] * len_cv[3])
    cv = GroupKFold(n_splits=4)
    train_index_1, train_index_2, train_index_3, train_index_4 = cv.split(X_train,y_train, groups)
    custom_cv = [train_index_1, train_index_2, train_index_3, train_index_4]
    
    common_params = {
            "num_kernels": 1000,
            "random_state": 42,
            "n_jobs": 10,
        }

    result_DF = pd.DataFrame(columns = ['Confusion_Matrix(tn, fp, fn, tp)', 'Accur', 'TSS', 'HSS', 'GSS', 'TPR', 'TNR', 'CBNPr', 'XMPr', 'FAR', 'POFD', 'f1XM', 'f1CBN', 'Experiments'])

    cws=[5,6,7,8,9,10]
    for cw in cws:
        multivar_rocket_ = make_pipeline(
                        Rocket(**common_params),
                        StandardScaler(with_mean=False),
                        RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}, cv=custom_cv, scoring=make_scorer(measurements.geometric_mean, greater_is_better=True))
                    )

        print('Testing '+test_label+', class_weights: {CBN:1, XM:'+str(cw)+'}')
        t0=time.time()
        multivar_rocket_.fit(X_train, y_train)
#         joblib.dump(multivar_rocket_, './models/Rocket/TE'+test_label+'_cw'+str(cw)+'.sav')
        print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

        t1=time.time()
        y_pred = multivar_rocket_.predict(X_test)
        print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

        t2=time.time()
        result = evaluation(X_test, y_test, y_pred, multivar_rocket_)
        result['Experiments'] = ['TE'+test_label+'_cw'+str(cw)]
        result_DF = pd.concat([result_DF,result])
        print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

    #     results = ridge.cv_results_
    #     allFoldResults_DF = pd.DataFrame.from_dict(results)

    result_DF = result_DF.reset_index(drop=True)
    return result_DF

In [136]:
result_TEp5 = train_rocket(X_train_TEp5,y_train_TEp5,X_test_TEp5,y_test_TEp5,[len(partition1),len(partition2),len(partition3),len(partition4)])

Testing p5, class_weights: {CBN:1, XM:5}
	Training time: 8497.37 s
	Predicting time: 715.8 s
	Evaluation time: 677.62 s
Testing p5, class_weights: {CBN:1, XM:6}
	Training time: 8812.5 s
	Predicting time: 677.4 s
	Evaluation time: 712.25 s
Testing p5, class_weights: {CBN:1, XM:7}
	Training time: 8778.13 s
	Predicting time: 678.1 s
	Evaluation time: 682.61 s
Testing p5, class_weights: {CBN:1, XM:8}
	Training time: 8826.94 s
	Predicting time: 687.16 s
	Evaluation time: 679.2 s
Testing p5, class_weights: {CBN:1, XM:9}
	Training time: 8830.43 s
	Predicting time: 682.87 s
	Evaluation time: 679.24 s
Testing p5, class_weights: {CBN:1, XM:10}
	Training time: 8921.35 s
	Predicting time: 686.05 s
	Evaluation time: 720.8 s


In [25]:
result_TEp4[['TSS','HSS','CBNPr','XMPr','FAR','TPR']].mean(axis=0)

TSS      0.279472
HSS      0.249918
CBNPr    0.983659
XMPr     0.257230
FAR      0.742770
TPR      0.300572
dtype: float64

In [147]:
result_TEp5.to_csv('./results/Rocket/TEp5_cw.csv')

In [12]:
result_TEp4 = train_rocket(X_train_TEp4,y_train_TEp4,X_test_TEp4,y_test_TEp4,[len(partition1),len(partition2),len(partition3),len(partition5)],test_label='p4')
result_TEp4.to_csv('./results/Rocket/TEp4_cw.csv')

Testing p4, class_weights: {CBN:1, XM:5}
	Training time: 2389.21 s
	Predicting time: 352.22 s
	Evaluation time: 377.02 s
Testing p4, class_weights: {CBN:1, XM:6}
	Training time: 2450.7 s
	Predicting time: 348.59 s
	Evaluation time: 349.65 s
Testing p4, class_weights: {CBN:1, XM:7}
	Training time: 2342.25 s
	Predicting time: 370.06 s
	Evaluation time: 344.15 s
Testing p4, class_weights: {CBN:1, XM:8}
	Training time: 2338.23 s
	Predicting time: 377.76 s
	Evaluation time: 350.06 s
Testing p4, class_weights: {CBN:1, XM:9}
	Training time: 2346.33 s
	Predicting time: 349.41 s
	Evaluation time: 377.75 s
Testing p4, class_weights: {CBN:1, XM:10}
	Training time: 2335.74 s
	Predicting time: 348.91 s
	Evaluation time: 349.86 s


In [13]:
result_TEp3 = train_rocket(X_train_TEp3,y_train_TEp3,X_test_TEp3,y_test_TEp3,[len(partition1),len(partition2),len(partition4),len(partition5)],test_label='p3')
result_TEp3.to_csv('./results/Rocket/TEp3_cw.csv')

Testing p3, class_weights: {CBN:1, XM:5}
	Training time: 2438.71 s
	Predicting time: 291.33 s
	Evaluation time: 289.75 s
Testing p3, class_weights: {CBN:1, XM:6}
	Training time: 2422.9 s
	Predicting time: 321.6 s
	Evaluation time: 285.88 s
Testing p3, class_weights: {CBN:1, XM:7}
	Training time: 2390.5 s
	Predicting time: 286.32 s
	Evaluation time: 285.08 s
Testing p3, class_weights: {CBN:1, XM:8}
	Training time: 2427.8 s
	Predicting time: 287.79 s
	Evaluation time: 285.31 s
Testing p3, class_weights: {CBN:1, XM:9}
	Training time: 2398.0 s
	Predicting time: 286.1 s
	Evaluation time: 288.84 s
Testing p3, class_weights: {CBN:1, XM:10}
	Training time: 2433.95 s
	Predicting time: 285.64 s
	Evaluation time: 286.5 s


In [14]:
result_TEp2 = train_rocket(X_train_TEp2,y_train_TEp2,X_test_TEp2,y_test_TEp2,[len(partition1),len(partition3),len(partition4),len(partition5)],test_label='p2')
result_TEp2.to_csv('./results/Rocket/TEp2_cw.csv')

Testing p2, class_weights: {CBN:1, XM:5}
	Training time: 2034.77 s
	Predicting time: 597.1 s
	Evaluation time: 589.38 s
Testing p2, class_weights: {CBN:1, XM:6}
	Training time: 2049.47 s
	Predicting time: 594.68 s
	Evaluation time: 590.88 s
Testing p2, class_weights: {CBN:1, XM:7}
	Training time: 2018.59 s
	Predicting time: 608.44 s
	Evaluation time: 608.11 s
Testing p2, class_weights: {CBN:1, XM:8}
	Training time: 2140.0 s
	Predicting time: 608.57 s
	Evaluation time: 604.64 s
Testing p2, class_weights: {CBN:1, XM:9}
	Training time: 2054.99 s
	Predicting time: 595.73 s
	Evaluation time: 591.77 s
Testing p2, class_weights: {CBN:1, XM:10}
	Training time: 2057.74 s
	Predicting time: 597.38 s
	Evaluation time: 598.53 s


In [15]:
result_TEp1 = train_rocket(X_train_TEp1,y_train_TEp1,X_test_TEp1,y_test_TEp1,[len(partition2),len(partition3),len(partition4),len(partition5)],test_label='p1')
result_TEp1.to_csv('./results/Rocket/TEp1_cw.csv')

Testing p1, class_weights: {CBN:1, XM:5}
	Training time: 2153.82 s
	Predicting time: 499.6 s
	Evaluation time: 497.84 s
Testing p1, class_weights: {CBN:1, XM:6}
	Training time: 2140.81 s
	Predicting time: 498.11 s
	Evaluation time: 500.06 s
Testing p1, class_weights: {CBN:1, XM:7}
	Training time: 2182.22 s
	Predicting time: 497.47 s
	Evaluation time: 498.38 s
Testing p1, class_weights: {CBN:1, XM:8}
	Training time: 2168.38 s
	Predicting time: 497.3 s
	Evaluation time: 507.16 s
Testing p1, class_weights: {CBN:1, XM:9}
	Training time: 2134.12 s
	Predicting time: 496.78 s
	Evaluation time: 548.48 s
Testing p1, class_weights: {CBN:1, XM:10}
	Training time: 2134.77 s
	Predicting time: 495.44 s
	Evaluation time: 495.37 s


In [23]:
result_all = pd.concat([result_TEp1,result_TEp2,result_TEp3,result_TEp4,result_TEp5])
result_all = result_all.reset_index(drop=True)
result_all.to_csv('./results/Rocket/TE_cwALL.csv')

# cw10

In [22]:
cw=10

multivar_rocket_ = make_pipeline(
                MultiRocketMultivariate(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )
print('Testing p5, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp5, y_train_TEp5)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp5)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp5, y_test_TEp5, y_pred_TEp5, multivar_rocket_)
result['Experiments'] = ['TEp5_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

Testing p5, class_weights:{CBN:1, XM:10}


MemoryError: Unable to allocate 152. GiB for an array with shape (254878, 79968) and data type float64

In [None]:
multivar_rocket_ = make_pipeline(
                MultiRocketMultivariate(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p4, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp4, y_train_TEp4)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp4 = multivar_rocket_.predict(X_test_TEp4)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp4, y_test_TEp4, y_pred_TEp4, multivar_rocket_)
result['Experiments'] = ['TEp4_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                MultiRocketMultivariate(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p3, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp3, y_train_TEp3)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp3 = multivar_rocket_.predict(X_test_TEp3)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp3, y_test_TEp3, y_pred_TEp3, multivar_rocket_)
result['Experiments'] = ['TEp3_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                MultiRocketMultivariate(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p2, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp2, y_train_TEp2)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp2 = multivar_rocket_.predict(X_test_TEp2)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp2, y_test_TEp2, y_pred_TEp2, multivar_rocket_)
result['Experiments'] = ['TEp2_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                MultiRocketMultivariate(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p1, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp1, y_train_TEp1)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp1 = multivar_rocket_.predict(X_test_TEp1)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp1, y_test_TEp1, y_pred_TEp1, multivar_rocket_)
result['Experiments'] = ['TEp1_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds


# cw20

In [None]:
cw=20

multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )
print('Testing p5, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp5, y_train_TEp5)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp5)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp5, y_test_TEp5, y_pred_TEp5, multivar_rocket_)
result['Experiments'] = ['TEp5_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p4, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp4, y_train_TEp4)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp4 = multivar_rocket_.predict(X_test_TEp4)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp4, y_test_TEp4, y_pred_TEp4, multivar_rocket_)
result['Experiments'] = ['TEp4_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p3, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp3, y_train_TEp3)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp3 = multivar_rocket_.predict(X_test_TEp3)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp3, y_test_TEp3, y_pred_TEp3, multivar_rocket_)
result['Experiments'] = ['TEp3_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p2, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp2, y_train_TEp2)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp2 = multivar_rocket_.predict(X_test_TEp2)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp2, y_test_TEp2, y_pred_TEp2, multivar_rocket_)
result['Experiments'] = ['TEp2_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
multivar_rocket_ = make_pipeline(
                Rocket(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )

print('Testing p1, class_weights:{CBN:1, XM:'+str(cw)+'}')
t0=time.time()
multivar_rocket_.fit(X_train_TEp1, y_train_TEp1)
print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds

t1=time.time()
y_pred_TEp1 = multivar_rocket_.predict(X_test_TEp1)
print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

t2=time.time()
result = evaluation(X_test_TEp1, y_test_TEp1, y_pred_TEp1, multivar_rocket_)
result['Experiments'] = ['TEp1_cw1:'+str(cw)]
result_DF = pd.concat([result_DF,result])
print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

In [None]:
common_params = {
            "num_kernels": 10000,
            "random_state": None,
            "max_dilations_per_kernel": 32,
            "n_jobs": 10,
        }

results_DF = pd.DataFrame(columns = ['Confusion_Matrix(tn, fp, fn, tp)', 'Accur', 'TSS', 'HSS', 'GSS', 'TPR', 'TNR', 'CBNPr', 'XMPr', 'FAR', 'POFD', 'f1XM', 'f1CBN', 'Experiments'])
cw_list = [5,6,7,8,9,10,20,30]

for cw in cw_list:
    multivar_rocket_ = make_pipeline(
                MultiRocketMultivariate(**common_params),
                StandardScaler(with_mean=False),
                RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), class_weight={'CBN': 1, 'XM': cw}),
            )
    print('Testing p5, class_weights:{CBN:1, XM:'+str(cw)+'}')
    t0=time.time()
    multivar_rocket_.fit(X_train_TEp5, y_train_TEp5)
    print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
    
    t1=time.time()
    y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp5)
    print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

    t2=time.time()
    result = evaluation(X_test_TEp5, y_test_TEp5, y_pred_TEp5, multivar_rocket_)
    result['Experiments'] = ['TEp5_cw1:'+str(cw)]
    result_DF = pd.concat([result_DF,result])
    print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

    
    print('Testing p4, class_weights:{CBN:1, XM:'+str(cw)+'}')
    t0=time.time()
    multivar_rocket_.fit(X_train_TEp4, y_train_TEp4)
    print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
    
    t1=time.time()
    y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp4)
    print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

    t2=time.time()
    result = evaluation(X_test_TEp4, y_test_TEp4, y_pred_TEp4, multivar_rocket_)
    result['Experiments'] = ['TEp4_cw1:'+str(cw)]
    result_DF = pd.concat([result_DF,result])
    print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

    
    print('Testing p3, class_weights:{CBN:1, XM:'+str(cw)+'}')
    t0=time.time()
    multivar_rocket_.fit(X_train_TEp3, y_train_TEp3)
    print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
    
    t1=time.time()
    y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp3)
    print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

    t2=time.time()
    result = evaluation(X_test_TEp3, y_test_TEp3, y_pred_TEp3, multivar_rocket_)
    result['Experiments'] = ['TEp3_cw1:'+str(cw)]
    result_DF = pd.concat([result_DF,result])
    print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

    
    print('Testing p2, class_weights:{CBN:1, XM:'+str(cw)+'}')
    t0=time.time()
    multivar_rocket_.fit(X_train_TEp2, y_train_TEp2)
    print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
    
    t1=time.time()
    y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp2)
    print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

    t2=time.time()
    result = evaluation(X_test_TEp2, y_test_TEp2, y_pred_TEp2, multivar_rocket_)
    result['Experiments'] = ['TEp2_cw1:'+str(cw)]
    result_DF = pd.concat([result_DF,result])
    print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds

    
    print('Testing p1, class_weights:{CBN:1, XM:'+str(cw)+'}')
    t0=time.time()
    multivar_rocket_.fit(X_train_TEp1, y_train_TEp1)
    print("\tTraining time:", round(time.time()-t0, 2), "s") # the time would be round to 3 decimal in seconds
    
    t1=time.time()
    y_pred_TEp5 = multivar_rocket_.predict(X_test_TEp1)
    print("\tPredicting time:", round(time.time()-t1, 2), "s") # the time would be round to 3 decimal in seconds

    t2=time.time()
    result = evaluation(X_test_TEp1, y_test_TEp1, y_pred_TEp1, multivar_rocket_)
    result['Experiments'] = ['TEp1_cw1:'+str(cw)]
    result_DF = pd.concat([result_DF,result])
    print("\tEvaluation time:", round(time.time()-t2, 2), "s") # the time would be round to 3 decimal in seconds


Testing p5, class_weights:{CBN:1, XM:5}


In [17]:
result_TEp5 = evaluation(X_test_TEp5, y_test_TEp5, y_pred_TEp5, rocket_TEp5)

In [18]:
result_TEp5

Unnamed: 0,"Confusion_Matrix(tn, fp, fn, tp)",Accur,TSS,HSS,GSS,TPR,TNR,CBNPr,XMPr,FAR,POFD,f1XM,f1CBN
0,"[74300, 2, 988, 2]",0.986851,0.001993,0.003919,0.001963,0.00202,0.999973,0.986877,0.5,0.5,2.7e-05,0.004024,0.993382


In [20]:
# result_TEp5 = evaluation(X_test_transform_TEp5, y_test_TEp5, y_pred_TEp5, RC_TEp5)
result_TEp4 = evaluation(X_test_TEp4, y_test_TEp4, y_pred_TEp4, rocket_TEp4)
result_TEp3 = evaluation(X_test_TEp3, y_test_TEp3, y_pred_TEp3, rocket_TEp3)
result_TEp2 = evaluation(X_test_TEp2, y_test_TEp2, y_pred_TEp2, rocket_TEp2)
result_TEp1 = evaluation(X_test_TEp1, y_test_TEp1, y_pred_TEp1, rocket_TEp1)

In [21]:
result_df = pd.concat([result_TEp5,result_TEp4,result_TEp3,result_TEp2,result_TEp1])
result_df = result_df.reset_index(drop=True)
result_df['Experiment'] = ['TEp5','TEp4','TEp3','TEp2','TEp1']
result_df

Unnamed: 0,"Confusion_Matrix(tn, fp, fn, tp)",Accur,TSS,HSS,GSS,TPR,TNR,CBNPr,XMPr,FAR,POFD,f1XM,f1CBN,Experiment
0,"[74300, 2, 988, 2]",0.986851,0.001993,0.003919,0.001963,0.00202,0.999973,0.986877,0.5,0.5,2.7e-05,0.004024,0.993382,TEp5
1,"[50050, 4, 1159, 6]",0.977294,0.00507,0.009829,0.004939,0.00515,0.99992,0.977367,0.6,0.4,8e-05,0.010213,0.988515,TEp4
2,"[41057, 1, 1422, 2]",0.966503,0.00138,0.002663,0.001333,0.001404,0.999976,0.966525,0.666667,0.333333,2.4e-05,0.002803,0.982966,TEp3
3,"[86282, 1, 1400, 1]",0.984022,0.000702,0.00138,0.00069,0.000714,0.999988,0.984033,0.5,0.5,1.2e-05,0.001426,0.991947,TEp2
4,"[72236, 2, 1251, 4]",0.982951,0.00316,0.006183,0.003101,0.003187,0.999972,0.982977,0.666667,0.333333,2.8e-05,0.006344,0.991402,TEp1


In [22]:
result_df.to_csv('./results/Rocket/result.csv')

In [89]:
# X_train_TEp5 = np.concatenate((partition1, partition2, partition3, partition4))
# y_train_TEp5 = np.concatenate((partition1_labels, partition2_labels, partition3_labels, partition4_labels))

# clf = BOSSEnsemble()
# clf.fit(X_train_TEp5, y_train_TEp5)
# y_pred = clf.predict(partition4)

ValueError: Data seen by BOSSEnsemble instance has missing values and multivariate series, but this BOSSEnsemble instance cannot handle missing values or multivariate series. Calls with missing values or multivariate series may result in error or unreliable results.