In [1]:
from sktime.utils.data_io import load_from_tsfile_to_dataframe
import glob
import joblib
import numpy as np
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
import seaborn as sns
import sktime 
from sktime.datatypes._panel._convert import from_3d_numpy_to_nested
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.classification.dictionary_based import BOSSEnsemble
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.classification.shapelet_based import MrSEQLClassifier
from sktime.datasets import load_basic_motions
from sktime.transformations.panel.compose import ColumnConcatenator
from imblearn.under_sampling import RandomUnderSampler
from sktime.classification.interval_based import RandomIntervalSpectralForest
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
## Loading the data from .ts files
features=['USFLUX', 'TOTUSJH', 'ABSNJZH', 'SAVNCPP', 'TOTBSQ', 'TOTPOT', 'TOTUSJZ']
X_features,Y_target=load_from_tsfile_to_dataframe("MVTS_Data_Updated/MVTS_flares_data_updated/MVTS_flares_data_updated.ts",
                                           return_separate_X_and_y=True,)
X_features.columns=features

In [3]:
## Splitting data into train test and validation
X_train=X_features.iloc[0:9289]
y_train=Y_target[0:9289]
X_test=X_features.iloc[9289:19667]
y_test=Y_target[9289:19667]
X_validation=X_features.iloc[19667:30216]
y_validation=Y_target[19667:30216]

In [4]:
##  Random Undersampling of the data instances
undersample = RandomUnderSampler(sampling_strategy=0.25)
X_train_under, y_train_under = undersample.fit_resample(X_train, y_train,)

# Column Ensemble Method For Random UnderSample Data

In [5]:
clf = ColumnEnsembleClassifier(
    estimators=[
        ("TSF0_0", TimeSeriesForestClassifier(n_estimators=200), ["USFLUX"]),
         ("TSF0_1", TimeSeriesForestClassifier(n_estimators=200), ["TOTUSJH"]),
         ("TSF0_2", TimeSeriesForestClassifier(n_estimators=200), ["ABSNJZH"]),
         ("TSF0_3", TimeSeriesForestClassifier(n_estimators=200), ["SAVNCPP"]),
         ("TSF0_4", TimeSeriesForestClassifier(n_estimators=200), ["TOTBSQ"]),
         ("TSFO_5", TimeSeriesForestClassifier(n_estimators=200), ["TOTPOT"]),
         ("TSF0_6", TimeSeriesForestClassifier(n_estimators=200), ["TOTUSJZ"])
     ]
)
clf.fit(X_train_under, y_train_under)
clf.score(X_test, y_test)

0.9226247831952207

# Column Concatenator Method using Time Series Forest

In [None]:
steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForestClassifier(n_estimators=250)),
]
clf_cat = Pipeline(steps)
clf_cat.fit(X_train, y_train)
clf_cat.score(X_test, y_test)



# Model Evaluation 

Note: We can change the classifier in the code below to get performace Measure. Right now it is selected for Column Ensemble Method

In [6]:
## Prediction by the model
y_pred=clf.predict(X_test)

In [7]:
# Confusion Matrix
cm=confusion_matrix(y_test, y_pred)
df_cm=pd.DataFrame(cm)
df_cm

Unnamed: 0,0,1
0,8622,355
1,448,953


In [8]:
## Getting TP, TN, FP, FN from CM
TN=df_cm.iloc[0,0]
FP=df_cm.iloc[0,1]
FN=df_cm.iloc[1,0]
TP=df_cm.iloc[1,1]


In [9]:
## Function to calculate matrix
def accuracy_measure(TN,FP,FN,TP):
    P = TP + FN
    N = FP + TN
    amdf = pd.DataFrame()
    amdf['TP'] = [TP]
    amdf['FP'] = FP
    amdf['TN'] = TN
    amdf['FN'] = FN
    amdf['P'] = P
    amdf['N'] = N
    return(amdf)

In [10]:
accuracy_measure(TN,FP,FN,TP)

Unnamed: 0,TP,FP,TN,FN,P,N
0,953,355,8622,448,1401,8977


In [11]:
## Functions to calculate HSS, TSS, GSS, FAR
def evaluate(amdf):
    tss = (amdf.TP / (amdf.TP + amdf.FN)) - (amdf.FP / (amdf.FP + amdf.TN))
    hss = (2 * ((amdf.TP * amdf.TN) - (amdf.FN * amdf.FP)) / (amdf.P * (amdf.FN + amdf.TN)
                                                              + amdf.N * (amdf.TP + amdf.FP)))
    precXM = amdf.TP / (amdf.TP + amdf.FP)
    recallXM = amdf.TP / (amdf.TP + amdf.FN)
    precCBN = amdf.TN / (amdf.TN + amdf.FN)
    recallCBN = amdf.TN / (amdf.TN + amdf.FP)
    far = amdf.FP / (amdf.FP + amdf.TP)
    ch = (amdf.TP + amdf.FP) * (amdf.TP + amdf.FN) / (amdf.TP + amdf.FP + amdf.FN + amdf.TN)
    gss = (amdf.TP - ch) / (amdf.TP + amdf.FP + amdf.FN - ch)
    eval_metrics = pd.DataFrame()
    eval_metrics['TSS'] = [tss.iloc[0]]
    eval_metrics['HSS'] = hss.iloc[0]
    eval_metrics['GSS'] = gss.iloc[0]
    eval_metrics['Precision(Class 1)'] = precXM.iloc[0]
    eval_metrics['Recall(Class 1)'] = recallXM.iloc[0]
    eval_metrics['Precision(Class 0)'] = precCBN.iloc[0]
    eval_metrics['Recall( Class 0)'] = recallCBN.iloc[0]
    eval_metrics['FAR'] = far.iloc[0]
    return(eval_metrics)

In [12]:
amdf=accuracy_measure(TN,FP,FN,TP)
Test_evaluation_report=evaluate(amdf)
Test_evaluation_report

Unnamed: 0,TSS,HSS,GSS,Precision(Class 1),Recall(Class 1),Precision(Class 0),Recall( Class 0),FAR
0,0.640683,0.659146,0.491587,0.728593,0.680228,0.950606,0.960454,0.271407


In [13]:
## Doing the same thing for Validation set
y_pred_val=clf.predict(X_validation)

In [14]:
cm_val=confusion_matrix(y_validation, y_pred_val)

In [15]:
df_cm_val=pd.DataFrame(cm_val)
df_cm_val

Unnamed: 0,0,1
0,8402,723
1,361,1063


In [16]:
TN_val=df_cm_val.iloc[0,0]
FP_val=df_cm_val.iloc[0,1]
FN_val=df_cm_val.iloc[1,0]
TP_val=df_cm_val.iloc[1,1]


In [17]:
amdf_valid=accuracy_measure(TN_val,FP_val,FN_val,TP_val)
Valid_evaluation_report=evaluate(amdf_valid)
Valid_evaluation_report

Unnamed: 0,TSS,HSS,GSS,Precision(Class 1),Recall(Class 1),Precision(Class 0),Recall( Class 0),FAR
0,0.667256,0.602613,0.431243,0.595185,0.746489,0.958804,0.920767,0.404815
