In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import glob
import os
import feather
from sklearn.pipeline import make_pipeline
from sktime.datasets import load_from_tsfile_to_dataframe
sns.set(style="whitegrid")

--------------------------------------

### Preparing the frames to store information

In [None]:
# Datasets evaluated throughout the notebook; each one becomes a column of
# the accuracy table below.
datasets = [
    'Adiac',
    'Fish',
    'OliveOil',
    'Phoneme',
    'ShapesAll',
    'SwedishLeaf',
    'WordSynonyms',
]

# Accuracy table: one row per classifier, one column per dataset.
# It starts without rows; rows are added as the experiments run.
accuracy = pd.DataFrame(columns=datasets)

In [None]:
# Number of fit/score repetitions performed per dataset.
nbrRuns = 1

# Names of the compared classifiers, used as the columns of every
# prediction frame created below.
classifiers = [
    'RF_sumCl',
    'RF_sklearn',
    'RF_catch22',
    'RF_kats',
    'RF_tsfeatures',
    'RF_tsfel',
    'RF_tsfresh',
]

# One independent, initially empty prediction frame per dataset.
(Adiac_pred,
 Fish_pred,
 OliveOil_pred,
 Phoneme_pred,
 ShapesAll_pred,
 SwedishLeaf_pred,
 WordSynonyms_pred) = (pd.DataFrame(columns=classifiers) for _ in range(7))

# Same frames collected in a list, in the order the names are listed above.
predictionFrames = [
    Adiac_pred,
    Fish_pred,
    OliveOil_pred,
    Phoneme_pred,
    ShapesAll_pred,
    SwedishLeaf_pred,
    WordSynonyms_pred,
]

--------------------------------------

### Starting with SKTIME

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sktime.classification.feature_based import SummaryClassifier

# Random forest with default settings; reused by every experiment below.
sklearn_classifier = RandomForestClassifier()

# sktime SummaryClassifier configured with the random forest as its estimator.
sc_classifier = SummaryClassifier(estimator=sklearn_classifier)

In [None]:
# Best test accuracy of the sktime SummaryClassifier, one entry per dataset
# (same order as `datasets`).
score2 = []

# Iterate over the datasets directly: the previous zip against
# `predictionFrames` only produced an index that was never used and would
# silently truncate the loop if the two lists ever differed in length.
for dataset in datasets:
    print(dataset)

    # Phoneme is skipped for this classifier (the reason is not documented in
    # this notebook); NaN keeps the score list aligned with `datasets`.
    # The check is done before loading so the files are not read needlessly.
    if dataset == 'Phoneme':
        score2.append(np.nan)
        continue

    # load training data
    X_train, y_train = load_from_tsfile_to_dataframe('data/' + dataset + '_TRAIN.ts')

    # load test data
    X_test, y_test = load_from_tsfile_to_dataframe('data/' + dataset + '_TEST.ts')

    ############################
    # sktime - SummaryClassifier
    run_scores = []
    for _ in range(nbrRuns):
        sc_classifier.fit(X_train, y_train)
        run_scores.append(sc_classifier.score(X_test, y_test))

    # NOTE(review): the best run is reported, not the mean over runs —
    # confirm this is the intended summary statistic.
    score2.append(max(run_scores))
            
    

In [None]:
# Row 0 of the accuracy table: scores collected in `score2`
# (one value per dataset column).
accuracy.loc[0] = score2
accuracy

--------------------------------------

### SKLEARN - basic

In [None]:
def getFrame(dataset, labels):
    """Flatten a frame of per-row series into a plain 2-D table with labels.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose first column holds one ``pandas.Series`` per row
        (presumably the nested format returned by sktime's loader — verify
        against the caller). Only the first column is used.
    labels : iterable
        One class label per row of ``dataset``; each is converted with
        ``int()``.

    Returns
    -------
    pandas.DataFrame
        One row per input series (index reset to 0..n-1), one column per
        position of the series, plus a trailing ``'label'`` column.
    """
    # Build all single-row frames first and concatenate once. Growing the
    # frame with pd.concat inside the loop copies it on every iteration
    # (quadratic) and, because it starts from an empty frame, triggers
    # pandas' empty-entry concat deprecation warning.
    rows = [series.to_frame().T for series in dataset.iloc[:, 0]]

    # pd.concat rejects an empty list, so keep the empty-input behaviour explicit.
    finalFrame = pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

    # adds labels at the end of the frame
    finalFrame['label'] = [int(i) for i in labels]

    return finalFrame

In [None]:
# From here on every dataset is fitted and scored 20 times.
nbrRuns = 20

# Best test accuracy of the random forest on the raw series values,
# one entry per dataset (same order as `datasets`).
score3 = []

# Iterate over the datasets directly: the previous zip against
# `predictionFrames` only produced an index that was never used and would
# silently truncate the loop if the two lists ever differed in length.
for dataset in datasets:

    # load training data
    X_train, y_train = load_from_tsfile_to_dataframe('data/' + dataset + '_TRAIN.ts')
    trainData = getFrame(X_train, y_train)

    # load test data
    X_test, y_test = load_from_tsfile_to_dataframe('data/' + dataset + '_TEST.ts')
    testData = getFrame(X_test, y_test)

    # getFrame puts 'label' in the last column; everything before it is a
    # feature. The split does not change between runs, so do it once.
    train_features, train_labels = trainData.iloc[:, :-1], trainData['label']
    test_features, test_labels = testData.iloc[:, :-1], testData['label']

    run_scores = []
    for _ in range(nbrRuns):
        sklearn_classifier.fit(train_features, train_labels)
        run_scores.append(sklearn_classifier.score(test_features, test_labels))

    # NOTE(review): the best run is reported, not the mean over runs —
    # confirm this is the intended summary statistic.
    score3.append(max(run_scores))

In [None]:
# Row 1 of the accuracy table: scores collected in `score3`
# (one value per dataset column).
accuracy.loc[1] = score3
accuracy

--------------------------------------

### SKLEARN x Features

In [None]:
# Feature tables read from feather files, one per feature-extraction library.
catch22 = pd.read_feather('featureFrames/output_catch22.feather')
kats = pd.read_feather('featureFrames/output_kats.feather')
# NOTE(review): 'test.feather' does not follow the 'output_<name>.feather'
# pattern of the other files — confirm this is the intended tsfeatures output.
tsfeatures = pd.read_feather('featureFrames/test.feather')
tsfel = pd.read_feather('featureFrames/output_tsfel.feather')
tsfresh = pd.read_feather('featureFrames/output_tsfresh.feather')

# Kept in the same order as the last five entries of `classifiers`.
extractors = [catch22, kats, tsfeatures, tsfel, tsfresh]

In [None]:
# Rows 0 and 1 of the accuracy table are filled by the earlier experiments,
# so the feature-extractor rows start at index 2. A fixed start (rather than
# a running counter) keeps this cell idempotent when re-run.
for j, ex in enumerate(extractors, start=2):
    scoreX = []

    for dataset in datasets:

        # Rows of the current dataset, split by the 'set' column.
        in_dataset = ex['data'] == dataset
        train = ex.loc[in_dataset & (ex['set'] == 'train')]
        test  = ex.loc[in_dataset & (ex['set'] == 'test')]

        # The last three columns are excluded from the features — presumably
        # the 'data', 'set' and 'label' columns; verify against the feather
        # files. The split does not change between runs, so do it once.
        train_features, train_labels = train.iloc[:, :-3], train['label']
        test_features, test_labels = test.iloc[:, :-3], test['label']

        run_scores = []
        for _ in range(nbrRuns):
            sklearn_classifier.fit(train_features, train_labels)
            run_scores.append(sklearn_classifier.score(test_features, test_labels))

        # NOTE(review): the best run is reported, not the mean over runs —
        # confirm this is the intended summary statistic.
        scoreX.append(max(run_scores))

    accuracy.loc[j] = scoreX

In [None]:
# Display the accuracy table (rows: classifiers, columns: datasets).
accuracy

In [None]:
# old results with 1 run 
# accuracy

--------------------------------------

### Plotting accuracy

In [None]:
def plotPerDataset(axes, y_pos, nbrPlots, df):
    """Draw one bar chart per column of ``df`` (i.e. per dataset).

    Parameters
    ----------
    axes : indexable collection of axes
        The first ``nbrPlots`` entries are drawn on.
    y_pos : array-like
        Bar positions; also used as the x tick locations.
    nbrPlots : int
        Number of leading columns of ``df`` to plot.
    df : pandas.DataFrame
        Accuracy table; column ``c`` supplies the title and the bar heights
        of ``axes[c]``.

    The tick labels are taken from the notebook-level ``classifiers`` list.
    """
    for idx in range(nbrPlots):
        ax = axes[idx]

        # One bar per row of the table, labelled with the classifier names.
        ax.bar(y_pos, df.iloc[:, idx], color='maroon')
        ax.set_xticks(y_pos, classifiers, rotation='vertical')

        # The column name (dataset) is the panel title.
        ax.set_title(df.columns[idx])
        ax.set_xlabel('Classifier')
        ax.set_ylabel('Accuracy')
    
    
def plotPerClassifier(axes, y_pos, nbrPlots, df):
    """Draw one bar chart per row of ``df`` (i.e. per classifier).

    Parameters
    ----------
    axes : indexable collection of axes
        The first ``nbrPlots`` entries are drawn on.
    y_pos : array-like
        Bar positions; also used as the x tick locations.
    nbrPlots : int
        Number of leading rows of ``df`` to plot.
    df : pandas.DataFrame
        Accuracy table; row ``c`` supplies the bar heights of ``axes[c]`` and
        the column names supply the tick labels.

    Panel titles are taken from the notebook-level ``classifiers`` list, since
    the table's rows are indexed by position rather than by name.
    """
    # Label the bars with the columns of the frame actually being plotted
    # instead of the global `datasets` list: identical for the full accuracy
    # table, but it stays correct for any frame whose columns differ.
    tick_labels = list(df.columns)

    for c in range(nbrPlots):

        axes[c].set_title(classifiers[c])
        axes[c].set_xlabel('Dataset')
        axes[c].set_ylabel('Accuracy')

        # Create bars: one per dataset column
        axes[c].bar(y_pos, df.iloc[c], color='maroon')

        axes[c].set_xticks(y_pos, tick_labels, rotation='vertical')
    

In [None]:
def plotAccuracy(df, perDataset = True):
    """Plot the accuracy table as a grid of bar charts, three per row.

    Parameters
    ----------
    df : pandas.DataFrame
        Accuracy table (rows: classifiers, columns: datasets).
    perDataset : bool, default True
        If True, draw one chart per column of ``df`` (via ``plotPerDataset``);
        otherwise one chart per row (via ``plotPerClassifier``).

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    cols = 3

    if perDataset:
        # one panel per column, one bar per row
        nbrPlots = len(df.columns)
        y_pos    = np.arange(len(df))
    else:
        # one panel per row, one bar per column
        nbrPlots = len(df)
        y_pos    = np.arange(len(df.columns))

    # Enough rows of `cols` panels to hold every plot.
    rows = int(np.ceil(nbrPlots/cols))

    fig, ax = plt.subplots(rows, cols, figsize=(30, 30))
    # using padding
    fig.tight_layout(pad=14.0)

    axes = ax.flatten()

    if perDataset:
        plotPerDataset(axes, y_pos, nbrPlots, df)
    else:
        plotPerClassifier(axes, y_pos, nbrPlots, df)

    # The grid is rounded up to a multiple of `cols`, so it usually has more
    # panels than plots; hide the leftovers instead of showing empty frames.
    for unused in axes[nbrPlots:]:
        unused.set_visible(False)
    
     

#### Plot for each dataset

In [None]:
# One panel per dataset (column of the table), bars are the classifiers.
plotAccuracy(accuracy)

#### Plot for each classifier

In [None]:
# One panel per classifier (row of the table), bars are the datasets.
plotAccuracy(accuracy, False)

--------------------------------------