In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import timeit

from numpy import genfromtxt

from sklearn.model_selection import LeaveOneOut

from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import neighbors
from sklearn.naive_bayes import GaussianNB

### This section builds the meta-data from `test_acc.csv` (adapted from the results published at timeseriesclassification.com) and from the tsfresh features extracted from each training set (run `run_pre_tsfresh.sh` to generate them).

In [None]:
# Accuracy table: the 'TESTACC' column holds the dataset name (it is used below to
# locate each tsfresh file); the remaining columns are treated as per-classifier accuracies.
test_acc = pd.read_csv('test_acc.csv')
test_acc.head()

In [None]:
# Meta-targets: 1-based position of the highest value in each row, counted over
# the columns that remain once 'TESTACC' is removed.
y = test_acc.drop(columns='TESTACC').to_numpy().argmax(axis=1) + 1

In [None]:
# Meta-attributes: one row per dataset, made of the column-wise mean followed by the
# column-wise standard deviation of that dataset's tsfresh feature table.
meta_rows = []

for dataset in test_acc['TESTACC']:

    print(dataset)

    this_feature_set = pd.read_csv('tsfresh/' + dataset + '_TRAIN.csv').drop(labels=["Unnamed: 0"],axis=1)
    meta_rows.append(np.hstack([this_feature_set.mean(axis=0), this_feature_set.std(axis=0)]))

# Stacking sizes X from the data itself instead of a hard-coded (108, 3148) buffer;
# np.vstack raises if two datasets yield a different number of features.
X = np.vstack(meta_rows).astype(float)

In [None]:
# Persist the meta-attributes (X) and meta-targets (y) as CSV so they can be
# reloaded later with the commented genfromtxt lines further down.
np.savetxt("meta-attributes.csv", X, delimiter=",")
np.savetxt("meta-targets.csv", y, delimiter=",")

### Start here if you have already extracted the meta-knowledge.

In [None]:
# Uncomment to reload previously saved meta-knowledge instead of rebuilding it:
# X = genfromtxt('meta-attributes.csv', delimiter=',')
# y = genfromtxt('meta-targets.csv', delimiter=',')

# Cleaning: turn +/-inf into NaN, then drop every column that contains a NaN.
df = pd.DataFrame(X)
X = (
    df.replace([np.inf, -np.inf], np.nan)
      .dropna(axis=1)
      .to_numpy()
)

#### Defining useful methods

In [None]:
def show_results(preds):
    """Print accuracy statistics for a set of meta-learner recommendations.

    Re-reads 'test_acc.csv' from the working directory and prints the mean,
    standard deviation and median accuracy of the topline (row-wise best
    classifier), of the recommended classifier ("MetaL") and of TS-CHIEF,
    followed by win/loss/draw counts of MetaL against TS-CHIEF, the topline
    and the baseline (row-wise mean accuracy).

    Parameters
    ----------
    preds : sequence
        One recommendation per row of the CSV. Each entry is a column
        position in the full table (plain number or 1-element array, as
        returned by ``model.predict``). Because the meta-targets are built
        as ``argmax + 1`` after dropping 'TESTACC', this lookup assumes
        'TESTACC' is the first column of the file.

    Returns
    -------
    None
        The report is written to standard output.
    """
    test_acc = pd.read_csv('test_acc.csv')
    per_algorithm = test_acc.drop('TESTACC', axis=1).to_numpy()
    topline = per_algorithm.max(axis=1)
    baseline = per_algorithm.mean(axis=1)

    # Flatten first so both scalars and the 1-element arrays produced by
    # predict() are accepted without calling int() on an array.
    chosen = np.asarray(preds).ravel().astype(int)
    # Explicit (row, column) positional lookup: integer keys on a
    # string-labelled Series are deprecated as positions in recent pandas.
    accs_meta = np.array([test_acc.iloc[row, col] for row, col in enumerate(chosen)])

    ts_chief = test_acc['TS-CHIEF']

    print('Mean Acc (std):')

    for label, values in (('Topline', topline),
                          ('MetaL', accs_meta),
                          ('TS-CHIEF', ts_chief)):
        print(label + ': ' + "{:.4f}".format(np.mean(values)) +
              '(' + "{:.4f}".format(np.std(values)) + ')' +
              ' - Median =' + "{:.4f}".format(np.median(values)))

    for heading, reference in (('MetaL vs TS-CHIEF', ts_chief),
                               ('MetaL vs topline', topline),
                               ('MetaL vs baseline', baseline)):
        print(' ')

        print(heading)
        print('Wins:' + str(np.sum(accs_meta > reference)))
        print('Loses:' + str(np.sum(accs_meta < reference)))
        print('Draws:' + str(np.sum(accs_meta == reference)))

In [None]:
def run_metamodel(option='random_forest', random_state=42):
    """Evaluate one meta-learner with leave-one-out cross-validation.

    Reads the notebook-level arrays ``X`` (meta-attributes) and ``y``
    (meta-targets). For every leave-one-out split a fresh model is fitted on
    the training rows and asked to predict the single held-out row. Only the
    time spent inside ``fit`` is accumulated and printed.

    Parameters
    ----------
    option : str
        '1nn', '5nn', 'svm' (alias 'svc'), 'gbc', 'nb' or 'random_forest'.
        'svc' is accepted because the notebook calls this function with that
        spelling; it used to fall through silently to the random forest.
        Any other value still selects the random forest, but now emits a
        warning so a mistyped name cannot go unnoticed.
    random_state : int
        Seed forwarded to the gradient-boosting and random-forest models.

    Returns
    -------
    list
        One entry per row of ``X``, each being the array returned by
        ``model.predict`` for the held-out row.
    """
    import warnings  # local import: keeps this cell independent of the import cell

    known_options = ('1nn', '5nn', 'svm', 'svc', 'gbc', 'nb', 'random_forest')
    if option not in known_options:
        warnings.warn("Unknown option %r; falling back to RandomForestClassifier" % (option,),
                      stacklevel=2)

    def build_model():
        # Fresh, unfitted estimator for the requested option.
        if option == '1nn':
            return neighbors.KNeighborsClassifier(1)
        if option == '5nn':
            return neighbors.KNeighborsClassifier()
        if option in ('svm', 'svc'):
            return svm.SVC()
        if option == 'gbc':
            return GradientBoostingClassifier(random_state=random_state)
        if option == 'nb':
            return GaussianNB()
        return RandomForestClassifier(random_state=random_state)

    preds = []
    total_time = 0

    for train_index, test_index in LeaveOneOut().split(X):

        model = build_model()

        # Only the training call is timed, not model construction or prediction.
        start = timeit.default_timer()
        model.fit(X[train_index], y[train_index])
        stop = timeit.default_timer()
        total_time += stop - start

        preds.append(model.predict(X[test_index]))

    print('LOO runtime (seconds): ', total_time)

    return preds

#### Running/training the metamodels

In [None]:
rf = []
rf_fs = []  # NOTE(review): never filled or read in the visible cells


print('MetaL-CATS-1NN')
nn = run_metamodel(option='1nn')

print('MetaL-CATS-5NN')
knn = run_metamodel(option='5nn')

print('MetaL-CATS-SVM')
# run_metamodel selects svm.SVC() for option='svm'.
svc = run_metamodel(option='svm')

print('MetaL-CATS-NB')
nb = run_metamodel(option='nb')


# The random forest is stochastic, so it is repeated with ten different seeds.
print('MetaL-CATS-RF (10 runs)')
for i in range(10):
    print('RF: Random seed = ' + str(i))
    rf.append(run_metamodel(option='random_forest', random_state=i))

In [None]:
'''
run it if you are interested in saving the results
'''
# Uncomment to write each prediction list to results/<name>.npy.
# np.save('results/rf.npy',rf)
# np.save('results/nn.npy',nn)
# np.save('results/knn.npy',knn)
# np.save('results/svc.npy',svc)
# np.save('results/nb.npy',nb)

In [None]:
'''
run it if you are interested in starting from the saved results
'''
# Uncomment to restore the predictions written by the saving cell
# (these lines previously repeated np.save, which would not load anything).
# rf = np.load('results/rf.npy')
# nn = np.load('results/nn.npy')
# knn = np.load('results/knn.npy')
# svc = np.load('results/svc.npy')
# nb = np.load('results/nb.npy')

In [None]:
# Print one report per meta-learner; each header is a dashed rule plus the model name.
for section_header, section_preds in (
    ('------------\n1-nn\n', nn),
    ('------------\n5-nn\n', knn),
    ('------------\nGaussian NB\n', nb),
    ('------------\nSVM\n', svc),
    ('------------\nRF (seed=0)\n', rf[0]),
):
    print(section_header)
    show_results(section_preds)
print('------------')

#### Comparison and plotting routines

In [None]:
# Row-wise best, mean and worst value over all columns except 'TESTACC'.
_acc_matrix = test_acc.drop(columns='TESTACC').to_numpy()
topline = _acc_matrix.max(axis=1)
baseline = _acc_matrix.mean(axis=1)
worstcase = _acc_matrix.min(axis=1)

# Individual reference classifiers used in the comparisons below.
ts_chief, hive_cote = test_acc['TS-CHIEF'], test_acc['HIVE-COTE v1.0']

def get_accs(preds):
    """Return the accuracy obtained by following each recommendation.

    Parameters
    ----------
    preds : sequence
        One entry per row of the notebook-level ``test_acc`` table, each a
        column position in that table (plain number or 1-element array).
        The lookup assumes 'TESTACC' is the first column, which matches how
        the meta-targets are built (``argmax + 1`` after dropping it).

    Returns
    -------
    list
        ``test_acc`` value at (row i, column preds[i]) for every i.
    """
    # Flatten so 1-element arrays from predict() need no int(array) conversion.
    chosen = np.asarray(preds).ravel().astype(int)
    # Explicit positional (row, column) access; integer keys on a string-labelled
    # Series are deprecated as positions in recent pandas.
    return [test_acc.iloc[row, col] for row, col in enumerate(chosen)]

In [None]:
# to a better evaluation of RF
rf_avg = np.zeros(108)
for i in range(10):
    rf_avg += get_accs(rf[i])
rf_avg /= 10

print(np.mean(rf_avg))
print(np.std(rf_avg))
print(np.median(rf_avg))
print('TS-CHIEF vs MetaL-RF')
print('Wins:' + str(np.sum(ts_chief > rf_avg)))
print('Loses:' + str(np.sum(ts_chief < rf_avg)))
print('Draws:' + str(np.sum(ts_chief == rf_avg)))

In [None]:
# Long-format table for plotting: every method contributes len(topline) accuracy
# values, each tagged with the method's display name.
_labelled_scores = [
    ("Baseline", baseline),
    ("MetaL-CATS-1NN", get_accs(nn)),
    ("MetaL-CATS-5NN", get_accs(knn)),
    ("MetaL-CATS-NB", get_accs(nb)),
    ("MetaL-CATS-RF", rf_avg),
    ("MetaL-CATS-SVM", get_accs(svc)),
    ("TS-CHIEF", ts_chief),
    ("Topline", topline),
]

dicio = {
    "Accuracy": np.hstack([scores for name, scores in _labelled_scores]),
    "Algorithm": np.repeat([name for name, scores in _labelled_scores], len(topline)),
}

results = pd.DataFrame(dicio)

In [None]:
# Box plot of the accuracy distribution of every method in `results`, saved as a PDF.
f = plt.figure(figsize=[8,7])
sns.set_theme(style="whitegrid")
ax = sns.boxplot(x="Algorithm", y="Accuracy", data=results)
plt.xticks(rotation=90)  # rotate the x tick labels by 90 degrees
f.savefig("boxplot.pdf")

In [None]:
# Mean accuracy of HIVE-COTE, then of TS-CHIEF, one per line.
print(hive_cote.mean(), ts_chief.mean(), sep='\n')

# Dataset-by-dataset tally from TS-CHIEF's point of view.
print('TS-CHIEF vs HIVE-COTE')
print(f"Wins:{np.sum(ts_chief > hive_cote)}")
print(f"Loses:{np.sum(ts_chief < hive_cote)}")
print(f"Draws:{np.sum(ts_chief == hive_cote)}")

In [None]:
# Accuracy obtained by following the recommendations stored in `svc`.
svc_acc = get_accs(svc)
print(np.mean(svc_acc))

# Dataset-by-dataset tally from TS-CHIEF's point of view.
print('TS-CHIEF vs MetaL-SVM')
print(f"Wins:{np.sum(ts_chief > svc_acc)}")
print(f"Loses:{np.sum(ts_chief < svc_acc)}")
print(f"Draws:{np.sum(ts_chief == svc_acc)}")

In [None]:
accs = test_acc.drop(columns='TESTACC')

svc_acc = get_accs(svc)
print('Algorithm\tWins/Losses')

# each algorithm against the baseline
for this_alg in accs.columns:
    this_acc = test_acc[this_alg]
    print(f"{this_alg}\t{np.sum(this_acc > baseline)}/{np.sum(this_acc < baseline)}")

# same tally for the recommendations stored in `svc`
print(f"MetaL-CATS-SVM\t{np.sum(svc_acc > baseline)}/{np.sum(svc_acc < baseline)}")

In [None]:
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(df_confusion, title='Confusion matrix'):
    """Draw a confusion matrix as a heat map and save it to 'confusion.pdf'.

    Parameters
    ----------
    df_confusion : pandas.DataFrame
        Square table of counts. Its column labels are used as tick labels on
        both axes; ``index.name`` and ``columns.name`` become the y and x
        axis labels.
    title : str
        Accepted for compatibility; the title is currently not drawn.

    Returns
    -------
    None
    """
    f = plt.figure(figsize=[10,10])
    # plt.matshow opens a brand-new figure unless fignum is given, which left the
    # 10x10 figure above empty and ignored its size. Draw into it explicitly.
    plt.matshow(df_confusion, cmap="Blues", fignum=f.number) # imshow
    #plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(df_confusion.columns))
    plt.xticks(tick_marks, df_confusion.columns, rotation=90)
    plt.yticks(tick_marks, df_confusion.columns)
    #plt.tight_layout()
    plt.ylabel(df_confusion.index.name)
    plt.xlabel(df_confusion.columns.name)
    plt.savefig("confusion.pdf")
    
# Meta-target j (1-based) is the j-th column of test_acc once 'TESTACC' is dropped,
# so every such column gets a row and a column in the matrix. Passing `labels`
# keeps the matrix k x k even when some classifier never appears in y or in the
# predictions; the previous [1:-1] slice dropped the last classifier and only
# fitted when exactly k-1 labels happened to occur.
algorithm_names = list(test_acc.drop('TESTACC', axis=1).columns)
label_ids = np.arange(1, len(algorithm_names) + 1)

df_confusion = pd.DataFrame(
    confusion_matrix(np.ravel(y).astype(int), np.ravel(svc).astype(int), labels=label_ids),
    index=algorithm_names,
    columns=algorithm_names,
)
# confusion_matrix puts true labels on rows and predictions on columns.
df_confusion.index.name = 'Best classifier (meta-target)'
df_confusion.columns.name = 'Recommended classifier'
plot_confusion_matrix(df_confusion)

## Hypothesis test

In [None]:
# !pip install Orange3
import Orange 

In [None]:
# One column per method, one row per dataset.
dicio = pd.DataFrame({
    "MetaL-CATS-1NN": get_accs(nn),
    "MetaL-CATS-5NN": get_accs(knn),
    "MetaL-CATS-NB": get_accs(nb),
    "MetaL-CATS-RF": rf_avg,
    "MetaL-CATS-SVM": get_accs(svc),
    "TS-CHIEF": ts_chief,
})

# Rank the methods within each row (rank 1 = highest accuracy), then average per method.
avranks = dicio.rank(axis=1, ascending=False).mean().to_numpy()

# Critical difference for the Nemenyi test, using the number of rows as n.
cd = Orange.evaluation.compute_CD(
    avranks,
    dicio.shape[0],
    alpha="0.05",
    test="nemenyi",
)

Orange.evaluation.graph_ranks(
    avranks,
    dicio.keys(),
    cd=cd,
    width=7,
    textspace=1.5,
)
plt.savefig('nemenyi_metalcats.pdf', bbox_inches='tight')
plt.show();

In [None]:
# Compare every classifier column of test_acc plus the SVM meta-learner.
# (A hand-picked dict of six methods used to be built here and was immediately
# overwritten by the assignment below, so it has been removed.)
dicio = test_acc.drop('TESTACC',axis=1)
dicio["MetaL-CATS-SVM"] = get_accs(svc)
# dicio["Baseline"] = baseline
# dicio["Topline"] = topline

# Rank the methods within each row (rank 1 = highest accuracy), then average per method.
avranks = dicio.rank(axis=1,ascending=False).mean().values
# n is the number of datasets, i.e. the number of rows; the earlier "- 1" was
# inconsistent with the MetaL-CATS comparison, which passes dicio.shape[0].
cd = Orange.evaluation.compute_CD(avranks, dicio.shape[0],
                                  alpha="0.05", test="nemenyi")

Orange.evaluation.graph_ranks(avranks, dicio.keys(),
                              cd=cd, width=8, textspace=1.5)
plt.savefig('nemenyi.pdf', bbox_inches='tight')
plt.show();