In [1]:
import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
import pickle

import pandas as pd
from sktime.transformations.panel.rocket import Rocket, MiniRocketMultivariate
from glob import glob
from noise import decompress_pickle, compressed_pickle
import time
# Directory prefix (with trailing slash) under which the pickled fold files are looked up.
INPUT_DATA_PATH = "../input-data/"
# Directory prefix (with trailing slash) to which trained models are pickled.
MODEL_PATH = "./models/"

In [2]:
def open_folds(cycle, train_test, X_y, v_i):
    """
    Load every fold file of one dataset split, ordered by fold number.

    Parameters:
        cycle      : which cycle, e.g. 'cycle_1' (1, 2, 4, 8, 16, 32)
        train_test : whether it is the train or test set, e.g. 'train' (train, test)
        X_y        : whether it is the X or y set, e.g. 'X' (X, y)
        v_i        : whether it is a voltage or current signal, e.g. 'i' (v, i)
    Return:
        list : one entry per fold file found, in ascending fold-number order
               (fold 1 first). Empty when no file matches.
    """
    pattern = INPUT_DATA_PATH + f'folds/{v_i}/{cycle}/{X_y}_{train_test}_fold_[0-9]*.pbz2'
    # The '.pbz2' suffix is stripped before the path is handed to decompress_pickle.
    paths = [found.split('.pbz2')[0] for found in glob(pattern)]
    # glob() gives no ordering guarantee. Sorting by the numeric fold suffix keeps
    # the X/y and train/test lists returned by separate calls aligned fold by fold
    # (positional insert() into a partially filled list does not guarantee that).
    paths.sort(key=lambda path: int(path.split('/')[-1].split('_')[-1]))
    return [decompress_pickle(path) for path in paths]


def transform_data(X, num_kernels=10000):
    """
    Fit a Rocket transformer on X and apply it to that same X.

    Parameters:
        X           : data the transformer is fitted on and then applied to
        num_kernels : number of kernels passed to Rocket (default 10000)
    Return:
        tuple : (transformed X, fitted Rocket instance)
    """
    transformer = Rocket(n_jobs=-1, num_kernels=num_kernels)
    transformer.fit(X)
    return transformer.transform(X), transformer


def train_model(data_name, model_name, model, parameters, X_data_train, y_data_train, X_data_test,
                y_data_test, model_dir=None):
    """
    Train one classifier per fold, report the accuracies and keep the best model.

    Parameters:
        data_name    : label used in the output file name 'clf_model_{data_name}.pkl'
        model_name   : currently unused; kept so existing calls keep working
        model        : callable building a classifier, invoked as model(**parameters);
                       the result must provide fit(X, y) and score(X, y)
        parameters   : dict of keyword arguments forwarded to `model`
        X_data_train : iterable with the training features of each fold
        y_data_train : iterable with the training labels of each fold
        X_data_test  : iterable with the test features of each fold
        y_data_test  : iterable with the test labels of each fold
        model_dir    : directory prefix (including the trailing separator) the best
                       model is pickled into; defaults to MODEL_PATH
    Return:
        None. Per-fold accuracy, its mean and its standard deviation are printed.
    """
    if model_dir is None:
        model_dir = MODEL_PATH
    scores = []
    best_score = None
    for X_train, y_train, X_test, y_test in zip(X_data_train, y_data_train,
                                                X_data_test, y_data_test):
        clf = model(**parameters)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        scores.append(score)
        # Overwrite the saved model only when this fold beats the best score seen
        # so far (comparing against the previous fold alone could replace a better
        # model with a worse one).
        if best_score is None or score > best_score:
            best_score = score
            # Context manager so the file is flushed and closed right away.
            with open(model_dir + f'clf_model_{data_name}.pkl', 'wb') as model_file:
                pickle.dump(clf, model_file)
    final_scores = np.array(scores)
    print(f'Acurácia em cada fold:\n {np.round(final_scores * 100, decimals=2)}')
    print(f'\nMédia da acurácia: {np.mean(final_scores) * 100:.2f}%')
    print(f'Desvio padrão da acurácia: {np.std(final_scores) * 100:.2f}%)')

In [3]:
# Per-fold train/test data for the 'i' signals of cycle_1; each name is a list of folds.
train_X_cycle_1, train_y_cycle_1 = (
    open_folds('cycle_1', 'train', X_y, 'i') for X_y in ('X', 'y')
)
test_X_cycle_1, test_y_cycle_1 = (
    open_folds('cycle_1', 'test', X_y, 'i') for X_y in ('X', 'y')
)
# Stand-alone X_test / y_test pickles, kept under val_* names
# (presumably a hold-out set - confirm).
val_X_cycle_1, val_y_cycle_1 = (
    decompress_pickle(INPUT_DATA_PATH + relative_path)
    for relative_path in ('folds/i/cycle_1/X_test', 'folds/i/cycle_1/y_test')
)

In [4]:
# Peek at 5 randomly drawn rows of the second training fold (no seed is set,
# so the rows shown change between runs).
train_X_cycle_1[1].sample(5)

Unnamed: 0,A,B,C,Z
878,0 0.183277 1 0.179079 2 0.17420...,0 0.064567 1 0.070880 2 0.07583...,0 -0.247976 1 -0.249467 2 -0.25140...,0 -7.361398e-09 1 4.957016e-09 2 ...
279,0 -0.138425 1 -0.141367 2 -0.14445...,0 0.185232 1 0.183684 2 0.18219...,0 -0.046927 1 -0.042106 2 -0.03757...,0 -0.000519 1 -0.000698 2 0.00047...
553,0 -0.157581 1 -0.158188 2 -0.15826...,0 0.090170 1 0.086713 2 0.08365...,0 0.067552 1 0.070677 2 0.07447...,0 -0.000042 1 0.000016 2 -0.00006...
832,0 -0.346019 1 -0.346542 2 -0.34805...,0 0.487940 1 0.470473 2 0.45347...,0 -0.142555 1 -0.124596 2 -0.10626...,0 0.000015 1 0.000050 2 0.00005...
888,0 0.164698 1 0.161594 2 0.15879...,0 0.015596 1 0.022110 2 0.02602...,0 -0.181084 1 -0.182660 2 -0.18649...,0 4.946188e-09 1 2.372673e-09 2 ...


In [5]:
# Cross-validate Rocket (10000 kernels) + RidgeClassifierCV over the cycle_1 folds
# and keep the classifier of the best-scoring fold on disk.
scores = []
best_score = None
num_kernels = 10000
s = time.time()
for X_train, y_train, X_test, y_test in zip(train_X_cycle_1, train_y_cycle_1,
                                            test_X_cycle_1, test_y_cycle_1):
    # The transform is fitted on the training fold only and reused for the test fold.
    rocket = Rocket(num_kernels=num_kernels)
    rocket.fit(X_train)
    X_train_transform = rocket.transform(X_train)
    X_test_transform = rocket.transform(X_test)

    # NOTE(review): `normalize` is deprecated/removed in newer scikit-learn
    # releases - confirm the pinned version before upgrading.
    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    clf.fit(X_train_transform, y_train)

    score = clf.score(X_test_transform, y_test)
    scores.append(score)
    # Save only when this fold beats the best score seen so far; comparing with
    # the previous fold alone could overwrite a better model with a worse one.
    if best_score is None or score > best_score:
        best_score = score
        # NOTE(review): only the classifier is pickled, not the fitted `rocket`
        # transform it was trained on - presumably both are needed to predict
        # on new data; confirm.
        with open(MODEL_PATH + 'rocket_model_cycle_1', 'wb') as model_file:
            pickle.dump(clf, model_file)
e = time.time()
final_scores = np.array(scores)
print(f'Acurácia em cada fold:\n {np.round(final_scores * 100, decimals=2)}')
print(f'\nMédia da acurácia: {np.mean(final_scores) * 100:.2f}%')
print(f'Desvio padrão da acurácia: {np.std(final_scores) * 100:.2f}%)')
print(f'Tempo necessário para treinamento {e-s} segundos')

Acurácia em cada fold:
 [100.    12.   100.   100.    21.33  21.33  18.67  16.    17.33  98.67]

Média da acurácia: 50.53%
Desvio padrão da acurácia: 40.20%)
Tempo necessário para treinamento 468.3053648471832 segundos


In [6]:
# Same experiment with half the kernels (5000): Rocket + RidgeClassifierCV over
# the cycle_1 folds, keeping the classifier of the best-scoring fold on disk.
scores = []
best_score = None
num_kernels = 5000
s = time.time()
for X_train, y_train, X_test, y_test in zip(train_X_cycle_1, train_y_cycle_1,
                                            test_X_cycle_1, test_y_cycle_1):
    # The transform is fitted on the training fold only and reused for the test fold.
    rocket = Rocket(num_kernels=num_kernels)
    rocket.fit(X_train)
    X_train_transform = rocket.transform(X_train)
    X_test_transform = rocket.transform(X_test)

    # NOTE(review): `normalize` is deprecated/removed in newer scikit-learn
    # releases - confirm the pinned version before upgrading.
    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    clf.fit(X_train_transform, y_train)

    score = clf.score(X_test_transform, y_test)
    scores.append(score)
    # Save only when this fold beats the best score seen so far; comparing with
    # the previous fold alone could overwrite a better model with a worse one.
    if best_score is None or score > best_score:
        best_score = score
        # NOTE(review): only the classifier is pickled, not the fitted `rocket`
        # transform it was trained on - presumably both are needed to predict
        # on new data; confirm.
        with open(MODEL_PATH + 'rocket_model_less_kernels_cycle_1', 'wb') as model_file:
            pickle.dump(clf, model_file)
e = time.time()
final_scores = np.array(scores)
print(f'Acurácia em cada fold:\n {np.round(final_scores * 100, decimals=2)}')
print(f'\nMédia da acurácia: {np.mean(final_scores) * 100:.2f}%')
print(f'Desvio padrão da acurácia: {np.std(final_scores) * 100:.2f}%)')
print(f'Tempo necessário para treinamento {e-s} segundos')

Acurácia em cada fold:
 [100.    12.   100.   100.    21.33  22.67  18.67  16.    17.33  90.67]

Média da acurácia: 49.87%
Desvio padrão da acurácia: 39.21%)
Tempo necessário para treinamento 214.22762966156006 segundos


In [9]:
# Cross-validate MiniRocketMultivariate (2000 features) + RidgeClassifierCV over
# the cycle_1 folds and keep the classifier of the best-scoring fold on disk.
scores = []
best_score = None
num_features = 2000
s = time.time()
for X_train, y_train, X_test, y_test in zip(train_X_cycle_1, train_y_cycle_1,
                                            test_X_cycle_1, test_y_cycle_1):
    # The transform is fitted on the training fold only and reused for the test fold.
    rocket = MiniRocketMultivariate(num_features=num_features, max_dilations_per_kernel=64)
    rocket.fit(X_train)
    X_train_transform = rocket.transform(X_train)
    X_test_transform = rocket.transform(X_test)

    # NOTE(review): `normalize` is deprecated/removed in newer scikit-learn
    # releases - confirm the pinned version before upgrading.
    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    clf.fit(X_train_transform, y_train)

    score = clf.score(X_test_transform, y_test)
    scores.append(score)
    # Save only when this fold beats the best score seen so far; comparing with
    # the previous fold alone could overwrite a better model with a worse one.
    if best_score is None or score > best_score:
        best_score = score
        # Dedicated file name: this cell used to write to
        # 'rocket_model_less_kernels_cycle_1', silently overwriting the model
        # saved by the 5000-kernel Rocket run.
        # NOTE(review): only the classifier is pickled, not the fitted `rocket`
        # transform - presumably both are needed to predict on new data; confirm.
        with open(MODEL_PATH + 'minirocket_model_2000_features_cycle_1', 'wb') as model_file:
            pickle.dump(clf, model_file)
e = time.time()
final_scores = np.array(scores)
print(f'Acurácia em cada fold:\n {np.round(final_scores * 100, decimals=2)}')
print(f'\nMédia da acurácia: {np.mean(final_scores) * 100:.2f}%')
print(f'Desvio padrão da acurácia: {np.std(final_scores) * 100:.2f}%)')
print(f'Tempo necessário para treinamento {e-s} segundos')

Acurácia em cada fold:
 [100.    12.   100.   100.    21.33  21.33  17.33  16.    17.33 100.  ]

Média da acurácia: 50.53%
Desvio padrão da acurácia: 40.47%)
Tempo necessário para treinamento 23.90195345878601 segundos


In [10]:
# Cross-validate MiniRocketMultivariate (5000 features) + RidgeClassifierCV over
# the cycle_1 folds and keep the classifier of the best-scoring fold on disk.
scores = []
best_score = None
num_features = 5000
s = time.time()
for X_train, y_train, X_test, y_test in zip(train_X_cycle_1, train_y_cycle_1,
                                            test_X_cycle_1, test_y_cycle_1):
    # The transform is fitted on the training fold only and reused for the test fold.
    rocket = MiniRocketMultivariate(num_features=num_features, max_dilations_per_kernel=64)
    rocket.fit(X_train)
    X_train_transform = rocket.transform(X_train)
    X_test_transform = rocket.transform(X_test)

    # NOTE(review): `normalize` is deprecated/removed in newer scikit-learn
    # releases - confirm the pinned version before upgrading.
    clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
    clf.fit(X_train_transform, y_train)

    score = clf.score(X_test_transform, y_test)
    scores.append(score)
    # Save only when this fold beats the best score seen so far; comparing with
    # the previous fold alone could overwrite a better model with a worse one.
    if best_score is None or score > best_score:
        best_score = score
        # Dedicated file name: this cell used to write to
        # 'rocket_model_less_kernels_cycle_1', silently overwriting the model
        # saved by the 5000-kernel Rocket run.
        # NOTE(review): only the classifier is pickled, not the fitted `rocket`
        # transform - presumably both are needed to predict on new data; confirm.
        with open(MODEL_PATH + 'minirocket_model_5000_features_cycle_1', 'wb') as model_file:
            pickle.dump(clf, model_file)
e = time.time()
final_scores = np.array(scores)
print(f'Acurácia em cada fold:\n {np.round(final_scores * 100, decimals=2)}')
print(f'\nMédia da acurácia: {np.mean(final_scores) * 100:.2f}%')
print(f'Desvio padrão da acurácia: {np.std(final_scores) * 100:.2f}%)')
print(f'Tempo necessário para treinamento {e-s} segundos')

Acurácia em cada fold:
 [100.    12.   100.   100.    21.33  21.33  20.    16.    17.33 100.  ]

Média da acurácia: 50.80%
Desvio padrão da acurácia: 40.25%)
Tempo necessário para treinamento 49.76265001296997 segundos


In [55]:
# Work on the first fold (index 0) only from here on.
X_train, y_train = train_X_cycle_1[0], train_y_cycle_1[0]
X_test, y_test = test_X_cycle_1[0], test_y_cycle_1[0]

In [56]:
# Display the test features currently bound to X_test.
X_test

Unnamed: 0,A,B,C,Z
202,0 -0.138458 1 -0.138814 2 -0.13715...,0 0.043562 1 0.040369 2 0.03636...,0 0.095490 1 0.098043 2 0.10046...,0 -0.000013 1 0.000074 2 0.00028...
141,0 -0.184703 1 -0.184598 2 -0.18524...,0 0.110263 1 0.105860 2 0.10209...,0 0.074936 1 0.079141 2 0.08331...,0 0.000152 1 -0.000134 2 -0.00133...
755,0 0.027851 1 0.025150 2 0.02250...,0 0.078763 1 0.080624 2 0.08241...,0 -0.106236 1 -0.105946 2 -0.10451...,0 -1.441403e-09 1 6.294212e-09 2 ...
801,0 -0.173841 1 -0.173105 2 -0.17254...,0 0.268997 1 0.256967 2 0.24510...,0 0.113716 1 0.117051 2 0.12025...,0 0.208661 1 0.200896 2 0.19299...
433,0 -0.058709 1 -0.051728 2 -0.04588...,0 -0.184728 1 -0.189122 2 -0.19314...,0 0.242317 1 0.240868 2 0.23892...,0 -0.000544 1 0.000378 2 -0.00009...
...,...,...,...,...
539,0 0.045237 1 0.035806 2 0.02514...,0 0.341439 1 0.347612 2 0.35271...,0 -0.386196 1 -0.381741 2 -0.37767...,0 0.000359 1 0.000379 2 0.00005...
562,0 -0.179650 1 -0.186785 2 -0.19425...,0 0.341001 1 0.340607 2 0.34055...,0 -0.161014 1 -0.153341 2 -0.14657...,0 -0.000038 1 0.000670 2 0.00031...
712,0 -0.135308 1 -0.133917 2 -0.13299...,0 0.023621 1 0.020090 2 0.01665...,0 0.112190 1 0.113692 2 0.11618...,0 1.047495e-08 1 1.020776e-08 2 ...
611,0 0.028178 1 0.025055 2 0.02151...,0 0.104140 1 0.106141 2 0.10909...,0 -0.132071 1 -0.130757 2 -0.12928...,0 0.000061 1 -0.000115 2 -0.00005...


In [57]:
# Display the test labels currently bound to y_test.
y_test

array(['ABT', 'BT', 'CA', 'BT', 'AT', 'BC', 'CA', 'ABT', 'ABC', 'BC',
       'CAT', 'BT', 'CT', 'AB', 'BC', 'CA', 'CA', 'AT', 'CAT', 'AT',
       'ABC', 'BC', 'BC', 'BC', 'BT', 'CAT', 'ABC', 'ABT', 'CAT', 'CAT',
       'CT', 'BT', 'CAT', 'ABT', 'AT', 'CT', 'CT', 'BT', 'AB', 'ABC',
       'ABT', 'BCT', 'CT', 'CA', 'CAT', 'BC', 'ABC', 'ABC', 'BCT', 'BCT',
       'AB', 'BCT', 'CT', 'ABT', 'AT', 'CT', 'ABC', 'AT', 'CA', 'BT',
       'AB', 'CT', 'ABC', 'AB', 'AB', 'CA', 'BCT', 'ABT', 'BCT', 'AB',
       'ABT', 'AT', 'AT', 'CA', 'BCT', 'BCT'], dtype='<U3')

In [58]:
from sklearn.linear_model import RidgeClassifier
# Single-fold check: MiniRocket features (20000) fed to a plain RidgeClassifier
# with default settings, scored on the matching test fold.
rocket = MiniRocketMultivariate(
    num_features=20000,
    max_dilations_per_kernel=32,
)
rocket.fit(X_train)
X_train_transform, X_test_transform = rocket.transform(X_train), rocket.transform(X_test)

clf = RidgeClassifier().fit(X_train_transform, y_train)

score = clf.score(X_test_transform, y_test)

In [59]:
# Display the accuracy held in `score`.
score

1.0

In [48]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.classification.dictionary_based import BOSSEnsemble
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.classification.shapelet_based import MrSEQLClassifier
from sktime.datasets import load_basic_motions
from sktime.transformations.panel.compose import ColumnConcatenator

In [49]:
# Pipeline stages: run ColumnConcatenator first, then classify its output with
# a 100-estimator TimeSeriesForestClassifier.
steps = [("concatenate", ColumnConcatenator())]
steps.append(("classify", TimeSeriesForestClassifier(n_estimators=100)))

In [50]:
# Fit the concatenate + forest pipeline and show its accuracy on the test data.
# NOTE(review): this cell's execution count (50) is lower than that of the cell
# assigning X_train/X_test above it (55), so the data it actually ran on came
# from earlier kernel state - re-run top to bottom to confirm the result.
clf = Pipeline(steps).fit(X_train, y_train)
clf.score(X_test, y_test)

0.13333333333333333

In [53]:
# Display the labels the current `clf` predicts for X_test.
clf.predict(X_test)

array(['CAT', 'BC', 'AT', 'BT', 'ABC', 'BT', 'AB', 'AT', 'ABC', 'BT',
       'AT', 'ABC', 'CA', 'BT', 'BT', 'AB', 'CAT', 'AB', 'BT', 'CA', 'CA',
       'AB', 'AT', 'ABC', 'BT', 'AB', 'CA', 'CAT', 'CT', 'CT', 'CT',
       'ABC', 'BCT', 'CT', 'AB', 'AB', 'CT', 'BCT', 'ABC', 'CA', 'ABC',
       'CAT', 'BC', 'ABT', 'AB', 'BC', 'CAT', 'CA', 'ABC', 'CA', 'AT',
       'ABT', 'AB', 'BC', 'CAT', 'ABT', 'ABT', 'BCT', 'CA', 'BCT', 'CA',
       'CA', 'BCT', 'BCT', 'BCT', 'CAT', 'CAT', 'ABT', 'BC', 'ABT', 'BCT',
       'BC', 'BC', 'BC', 'BC'], dtype='<U3')

In [68]:
# Re-select the first fold (index 0) before inspecting its label distribution.
X_train, y_train = train_X_cycle_1[0], train_y_cycle_1[0]
X_test, y_test = test_X_cycle_1[0], test_y_cycle_1[0]

In [69]:
# Number of test samples per label in the selected fold.
pd.Series(y_test).value_counts()

BCT    8
CA     8
ABT    8
ABC    8
CT     8
AT     8
CAT    7
BT     7
BC     7
AB     7
dtype: int64

In [70]:
# Number of training samples per label in the selected fold.
pd.Series(y_train).value_counts()

AB     69
CAT    68
BT     68
CA     68
BC     68
BCT    67
AT     67
CT     67
ABT    67
ABC    67
dtype: int64

In [71]:
# Switch to the second fold (index 1) to inspect its label distribution.
X_train, y_train = train_X_cycle_1[1], train_y_cycle_1[1]
X_test, y_test = test_X_cycle_1[1], test_y_cycle_1[1]

In [72]:
# Number of test samples per label in the selected fold.
pd.Series(y_test).value_counts()

CT     8
ABC    8
BT     8
BCT    8
AT     8
CA     7
AB     7
BC     7
ABT    7
CAT    7
dtype: int64

In [73]:
# Number of training samples per label in the selected fold.
pd.Series(y_train).value_counts()

CA     69
AB     68
CAT    68
BCT    68
ABT    68
BC     68
BT     67
AT     67
CT     67
ABC    67
dtype: int64