In [1]:
import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

import pandas as pd
from sktime.datasets import load_arrow_head  # univariate dataset
from sktime.datasets.base import load_japanese_vowels  # multivariate dataset
from sktime.transformations.panel.rocket import Rocket, MiniRocketMultivariate

from noise import decompress_pickle
# Relative directory that holds the compressed input artifacts used by this notebook.
INPUT_DATA_PATH = '../input-data/'
# Load the 'cycle_data' artifact. Later cells iterate over `data` and index each
# record with keys such as 'i_cycle_1' and 'fault_type'.
# NOTE(review): decompress_pickle presumably unpickles the file — only load trusted data.
data = decompress_pickle(INPUT_DATA_PATH + 'cycle_data')

In [2]:
def format_dataframe(data):
    """Pack one flat signal into a single-row, four-column nested DataFrame.

    The input is cut into four equal, consecutive segments. Each segment is
    wrapped in a ``pd.Series`` and stored as the single cell of columns
    'A', 'B', 'C' and 'Z' (in that order).

    Parameters
    ----------
    data : numpy.ndarray
        Signal to split; expected to be 1-D with a length that is a multiple
        of 4 (otherwise the reshape raises ``ValueError``).

    Returns
    -------
    pandas.DataFrame
        One row, columns ['A', 'B', 'C', 'Z'], each cell holding a
        ``pd.Series`` with one quarter of the input.
    """
    segment_length = data.shape[0] // 4
    # Row k of the reshaped array is the k-th consecutive quarter of the signal.
    segments = data.reshape((4, segment_length))
    column_names = ('A', 'B', 'C', 'Z')
    return pd.DataFrame(
        {name: [pd.Series(segment)] for name, segment in zip(column_names, segments)}
    )

In [16]:
# Build the nested panel X (one row per record, taken from its 'i_cycle_1'
# signal) and the label vector y (each record's 'fault_type').
data_list = [format_dataframe(record['i_cycle_1']) for record in data]
target_list = [record['fault_type'] for record in data]

# Every per-record frame has index 0, so renumber the rows after stacking.
X = pd.concat(data_list).reset_index(drop=True)
y = np.array(target_list)

In [4]:
# Hold out 20% of the samples for testing; the fixed seed keeps this split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Sanity check: number of training samples and number of nested columns.
X_train.shape

(752, 4)

In [6]:
# Sanity check: number of held-out samples and number of nested columns.
X_test.shape

(188, 4)

In [7]:
# https://github.com/alan-turing-institute/sktime/blob/main/examples/rocket.ipynb

# Fit the Rocket transform on the training panel only, then convert the
# training series into the feature matrix used by the classifier cell below.
# NOTE(review): no random_state is passed, so the result may differ between
# runs — confirm whether a seed is wanted.
rocket = Rocket()
rocket.fit(X_train)
X_train_transform = rocket.transform(X_train)

In [8]:
# Ridge classifier that cross-validates over 10 log-spaced alphas in [1e-3, 1e3],
# fitted on the transformed training features.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs an older scikit-learn — confirm the pinned version.
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
classifier.fit(X_train_transform, y_train)

RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [9]:
# Apply the already-fitted Rocket transform to the held-out panel (no re-fitting).
X_test_transform = rocket.transform(X_test)

In [10]:
# Mean accuracy of the fitted classifier on the transformed held-out set.
classifier.score(X_test_transform, y_test)

0.9893617021276596

In [11]:
# https://github.com/alan-turing-institute/sktime/blob/main/examples/minirocket.ipynb

# Fit MiniRocket on the training panel and build BOTH feature matrices from it.
# The transform must come from `minirocket`, not from the `rocket` object fitted
# earlier; otherwise the cells below silently re-evaluate Rocket.
minirocket = MiniRocketMultivariate()
minirocket.fit(X_train)
X_train_transform = minirocket.transform(X_train)
# Recompute the held-out features with the same transform so the score cell
# below compares like with like.
X_test_transform = minirocket.transform(X_test)

In [12]:
# Fresh Ridge classifier (cross-validated over 10 log-spaced alphas in [1e-3, 1e3])
# fitted on whatever X_train_transform currently holds from the preceding cell.
# NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
# removed in 1.2, so this cell needs an older scikit-learn — confirm the pinned version.
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
classifier.fit(X_train_transform, y_train)

RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [13]:
# Mean accuracy of the classifier fitted in the previous cell on the current
# X_test_transform; it must have been produced by the same transform as the
# training features.
classifier.score(X_test_transform, y_test)

0.9893617021276596

In [14]:
# https://github.com/alan-turing-institute/sktime/blob/main/examples/03_classification_multivariate.ipynb

from sklearn.pipeline import Pipeline

from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.classification.dictionary_based import BOSSEnsemble
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.classification.shapelet_based import MrSEQLClassifier
from sktime.datasets import load_basic_motions
from sktime.transformations.panel.compose import ColumnConcatenator

# Baseline without random kernels: concatenate the four nested columns into one
# long series per sample, then classify it with a time-series forest.
steps = [
    ("concatenate", ColumnConcatenator()),
    ("classify", TimeSeriesForestClassifier(n_estimators=100)),
]
clf = Pipeline(steps=steps)
clf.fit(X_train, y_train)

# Mean accuracy on the untouched held-out panel.
clf.score(X_test, y_test)

0.9893617021276596

In [28]:
from noise import compressed_pickle
from sklearn.model_selection import StratifiedKFold

In [31]:
def open_data(signal_type, cycle_name, random_state=42):
    """Build the panel for one signal/cycle, persist its held-out split, return the rest.

    Reads the module-level ``data`` records, formats each record's
    ``'{signal_type}_{cycle_name}'`` signal with ``format_dataframe`` and uses
    the record's ``'fault_type'`` as the label. A stratified 80/20 split is
    made; the 20% part is written to
    ``INPUT_DATA_PATH + 'folds/{signal_type}/{cycle_name}/'`` as ``X_test`` and
    ``y_test``.

    Parameters
    ----------
    signal_type : str
        Prefix of the record key to read (the notebook uses 'v' and 'i').
    cycle_name : str
        Suffix of the record key to read, e.g. 'cycle_1'.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split``. The split used to be unseeded,
        so every call persisted a different held-out set. A fixed seed makes
        the saved files reproducible; the labels do not depend on
        ``signal_type``/``cycle_name``, so the same samples are held out for
        every signal. Pass ``None`` to get the old unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_train, y_train)``: the remaining 80% as a nested DataFrame and a
        NumPy label array.
    """
    signal_key = f'{signal_type}_{cycle_name}'
    frames = [format_dataframe(record[signal_key]) for record in data]
    X = pd.concat(frames).reset_index(drop=True)
    y = np.array([record['fault_type'] for record in data])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=random_state
    )

    # The held-out part is only persisted, never returned, so it stays out of
    # the cross-validation folds built from the returned training part.
    output_dir = INPUT_DATA_PATH + f'folds/{signal_type}/{cycle_name}/'
    compressed_pickle(output_dir + 'X_test', X_test)
    compressed_pickle(output_dir + 'y_test', y_test)
    return X_train, y_train

In [46]:
def save_folds(signal_type, cycle_name):
    """Split the training part of one signal/cycle into 10 stratified folds and save them.

    Calls ``open_data`` (which also persists the held-out test split) and, for
    each fold numbered from 1, writes four artifacts to
    ``INPUT_DATA_PATH + 'folds/{signal_type}/{cycle_name}/'``:
    ``X_train_fold_{n}``, ``X_test_fold_{n}``, ``y_train_fold_{n}`` and
    ``y_test_fold_{n}``.

    Features and labels used to share the names ``train_fold_{n}`` /
    ``test_fold_{n}``, so the label arrays overwrote the feature frames. They
    now get distinct ``X_``/``y_`` prefixes, matching the ``X_test``/``y_test``
    files written by ``open_data``.

    Parameters
    ----------
    signal_type : str
        Signal prefix forwarded to ``open_data`` and used in the output path.
    cycle_name : str
        Cycle name forwarded to ``open_data`` and used in the output path.

    Returns
    -------
    None
    """
    X_train, y_train = open_data(signal_type, cycle_name)
    data_folds_path = INPUT_DATA_PATH + f'folds/{signal_type}/{cycle_name}/'
    kf = StratifiedKFold(n_splits=10)
    for fold, (tr, te) in enumerate(kf.split(X_train, y_train), start=1):
        # `tr`/`te` are positional indices, hence .iloc on the frame and plain
        # indexing on the label array.
        X_tr, X_te = X_train.iloc[tr, :], X_train.iloc[te, :]
        y_tr, y_te = y_train[tr], y_train[te]
        compressed_pickle(data_folds_path + f'X_train_fold_{fold}', X_tr)
        compressed_pickle(data_folds_path + f'X_test_fold_{fold}', X_te)
        compressed_pickle(data_folds_path + f'y_train_fold_{fold}', y_tr)
        compressed_pickle(data_folds_path + f'y_test_fold_{fold}', y_te)

In [47]:
# Cycle variants to export; for each one the voltage-like 'v' key is processed
# before the 'i' key, matching the record keys '{signal}_{cycle}'.
cycle_list = [
    'cycle_1',
    'cycle_2',
    'cycle_4',
    'cycle_8',
    'cycle_16',
    'cycle_32',
]

for cycle_name in cycle_list:
    for signal_type in ('v', 'i'):
        save_folds(signal_type, cycle_name)