In [1]:
import sys
import numpy as np
import pandas as pd
import pickle
import json

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.model_selection import train_test_split

from sktime.classification.dictionary_based import IndividualBOSS, ContractableBOSS, BOSSEnsemble
from sktime.classification.hybrid import HIVECOTEV1
from sktime.classification.kernel_based import ROCKETClassifier
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.contrib.vector_classifiers._rotation_forest import RotationForest
from sktime.classification.interval_based import RandomIntervalSpectralForest, TimeSeriesForestClassifier
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

In [4]:
# Run configuration. SERIES_LEN selects the input file name and is reused in
# the output pickle name; TRAIN_TEST_RATIO is passed to train_test_split as
# the training fraction.
json_data = []  # not referenced by any of the visible cells
SERIES_LEN = 300
TRAIN_TEST_RATIO = 0.9
# Name the encoding explicitly so decoding the JSON file does not depend on
# the platform's default locale encoding.
with open(f'{SERIES_LEN}_data.json', encoding='utf-8') as f:
    data = json.load(f)

In [5]:
from copy import deepcopy

In [6]:
# Show the top-level keys of the loaded JSON; each key is used as a class label below.
data.keys()

dict_keys(['MW', 'TV', 'HD', 'WM', 'IR', 'FR', 'MG', 'EK', 'GY'])

In [7]:
# Flatten the {label: [series, ...]} mapping into parallel lists:
# X collects an independent copy of every series, y the label it was filed under.
X = []
y = []
for curr_key, curr_val in data.items():
    print(curr_key)
    y.extend([curr_key] * len(curr_val))
    X.extend(deepcopy(series) for series in curr_val)

MW
TV
HD
WM
IR
FR
MG
EK
GY


In [8]:
# Shuffled, label-stratified hold-out split with a fixed seed.
# TRAIN_TEST_RATIO is the fraction of samples placed in the training side.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=TRAIN_TEST_RATIO,
    stratify=y,
    shuffle=True,
    random_state=2121219,
)

In [9]:
def get_correct_format(arr):
    """Wrap every sequence of ``arr`` in a ``pd.Series`` inside a one-column DataFrame.

    Args:
        arr: Iterable of sequences, one per sample.

    Returns:
        ``pd.DataFrame`` with one row per element of ``arr`` and a single
        column; each cell holds the ``pd.Series`` built from that element.
        (Presumably the nested layout the sktime classifiers expect --
        confirm against the installed sktime version.)
    """
    nested_rows = [[pd.Series(sample)] for sample in arr]
    return pd.DataFrame(nested_rows)

In [10]:
# Convert the training samples to the one-column DataFrame-of-Series layout.
# The guard keeps this cell safe to re-run: passing the already-converted
# DataFrame back into get_correct_format would iterate its column labels
# instead of its samples and silently replace the data.
if not isinstance(X_train, pd.DataFrame):
    X_train = get_correct_format(X_train)

In [11]:
# Convert the held-out samples to the one-column DataFrame-of-Series layout.
# The guard keeps this cell safe to re-run: passing the already-converted
# DataFrame back into get_correct_format would iterate its column labels
# instead of its samples and silently replace the data.
if not isinstance(X_test, pd.DataFrame):
    X_test = get_correct_format(X_test)

In [12]:
# Number of rows (samples) in the training frame.
len(X_train)

1638

In [13]:
# Convert both label splits to NumPy arrays before they are handed to the models.
y_train, y_test = (np.array(labels) for labels in (y_train, y_test))

In [14]:
def get_freq(arr):
    """Count how many times each distinct element occurs in ``arr``.

    Args:
        arr: Iterable of hashable items (for example a list or 1-D array
            of labels).

    Returns:
        dict mapping each distinct item to its number of occurrences, with
        keys in order of first appearance.

    Raises:
        TypeError: If an item is unhashable.
    """
    curr_dict = {}
    for x in arr:
        # dict.get supplies the zero start for unseen items; this replaces a
        # bare ``except:`` that also trapped unrelated exceptions such as
        # KeyboardInterrupt and then reset the count to 1.
        curr_dict[x] = curr_dict.get(x, 0) + 1
    return curr_dict

In [15]:
# Empty list of candidate models; the next code cell re-creates and fills it.
models=[]

# #####################################

In [16]:
# Candidate classifiers as (display name, unfitted estimator) pairs, in the
# order they are trained and reported.
models = [
    ('Rocket', ROCKETClassifier(num_kernels=1000)),
    ('IndividualBOSS', IndividualBOSS()),
    ('RISE', RandomIntervalSpectralForest(n_estimators=10)),
    ('TSForest', TimeSeriesForestClassifier()),
    (
        'Shapelet',
        ShapeletTransformClassifier(
            estimator=RotationForest(n_estimators=3),
            n_shapelet_samples=500,
            max_shapelets=20,
            batch_size=100,
        ),
    ),
]

In [17]:
# models.append(('ContractableBOSS', ContractableBOSS(n_parameter_samples=25, max_ensemble_size=5)))

In [18]:
# Computationally heavy classifiers, left disabled:

#models.append(('BOSSEnsemble', BOSSEnsemble(max_ensemble_size=5)))
#models.append(('KNTs', KNeighborsTimeSeriesClassifier()))

In [19]:
# Fit each candidate on the training split, score it on the held-out split,
# print the result, and keep a copy of the fitted estimator under its name.
names, scores, models_dict = [], [], {}
for name, model in models:
    model.fit(X_train, y_train)
    model_score = model.score(X_test, y_test)
    names.append(name)
    scores.append(model_score)
    models_dict[name] = deepcopy(model)
    print("{:s}: {:.3f}".format(name, model_score))

# Persist all fitted models in one pickle named after the run configuration.
models_path = str(SERIES_LEN) + "_" + str(TRAIN_TEST_RATIO) + "_models.pckl"
with open(models_path, "wb") as f:
    pickle.dump(models_dict, f)

Rocket: 0.890
IndividualBOSS: 0.923
RISE: 0.973
TSForest: 0.951
Shapelet: 0.824


In [20]:
!ls

300_0.4_models.pckl	   analyze.ipynb	    dtw_try.ipynb
300_0.5_models.pckl	   backup.md		    explore.ipynb
300_0.67_models.pckl	   classifier.ipynb	    mod_data.json
300_0.8_models.pckl	   commands.md		    model_explore.ipynb
300_0.9_models.pckl	   construct_dataset.ipynb  predict_lib.py
300_boss_0.67_models.pckl  data.json		    rocket_tut.ipynb
300_data.json		   data_old.json	    test.py
