In [1]:
# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
import os
# Ignore all warnings
import warnings
warnings.filterwarnings('ignore')

from sklearn.base import clone

from convst.utils.dataset_utils import load_UCR_UEA_dataset_split
from convst.classifiers import R_DST_Ridge, R_DST_Ensemble

from sktime.classification.hybrid import HIVECOTEV2
from sktime.classification.dictionary_based import TemporalDictionaryEnsemble
from sktime.classification.interval_based import DrCIF
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sktime.classification.kernel_based import RocketClassifier

from timeit import default_timer as timer

In [2]:
# Define timing function
def time_pipe(pipeline, X_train, y_train):
    """Measure how long an estimator takes to fit and then predict.

    The estimator is fitted on ``(X_train, y_train)`` and then asked to
    predict on the same ``X_train``. Both calls are inside the measured
    interval and the predictions are discarded.

    Parameters
    ----------
    pipeline : object
        Anything exposing ``fit(X, y)`` and ``predict(X)``.
    X_train : object
        Inputs handed to both ``fit`` and ``predict``.
    y_train : object
        Targets handed to ``fit``.

    Returns
    -------
    float
        Difference between the ``timer()`` readings taken after and
        before the two calls (seconds).
    """
    started = timer()
    pipeline.fit(X_train, y_train)
    pipeline.predict(X_train)
    return timer() - started


In [3]:
# How many times each timing measurement is repeated
n_cv = 10
# Number of parallel threads given to each method (passed as n_jobs)
n_jobs = 90

In [4]:
# Estimators to benchmark, keyed by the label used for the result columns.
# Each of the two RDST classifiers appears with and without prime dilations.
models = {
    'RDST Prime': R_DST_Ridge(prime_dilations=True, n_jobs=n_jobs),
    'RDST Ensemble Prime': R_DST_Ensemble(prime_dilations=True, n_jobs=n_jobs),
    'RDST': R_DST_Ridge(prime_dilations=False, n_jobs=n_jobs),
    'RDST Ensemble': R_DST_Ensemble(prime_dilations=False, n_jobs=n_jobs),
}

In [5]:
# Warm-up pass: run every model once before measuring anything, so that
# possible numba compilation has already happened when the timed runs start.
X_train, _, y_train, _, _ = load_UCR_UEA_dataset_split("SmoothSubspace")
for name, estimator in models.items():
    # Work on a fresh unfitted copy; the timing returned here is ignored.
    time_pipe(clone(estimator), X_train, y_train)

In [6]:
# In[Samples benchmarks]:
# Setup for the benchmark over the number of training samples.
csv_name = 'n_samples_benchmarks.csv'

X_train, _, y_train, _, _ = load_UCR_UEA_dataset_split("Crop")

# Had to cut the number of samples to get results on our cluster:
# the largest size benchmarked is a third of the training set.
n_samples = X_train.shape[0]//3

# Sample counts stp, 2*stp, ... in steps of a sixth of n_samples.
stp = n_samples//6
lengths = np.arange(stp, n_samples + stp, stp)

# One column per model, one row per sample count. The cells are initialised
# with the float 0.0 rather than the integer 0 so that the columns are
# float64 from the start: the mean timings stored later are floats, and
# writing them into int64 columns relies on pandas silently upcasting.
df = pd.DataFrame(0.0, index=lengths, columns=list(models))


In [7]:
from sklearn.utils import resample

# For every sample count, draw a stratified subsample without replacement
# and time each model n_cv times on it.
for n_train in lengths:
    # One resample call applies the same drawn indices to X and y, instead
    # of two independent calls that stay aligned only through a shared seed.
    x1, y1 = resample(X_train, y_train, replace=False, n_samples=n_train,
                      stratify=y_train, random_state=0)
    print(x1.shape)
    for name in models:
        timing = []
        for i_cv in range(n_cv):
            print("{}/{}/n_cv:{}".format(name, n_train, i_cv))
            # Fresh unfitted copy for every repetition.
            mod = clone(models[name])
            timing.append(time_pipe(mod, x1, y1))
        df.loc[n_train, name] = np.mean(timing)
        df.loc[n_train, name+'_std'] = np.std(timing)
        # The CSV is rewritten after every model, so the results gathered
        # so far are always on disk.
        df.to_csv(csv_name)

(400, 1, 46)
RDST Prime/400/n_cv:0
RDST Prime/400/n_cv:1
RDST Prime/400/n_cv:2
RDST Prime/400/n_cv:3
RDST Prime/400/n_cv:4
RDST Prime/400/n_cv:5
RDST Prime/400/n_cv:6
RDST Prime/400/n_cv:7
RDST Prime/400/n_cv:8
RDST Prime/400/n_cv:9
RDST Ensemble Prime/400/n_cv:0
RDST Ensemble Prime/400/n_cv:1
RDST Ensemble Prime/400/n_cv:2
RDST Ensemble Prime/400/n_cv:3
RDST Ensemble Prime/400/n_cv:4
RDST Ensemble Prime/400/n_cv:5
RDST Ensemble Prime/400/n_cv:6
RDST Ensemble Prime/400/n_cv:7
RDST Ensemble Prime/400/n_cv:8
RDST Ensemble Prime/400/n_cv:9
RDST/400/n_cv:0
RDST/400/n_cv:1
RDST/400/n_cv:2
RDST/400/n_cv:3
RDST/400/n_cv:4
RDST/400/n_cv:5
RDST/400/n_cv:6
RDST/400/n_cv:7
RDST/400/n_cv:8
RDST/400/n_cv:9
RDST Ensemble/400/n_cv:0
RDST Ensemble/400/n_cv:1
RDST Ensemble/400/n_cv:2
RDST Ensemble/400/n_cv:3
RDST Ensemble/400/n_cv:4
RDST Ensemble/400/n_cv:5
RDST Ensemble/400/n_cv:6
RDST Ensemble/400/n_cv:7
RDST Ensemble/400/n_cv:8
RDST Ensemble/400/n_cv:9
(800, 1, 46)
RDST Prime/800/n_cv:0
RDST Prime/8

In [6]:
# Setup for the second benchmark: results go to a different CSV file.
csv_name = 'benchmark.csv'    

X_train, _, y_train, _, _ = load_UCR_UEA_dataset_split("Rock")
# Size of the third axis of X_train. No samples are dropped in this cell;
# the full training split returned by the loader is kept.
n_timestamps = X_train.shape[2]


In [7]:
# Lengths stp, 2*stp, ... in steps of a sixth of n_timestamps.
# NOTE: the last value exceeds n_timestamps whenever n_timestamps is not a
# multiple of stp.
stp = n_timestamps//6
lengths = np.arange(stp, n_timestamps + stp, stp)

# One column per model, one row per length. The cells are initialised with
# the float 0.0 rather than the integer 0 so that the columns are float64
# from the start: the mean timings stored later are floats, and writing
# them into int64 columns relies on pandas silently upcasting.
df = pd.DataFrame(0.0, index=lengths, columns=list(models))

In [8]:
# For every length, keep only the first `series_len` positions along the
# third axis of X_train and time each model n_cv times on the result.
for series_len in lengths:
    X_cut = X_train[:, :, :series_len]
    print(X_cut.shape)
    for name, estimator in models.items():
        durations = []
        for rep in range(n_cv):
            print("{}/{}/n_cv:{}".format(name, series_len, rep))
            # Fresh unfitted copy for every repetition.
            fresh = clone(estimator)
            durations.append(time_pipe(fresh, X_cut, y_train))
        # Mean and standard deviation over the n_cv repetitions.
        df.loc[series_len, name] = np.mean(durations)
        df.loc[series_len, name+'_std'] = np.std(durations)
        # The CSV is rewritten after every model; the working directory is
        # printed because csv_name is a relative path.
        print("Saving file to:", os.getcwd())
        df.to_csv(csv_name)
        print("File saved:", csv_name)

(20, 1, 474)
RDST Prime/474/n_cv:0
RDST Prime/474/n_cv:1
RDST Prime/474/n_cv:2
RDST Prime/474/n_cv:3
RDST Prime/474/n_cv:4
RDST Prime/474/n_cv:5
RDST Prime/474/n_cv:6
RDST Prime/474/n_cv:7
RDST Prime/474/n_cv:8
RDST Prime/474/n_cv:9
Saving file to: /home/hawk/workspace/convst/PaperScripts
File saved: benchmark.csv
RDST Ensemble Prime/474/n_cv:0
RDST Ensemble Prime/474/n_cv:1
RDST Ensemble Prime/474/n_cv:2
RDST Ensemble Prime/474/n_cv:3
RDST Ensemble Prime/474/n_cv:4
RDST Ensemble Prime/474/n_cv:5
RDST Ensemble Prime/474/n_cv:6
RDST Ensemble Prime/474/n_cv:7
RDST Ensemble Prime/474/n_cv:8
RDST Ensemble Prime/474/n_cv:9
Saving file to: /home/hawk/workspace/convst/PaperScripts
File saved: benchmark.csv
RDST/474/n_cv:0
RDST/474/n_cv:1
RDST/474/n_cv:2
RDST/474/n_cv:3
RDST/474/n_cv:4
RDST/474/n_cv:5
RDST/474/n_cv:6
RDST/474/n_cv:7
RDST/474/n_cv:8
RDST/474/n_cv:9
Saving file to: /home/hawk/workspace/convst/PaperScripts
File saved: benchmark.csv
RDST Ensemble/474/n_cv:0
RDST Ensemble/474/n_cv: