In [1]:
import numpy as np
import logging
import math
import pandas as pd
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [2]:
import os

In [3]:
from sktime.transformations.panel.rocket import Rocket, MultiRocketMultivariate
from sklearn.linear_model import RidgeClassifierCV
from sktime.datasets import load_from_tsfile_to_dataframe

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

from sktime.classification.dictionary_based import ContractableBOSS
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.classification.interval_based import CanonicalIntervalForest, DrCIF

In [5]:
FILE_NAME_X = '{}_{}_X'
FILE_NAME_Y = '{}_{}_Y'
FILE_NAME_PID = '{}_{}_pid'

In [6]:
def read_dataset(path, data_type):
    x_train, y_train = load_from_tsfile_to_dataframe(os.path.join(path,
                                                                  FILE_NAME_X.format("TRAIN", data_type) + ".ts"))

    logger.info("Training data shape {} {} {}".format(x_train.shape, len(x_train.iloc[0, 0]), y_train.shape))
    x_test, y_test = load_from_tsfile_to_dataframe(os.path.join(path,
                                                                FILE_NAME_X.format("TEST", data_type) + ".ts"))
    logger.info("Testing data shape: {} {}".format(x_test.shape, y_test.shape))

    logger.info("Testing data shape: {} {}".format(x_test.shape, y_test.shape))
    test_pid = np.load(os.path.join(path, FILE_NAME_PID.format("TEST", data_type) + ".npy"), allow_pickle=True)
    train_pid = np.load(os.path.join(path, FILE_NAME_PID.format("TRAIN", data_type) + ".npy"), allow_pickle=True)

    try:
        x_val, y_val = load_from_tsfile_to_dataframe(os.path.join(path,
                                                                  FILE_NAME_X.format("VAL", data_type) + ".ts"))
        logger.info("Validation data shape: {} {}".format(x_val.shape, y_val.shape))
    except FileNotFoundError:
        logger.info("Validation data is empty:")
        x_val, y_val = None, None

    return x_train, y_train, x_test, y_test, x_val, y_val, train_pid, test_pid

In [7]:
data_type = "default"
input_path_combined =  "/home/ashish/Results/Datasets/Shimmer/MP/TrainTestDataSktime/103007/MulticlassSplit/"
x_train, y_train, x_test, y_test, x_val, y_val, train_pid, test_pid = read_dataset(input_path_combined, data_type)

INFO:__main__:Training data shape (1426, 45) 161 (1426,)
INFO:__main__:Testing data shape: (595, 45) (595,)
INFO:__main__:Testing data shape: (595, 45) (595,)
INFO:__main__:Validation data is empty:


In [12]:
multi_rocket = MultiRocketMultivariate(num_kernels=10000, normalise=False)  # random_state=100343
multi_rocket.fit(x_train)
x_training_transform = multi_rocket.transform(x_train)
x_test_transform = multi_rocket.transform(x_test)
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
classifier.fit(x_training_transform, y_train)
predictions = classifier.predict(x_test_transform)

classification_report = metrics.classification_report(y_test, predictions)
logger.info("-----------------------------------------------")
logger.info("Metrics on testing data")
logger.info("Accuracy {}".format(metrics.accuracy_score(y_test, predictions)))
logger.info("\n Classification report: \n{}".format(classification_report))

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




NameError: name 'metrics' is not defined

In [16]:
clf = ContractableBOSS(n_parameter_samples=10, max_ensemble_size=3)
clf.fit(x_train, y_train)

ValueError: Data seen by ContractableBOSS instance has multivariate series, but this ContractableBOSS instance cannot handle multivariate series. Calls with multivariate series may result in error or unreliable results.

In [None]:
y_pred = clf.predict(x_test)

In [19]:
clf = TimeSeriesForestClassifier(n_estimators=5)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

ValueError: Data seen by TimeSeriesForestClassifier instance has multivariate series, but this TimeSeriesForestClassifier instance cannot handle multivariate series. Calls with multivariate series may result in error or unreliable results.

In [8]:
%%timeit
clf = CanonicalIntervalForest(100)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

KeyboardInterrupt: 

In [None]:
classification_report = metrics.classification_report(y_test, y_pred)
logger.info("-----------------------------------------------")
logger.info("Metrics on testing data")
logger.info("Accuracy {}".format(metrics.accuracy_score(y_test, y_pred)))
logger.info("\n Classification report: \n{}".format(classification_report))

In [None]:
%%timeit
clf = DrCIF()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

In [None]:
classification_report = metrics.classification_report(y_test, y_pred)
logger.info("-----------------------------------------------")
logger.info("Metrics on testing data")
logger.info("Accuracy {}".format(metrics.accuracy_score(y_test, y_pred)))
logger.info("\n Classification report: \n{}".format(classification_report))