# Shapelet Transform

* Paper: https://archive.uea.ac.uk/~ajb/Papers/LinesKDD2012.pdf

**Note: an Internet connection is required to download the datasets used in this benchmark.**

In [1]:
import numpy as np
from pyts.transformation import ShapeletTransform
from pyts.datasets import fetch_ucr_dataset
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC


import pyts
# Print the installed pyts version alongside the benchmark output.
print("pyts: {0}".format(pyts.__version__))

pyts: 0.10.0


In [2]:
# Linear SVM used as the final classifier of the pipeline for every dataset.
# max_iter is raised to 1e5 iterations, well above the scikit-learn default.
# random_state is pinned (same seed as the ShapeletTransform built in the
# benchmark loop) because the default dual solver shuffles the data with a
# pseudo-random generator; fixing it makes repeated runs of the notebook
# produce the same accuracies.
clf = LinearSVC(max_iter=int(1e5), random_state=42)

# Per-dataset keyword arguments forwarded to ShapeletTransform. Each entry is
# declared as a (start, stop, step) triple and expanded with np.arange, so
# `stop` is exclusive.
# NOTE(review): the previous inline notes marked every entry "YES" except
# GunPoint ("OKAY") and mentioned accuracies of about 0.58 (MiddlePhalanxTW),
# perfect (Plane) and 0.8733 (SyntheticControl) -- presumably the reference
# scores this benchmark is expected to reach; confirm against the paper.
dataset_params = {
    name: {'window_sizes': np.arange(start, stop, step)}
    for name, (start, stop, step) in (
        ('Adiac', (10, 171, 20)),
        ('ECG200', (20, 91, 5)),
        ('GunPoint', (10, 130, 3)),
        ('MiddlePhalanxTW', (3, 50, 20)),
        ('Plane', (3, 144, 20)),
        ('SyntheticControl', (3, 60, 10)),
    )
}

In [3]:
# Benchmark loop: for each configured dataset, download its predefined
# train/test split, fit a ShapeletTransform + LinearSVC pipeline on the
# training part and report the accuracy obtained on the test part.
for dataset, params in dataset_params.items():
    # Dataset name underlined with dashes, as a small section header.
    print(dataset, '-' * len(dataset), sep='\n')

    X_train, X_test, y_train, y_test = fetch_ucr_dataset(dataset, return_X_y=True)

    # MiddlePhalanxTW contains padding values: drop the last 29 columns
    # of both splits before fitting.
    if dataset == 'MiddlePhalanxTW':
        X_train = X_train[:, :-29]
        X_test = X_test[:, :-29]

    # The transformer is seeded and uses all available cores; its remaining
    # arguments come from the per-dataset configuration.
    shapelet = ShapeletTransform(random_state=42, n_jobs=-1, **params)
    pipeline = Pipeline([('shapelet', shapelet), ('clf', clf)])

    # Fit on the training split, then score (accuracy) on the test split.
    pipeline.fit(X_train, y_train)
    accuracy = pipeline.score(X_test, y_test)

    print('Accuracy on the test set: {0:.3f}'.format(accuracy))
    print()

Adiac
-----
Accuracy on the test set: 0.432

ECG200
------
Accuracy on the test set: 0.880

GunPoint
--------
Accuracy on the test set: 0.967

MiddlePhalanxTW
---------------
Accuracy on the test set: 0.584

Plane
-----
Accuracy on the test set: 1.000

SyntheticControl
----------------
Accuracy on the test set: 0.937

