# Learning Shapelets

* Paper: https://www.ismll.uni-hildesheim.de/pub/pdfs/grabocka2014e-kdd.pdf

**Note: an Internet connection is required to download the datasets used in this benchmark.**

In [1]:
import numpy as np
from pyts.classification import LearningShapelets
from pyts.datasets import fetch_ucr_dataset


import pyts
# Show the installed pyts version alongside the results in this notebook.
print("pyts: {0}".format(pyts.__version__))

pyts: 0.11.0


In [2]:
# Hyper-parameters for LearningShapelets, keyed by UCR dataset name.
# Each inner mapping is unpacked as keyword arguments of the classifier,
# and the insertion order is the order in which datasets are benchmarked.
dataset_params = dict(
    Adiac=dict(
        n_shapelets_per_size=0.3,
        min_shapelet_length=0.1,
        learning_rate=100.0,
        C=1e7,
        tol=1e-8,
    ),
    ECG200=dict(
        n_shapelets_per_size=0.3,
        min_shapelet_length=0.15,
        learning_rate=1.0,
        C=10000,
    ),
    GunPoint=dict(
        n_shapelets_per_size=0.4,
        min_shapelet_length=0.1,
        learning_rate=1.0,
        C=100000,
    ),
    MiddlePhalanxTW=dict(
        n_shapelets_per_size=0.3,
        min_shapelet_length=0.1,
        learning_rate=10.0,
        C=10,
        multi_class='ovr',
    ),
    Plane=dict(
        n_shapelets_per_size=0.3,
        min_shapelet_length=0.05,
        learning_rate=1.0,
        C=1000,
    ),
    SyntheticControl=dict(
        n_shapelets_per_size=0.3,
        min_shapelet_length=0.1,
        learning_rate=1.0,
        C=1000,
    ),
)

In [3]:
# Benchmark every configured dataset: fetch its train/test arrays, fit a
# classifier with the dataset-specific settings, and report test accuracy.
for dataset, params in dataset_params.items():
    # Header: the dataset name underlined with dashes of the same width.
    print(dataset, '-' * len(dataset), sep='\n')

    X_train, X_test, y_train, y_test = fetch_ucr_dataset(
        dataset, return_X_y=True
    )

    # random_state is fixed and class weights are balanced for all datasets;
    # everything else comes from the per-dataset settings.
    clf = LearningShapelets(class_weight='balanced', random_state=42, **params)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)

    # The extra newline leaves a blank line between dataset reports.
    print('Accuracy on the test set: {0:.3f}'.format(accuracy), end='\n\n')

Adiac
-----
Accuracy on the test set: 0.527

ECG200
------
Accuracy on the test set: 0.860

GunPoint
--------
Accuracy on the test set: 0.987

MiddlePhalanxTW
---------------
Accuracy on the test set: 0.552

Plane
-----
Accuracy on the test set: 0.990

SyntheticControl
----------------
Accuracy on the test set: 0.990

