# WEASEL: Word ExtrAction for time SEries cLassification

* Website: https://www2.informatik.hu-berlin.de/~schaefpa/weasel/

* Paper: https://www2.informatik.hu-berlin.de/~schaefpa/weasel.pdf

**Note: an Internet connection is required to download the datasets used in this benchmark.**

In [1]:
import numpy as np
from pyts.transformation import WEASEL
from pyts.datasets import fetch_ucr_dataset
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression


import pyts
# Show which pyts release produced the outputs recorded in this notebook.
print("pyts: {0}".format(pyts.__version__))

pyts: 0.10.0


In [2]:
# Classifier placed after the WEASEL transformer in every pipeline below:
# an L2-penalised logistic regression fitted with liblinear, one-vs-rest.
clf = LogisticRegression(
    penalty='l2',
    C=1,
    fit_intercept=True,
    solver='liblinear',
    multi_class='ovr',
)

# Keyword arguments forwarded to WEASEL, keyed by UCR dataset name.
# np.arange excludes its stop value, so e.g. Adiac uses window sizes 4..175.
dataset_params = {
    'Adiac': dict(word_size=3, window_sizes=np.arange(4, 176)),
    'ECG200': dict(word_size=2, window_sizes=np.arange(3, 95)),
    'GunPoint': dict(word_size=4, window_sizes=np.arange(5, 149)),
    'MiddlePhalanxTW': dict(word_size=2, window_sizes=np.arange(3, 50)),
    'Plane': dict(word_size=6, window_sizes=np.arange(7, 140)),
    'SyntheticControl': dict(word_size=2, window_sizes=np.arange(10, 59)),
}

In [3]:
# For each configured dataset: download it, fit WEASEL followed by the shared
# classifier on the training split, and print the test-set accuracy.
for dataset in dataset_params:
    params = dataset_params[dataset]

    # Header: the dataset name underlined with dashes of matching length
    print(dataset, '-' * len(dataset), sep='\n')

    X_train, X_test, y_train, y_test = fetch_ucr_dataset(
        dataset, return_X_y=True
    )

    # Truncate the input data containing padding values
    if dataset == 'MiddlePhalanxTW':
        X_train = X_train[:, :-29]
        X_test = X_test[:, :-29]

    # The same `clf` object is the final step of every pipeline
    weasel = WEASEL(**params)
    pipeline = Pipeline([('weasel', weasel), ('clf', clf)])
    pipeline.fit(X_train, y_train)
    accuracy = pipeline.score(X_test, y_test)

    # The extra newline leaves a blank line between dataset reports
    print('Accuracy on the test set: {0:.3f}'.format(accuracy), end='\n\n')

Adiac
-----
Accuracy on the test set: 0.788

ECG200
------
Accuracy on the test set: 0.850

GunPoint
--------
Accuracy on the test set: 0.960

MiddlePhalanxTW
---------------
Accuracy on the test set: 0.558

Plane
-----
Accuracy on the test set: 1.000

SyntheticControl
----------------
Accuracy on the test set: 0.973

