In [1]:
import numpy as np
from pyts.transformation import BagOfPatterns
from pyts.datasets import fetch_ucr_dataset
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline

# Silence every warning for the rest of the session. The stated intent is to
# hide the ones raised by BagOfWords, but note that the filter is global and
# applies to all categories and all modules.
import warnings
warnings.simplefilter('ignore')

# Print the installed pyts version next to the results recorded below.
import pyts
print("pyts: {0}".format(pyts.__version__))

pyts: 0.11.0


In [2]:
# Hyper-parameters per dataset, keyed by the dataset name. Each inner mapping
# is unpacked as keyword arguments into BagOfPatterns by the evaluation loop,
# and the outer insertion order is the order in which datasets are evaluated.
dataset_params = dict(
    Adiac=dict(window_size=32, word_size=11, n_bins=11),
    ECG200=dict(window_size=32, word_size=8, n_bins=4),
    GunPoint=dict(window_size=32, word_size=4, n_bins=4),
    MiddlePhalanxTW=dict(window_size=32, word_size=12, n_bins=6),
    Plane=dict(window_size=24, word_size=4, n_bins=4),
    SyntheticControl=dict(window_size=24, word_size=4, n_bins=3),
)

In [3]:
# For every configured dataset: fetch its train/test split, fit a
# BagOfPatterns -> 1-nearest-neighbour pipeline on the training part and
# print the score obtained on the test part.
for dataset, params in dataset_params.items():
    # Header: the dataset name underlined with dashes of the same length.
    print(dataset, '-' * len(dataset), sep='\n')

    X_train, X_test, y_train, y_test = fetch_ucr_dataset(
        dataset, return_X_y=True
    )

    # Truncate the input data containing padding values: for this dataset the
    # last 29 columns of both splits are dropped.
    # NOTE(review): 29 is hard-coded; confirm it matches the padding length.
    if dataset == 'MiddlePhalanxTW':
        X_train = X_train[:, :-29]
        X_test = X_test[:, :-29]

    pipeline = make_pipeline(
        BagOfPatterns(strategy='normal', numerosity_reduction=False, **params),
        KNeighborsClassifier(n_neighbors=1),
    )
    pipeline.fit(X_train, y_train)
    accuracy = pipeline.score(X_test, y_test)

    # The trailing blank line separates consecutive dataset reports.
    print('Accuracy on the test set: {0:.3f}'.format(accuracy), end='\n\n')

Adiac
-----
Accuracy on the test set: 0.614

ECG200
------
Accuracy on the test set: 0.840

GunPoint
--------
Accuracy on the test set: 0.980

MiddlePhalanxTW
---------------
Accuracy on the test set: 0.474

Plane
-----
Accuracy on the test set: 1.000

SyntheticControl
----------------
Accuracy on the test set: 0.953

