In [1]:
# Standard library
import warnings

# Third-party
import numpy as np
import pyts
from pyts.classification import SAXVSM
from pyts.datasets import fetch_ucr_dataset

# Suppress all warnings from this point on; the ones this is aimed at
# are raised by BagOfWords.
warnings.filterwarnings('ignore')

# Show which pyts version is installed.
print("pyts: {0}".format(pyts.__version__))

pyts: 0.11.0


In [2]:
# Keyword arguments forwarded to SAXVSM, one entry per dataset.
# Each key is the dataset name handed to fetch_ucr_dataset; iteration
# follows the insertion order below.
dataset_params = dict(
    Adiac=dict(window_size=64, word_size=32),
    ECG200=dict(window_size=30, word_size=10),
    GunPoint=dict(window_size=64, word_size=12, n_bins=5),
    MiddlePhalanxTW=dict(
        window_size=60,
        word_size=31,
        n_bins=6,
        norm_std=False,
        use_idf=False,
        sublinear_tf=False,
    ),
    Plane=dict(window_size=80, word_size=20),
    SyntheticControl=dict(window_size=20, word_size=5),
)

In [3]:
# Fit and score one SAXVSM classifier per configured dataset.
for dataset, params in dataset_params.items():
    # Header: the dataset name underlined with dashes of matching length.
    underline = '-' * len(dataset)
    print(dataset)
    print(underline)

    # Fetch the dataset's train/test split by name.
    X_train, X_test, y_train, y_test = fetch_ucr_dataset(dataset, return_X_y=True)

    # Per-dataset settings come from `params`; the binning strategy is
    # fixed to 'normal' for every dataset.
    clf = SAXVSM(strategy='normal', **params)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)

    message = 'Accuracy on the test set: {0:.3f}'.format(accuracy)
    print(message)
    print()

Adiac
-----
Accuracy on the test set: 0.458

ECG200
------
Accuracy on the test set: 0.840

GunPoint
--------
Accuracy on the test set: 0.993

MiddlePhalanxTW
---------------
Accuracy on the test set: 0.545

Plane
-----
Accuracy on the test set: 0.981

SyntheticControl
----------------
Accuracy on the test set: 0.933

