# BOSSVS: Bag of SFA Symbols in Vector Space

* Website: https://www2.informatik.hu-berlin.de/~schaefpa/bossVS/

* Paper: https://www2.informatik.hu-berlin.de/~schaefpa/bossvs.pdf

**Note: an Internet connection is required to download the datasets used in this benchmark.**

In [1]:
import numpy as np
import pyts
from pyts.classification import BOSSVS
from pyts.datasets import fetch_ucr_dataset
from sklearn.ensemble import VotingClassifier


# Show the installed pyts version next to the results printed below.
pyts_version = pyts.__version__
print("pyts: {0}".format(pyts_version))

pyts: 0.9.0


In [2]:
# Window sizes of the SyntheticControl ensemble: 18, 19, ..., 36 (19 values).
# Every other SyntheticControl array is sized from this one so that entry i of
# each array always describes the same ensemble member.
synthetic_control_windows = np.arange(18, 37)

# BOSSVS hyper-parameters for each UCR dataset, keyed by dataset name.
# - Scalar values describe a single BOSSVS classifier.
# - NumPy arrays describe an ensemble: the benchmark loop builds one BOSSVS per
#   entry of 'window_size' and reads entry i of every array for estimator i, so
#   all arrays of a dataset must share the same length.
dataset_params = {

    'Adiac': {'word_size': 12,
              'window_size': 80,
              'norm_mean': True,
              'drop_sum': True},

    'ECG200': {'word_size': 5,
               'window_size': 40,
               'norm_mean': False,
               'drop_sum': False},

    'GunPoint': {'word_size': 14,
                 'window_size': 40,
                 'norm_mean': True,
                 'drop_sum': True},

    'MiddlePhalanxTW': {'word_size': 10,
                        'window_size': 25,
                        'norm_mean': False,
                        'drop_sum': False},

    'Plane': {'word_size': 6,
              'window_size': 10,
              'norm_mean': False,
              'drop_sum': False},

    'SyntheticControl': {'word_size': np.full(synthetic_control_windows.shape, 6),
                         'window_size': synthetic_control_windows,
                         'norm_mean': np.full(synthetic_control_windows.shape, False),
                         'drop_sum': np.full(synthetic_control_windows.shape, False)}

}

In [3]:
for dataset, params in dataset_params.items():
    # Header: the dataset name underlined with dashes of the same length.
    print(dataset)
    print('-' * len(dataset))

    X_train, X_test, y_train, y_test = fetch_ucr_dataset(dataset, return_X_y=True)

    # The MiddlePhalanxTW series contain padding values: drop the last 29 columns.
    if dataset == 'MiddlePhalanxTW':
        X_train = X_train[:, :-29]
        X_test = X_test[:, :-29]

    if not isinstance(params['window_size'], np.ndarray):
        # Scalar hyper-parameters: a single BOSSVS classifier.
        clf = BOSSVS(**params)
    else:
        # Array-valued hyper-parameters: one BOSSVS per window size, each built
        # from entry `idx` of every parameter array, combined in a VotingClassifier.
        n_members = len(params['window_size'])
        dicts = [
            {name: values[idx] for name, values in params.items()}
            for idx in range(n_members)
        ]
        bossvses = [BOSSVS(**member_params) for member_params in dicts]
        named_members = []
        for idx, member in enumerate(bossvses):
            named_members.append(('bossvs_' + str(idx), member))
        clf = VotingClassifier(named_members)

    # Fit on the training split and evaluate on the test split.
    accuracy = clf.fit(X_train, y_train).score(X_test, y_test)

    print('Accuracy on the test set: {0:.3f}'.format(accuracy))
    print()

Adiac
-----
Accuracy on the test set: 0.703

ECG200
------
Accuracy on the test set: 0.860

GunPoint
--------
Accuracy on the test set: 1.000

MiddlePhalanxTW
---------------
Accuracy on the test set: 0.545

Plane
-----
Accuracy on the test set: 1.000

SyntheticControl
----------------
Accuracy on the test set: 0.980

