# WEASEL: Word Extraction for time SEries cLassification

* Website: https://www2.informatik.hu-berlin.de/~schaefpa/weasel/

* Paper: https://www2.informatik.hu-berlin.de/~schaefpa/weasel.pdf

In [1]:
import numpy as np
from pyts.transformation import BOSS
from pyts.transformation import WEASEL
from sklearn.base import clone
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


import pyts

# Show which pyts release the results in this notebook were produced with.
pyts_version = pyts.__version__
print("pyts: {0}".format(pyts_version))

pyts: 0.8.0


In [2]:
# Root folder of the UCR archive; change this value if necessary.
# Files are looked up as PATH + "<dataset>/<dataset>_TRAIN.tsv" (and _TEST.tsv)
# using plain string concatenation, so keep the trailing slash.
PATH = "UCRArchive_2018/"

In [3]:
# Logistic-regression settings that the dataset pipelines below build on:
# L2 penalty with C=1, an intercept term, the liblinear solver and
# one-vs-rest handling of multi-class targets.
clf = LogisticRegression(
    penalty='l2',
    C=1,
    fit_intercept=True,
    solver='liblinear',
    multi_class='ovr',
)

# Adiac

In [4]:
dataset_adiac = "Adiac"

# Both splits live in PATH/<dataset>/ and share the "<dataset>" file-name stem.
prefix_adiac = PATH + dataset_adiac + "/" + dataset_adiac
file_train_adiac = prefix_adiac + "_TRAIN.tsv"
file_test_adiac = prefix_adiac + "_TEST.tsv"

# Tab-separated files, read without skipping any header row.
train_adiac = np.genfromtxt(file_train_adiac, delimiter="\t", skip_header=0)
test_adiac = np.genfromtxt(file_test_adiac, delimiter="\t", skip_header=0)

# Column 0 is used as the label; the remaining columns are the features.
X_train_adiac = train_adiac[:, 1:]
y_train_adiac = train_adiac[:, 0]
X_test_adiac = test_adiac[:, 1:]
y_test_adiac = test_adiac[:, 0]

In [5]:
# WEASEL transformer for Adiac: words of size 3, window sizes 4..175.
weasel_adiac = WEASEL(word_size=3, window_sizes=np.arange(4, 176))

# Pipeline.fit() fits its steps in place. Each pipeline therefore gets its own
# unfitted copy of `clf`, so that fitting one pipeline never refits the
# classifier held by another (which would silently invalidate its scores).
pipeline_adiac = Pipeline([("weasel", weasel_adiac), ("clf", clone(clf))])

# Fit on the training split, then report mean accuracy on the test split.
accuracy_adiac = pipeline_adiac.fit(
    X_train_adiac, y_train_adiac).score(X_test_adiac, y_test_adiac)

print("Dataset: {}".format(dataset_adiac))
print("Accuracy on the testing set: {0:.3f}".format(accuracy_adiac))

Dataset: Adiac
Accuracy on the testing set: 0.788


# ECG200

In [6]:
dataset_ecg200 = "ECG200"

# Both splits live in PATH/<dataset>/ and share the "<dataset>" file-name stem.
prefix_ecg200 = PATH + dataset_ecg200 + "/" + dataset_ecg200
file_train_ecg200 = prefix_ecg200 + "_TRAIN.tsv"
file_test_ecg200 = prefix_ecg200 + "_TEST.tsv"

# Tab-separated files, read without skipping any header row.
train_ecg200 = np.genfromtxt(file_train_ecg200, delimiter="\t", skip_header=0)
test_ecg200 = np.genfromtxt(file_test_ecg200, delimiter="\t", skip_header=0)

# Column 0 is used as the label; the remaining columns are the features.
X_train_ecg200 = train_ecg200[:, 1:]
y_train_ecg200 = train_ecg200[:, 0]
X_test_ecg200 = test_ecg200[:, 1:]
y_test_ecg200 = test_ecg200[:, 0]

In [7]:
# WEASEL transformer for ECG200: words of size 2, window sizes 3..94.
weasel_ecg200 = WEASEL(word_size=2, window_sizes=np.arange(3, 95))

# Pipeline.fit() fits its steps in place. Each pipeline therefore gets its own
# unfitted copy of `clf`, so that fitting one pipeline never refits the
# classifier held by another (which would silently invalidate its scores).
pipeline_ecg200 = Pipeline([("weasel", weasel_ecg200), ("clf", clone(clf))])

# Fit on the training split, then report mean accuracy on the test split.
accuracy_ecg200 = pipeline_ecg200.fit(
    X_train_ecg200, y_train_ecg200).score(X_test_ecg200, y_test_ecg200)

print("Dataset: {}".format(dataset_ecg200))
print("Accuracy on the testing set: {0:.3f}".format(accuracy_ecg200))

Dataset: ECG200
Accuracy on the testing set: 0.850


# GunPoint

In [8]:
dataset_gunpoint = "GunPoint"

# Both splits live in PATH/<dataset>/ and share the "<dataset>" file-name stem.
prefix_gunpoint = PATH + dataset_gunpoint + "/" + dataset_gunpoint
file_train_gunpoint = prefix_gunpoint + "_TRAIN.tsv"
file_test_gunpoint = prefix_gunpoint + "_TEST.tsv"

# Tab-separated files, read without skipping any header row.
train_gunpoint = np.genfromtxt(file_train_gunpoint, delimiter="\t", skip_header=0)
test_gunpoint = np.genfromtxt(file_test_gunpoint, delimiter="\t", skip_header=0)

# Column 0 is used as the label; the remaining columns are the features.
X_train_gunpoint = train_gunpoint[:, 1:]
y_train_gunpoint = train_gunpoint[:, 0]
X_test_gunpoint = test_gunpoint[:, 1:]
y_test_gunpoint = test_gunpoint[:, 0]

In [9]:
# WEASEL transformer for GunPoint: words of size 4, window sizes 5..148.
weasel_gunpoint = WEASEL(word_size=4, window_sizes=np.arange(5, 149))

# Pipeline.fit() fits its steps in place. Each pipeline therefore gets its own
# unfitted copy of `clf`, so that fitting one pipeline never refits the
# classifier held by another (which would silently invalidate its scores).
pipeline_gunpoint = Pipeline([("weasel", weasel_gunpoint), ("clf", clone(clf))])

# Fit on the training split, then report mean accuracy on the test split.
accuracy_gunpoint = pipeline_gunpoint.fit(
    X_train_gunpoint, y_train_gunpoint).score(X_test_gunpoint, y_test_gunpoint)

print("Dataset: {}".format(dataset_gunpoint))
print("Accuracy on the testing set: {0:.3f}".format(accuracy_gunpoint))

Dataset: GunPoint
Accuracy on the testing set: 0.960


# MiddlePhalanxTW

In [10]:
dataset_mdtw = "MiddlePhalanxTW"

# Both splits live in PATH/<dataset>/ and share the "<dataset>" file-name stem.
prefix_mdtw = PATH + dataset_mdtw + "/" + dataset_mdtw
file_train_mdtw = prefix_mdtw + "_TRAIN.tsv"
file_test_mdtw = prefix_mdtw + "_TEST.tsv"

# Tab-separated files, read without skipping any header row.
train_mdtw = np.genfromtxt(file_train_mdtw, delimiter="\t", skip_header=0)
test_mdtw = np.genfromtxt(file_test_mdtw, delimiter="\t", skip_header=0)

# Column 0 is used as the label.
# NOTE(review): unlike the other datasets in this notebook, the last 29
# columns are excluded from the features (1:-29); the reason is not stated
# here -- confirm that this truncation is intentional.
X_train_mdtw = train_mdtw[:, 1:-29]
y_train_mdtw = train_mdtw[:, 0]
X_test_mdtw = test_mdtw[:, 1:-29]
y_test_mdtw = test_mdtw[:, 0]

In [11]:
# WEASEL transformer for MiddlePhalanxTW: words of size 2, window sizes 3..49.
weasel_mdtw = WEASEL(word_size=2, window_sizes=np.arange(3, 50))

# Pipeline.fit() fits its steps in place. Each pipeline therefore gets its own
# unfitted copy of `clf`, so that fitting one pipeline never refits the
# classifier held by another (which would silently invalidate its scores).
pipeline_mdtw = Pipeline([("weasel", weasel_mdtw), ("clf", clone(clf))])

# Fit on the training split, then report mean accuracy on the test split.
accuracy_mdtw = pipeline_mdtw.fit(
    X_train_mdtw, y_train_mdtw).score(X_test_mdtw, y_test_mdtw)

print("Dataset: {}".format(dataset_mdtw))
print("Accuracy on the testing set: {0:.3f}".format(accuracy_mdtw))

Dataset: MiddlePhalanxTW
Accuracy on the testing set: 0.558


# Plane

In [12]:
dataset_plane = "Plane"

# Both splits live in PATH/<dataset>/ and share the "<dataset>" file-name stem.
prefix_plane = PATH + dataset_plane + "/" + dataset_plane
file_train_plane = prefix_plane + "_TRAIN.tsv"
file_test_plane = prefix_plane + "_TEST.tsv"

# Tab-separated files, read without skipping any header row.
train_plane = np.genfromtxt(file_train_plane, delimiter="\t", skip_header=0)
test_plane = np.genfromtxt(file_test_plane, delimiter="\t", skip_header=0)

# Column 0 is used as the label; the remaining columns are the features.
X_train_plane = train_plane[:, 1:]
y_train_plane = train_plane[:, 0]
X_test_plane = test_plane[:, 1:]
y_test_plane = test_plane[:, 0]

In [13]:
# WEASEL transformer for Plane: words of size 6, window sizes 7..139.
weasel_plane = WEASEL(word_size=6, window_sizes=np.arange(7, 140))

# Pipeline.fit() fits its steps in place. Each pipeline therefore gets its own
# unfitted copy of `clf`, so that fitting one pipeline never refits the
# classifier held by another (which would silently invalidate its scores).
pipeline_plane = Pipeline([("weasel", weasel_plane), ("clf", clone(clf))])

# Fit on the training split, then report mean accuracy on the test split.
accuracy_plane = pipeline_plane.fit(
    X_train_plane, y_train_plane).score(X_test_plane, y_test_plane)

print("Dataset: {}".format(dataset_plane))
print("Accuracy on the testing set: {0:.3f}".format(accuracy_plane))

Dataset: Plane
Accuracy on the testing set: 1.000


# SyntheticControl

In [14]:
dataset_synthetic = "SyntheticControl"

# Both splits live in PATH/<dataset>/ and share the "<dataset>" file-name stem.
prefix_synthetic = PATH + dataset_synthetic + "/" + dataset_synthetic
file_train_synthetic = prefix_synthetic + "_TRAIN.tsv"
file_test_synthetic = prefix_synthetic + "_TEST.tsv"

# Tab-separated files, read without skipping any header row.
train_synthetic = np.genfromtxt(file_train_synthetic, delimiter="\t", skip_header=0)
test_synthetic = np.genfromtxt(file_test_synthetic, delimiter="\t", skip_header=0)

# Column 0 is used as the label; the remaining columns are the features.
X_train_synthetic = train_synthetic[:, 1:]
y_train_synthetic = train_synthetic[:, 0]
X_test_synthetic = test_synthetic[:, 1:]
y_test_synthetic = test_synthetic[:, 0]

In [15]:
# WEASEL transformer for SyntheticControl: words of size 2, window sizes 10..58.
weasel_synthetic = WEASEL(word_size=2, window_sizes=np.arange(10, 59))

# Pipeline.fit() fits its steps in place. Each pipeline therefore gets its own
# unfitted copy of `clf`, so that fitting one pipeline never refits the
# classifier held by another (which would silently invalidate its scores).
pipeline_synthetic = Pipeline([("weasel", weasel_synthetic), ("clf", clone(clf))])

# Fit on the training split, then report mean accuracy on the test split.
accuracy_synthetic = pipeline_synthetic.fit(
    X_train_synthetic, y_train_synthetic).score(X_test_synthetic, y_test_synthetic)

print("Dataset: {}".format(dataset_synthetic))
print("Accuracy on the testing set: {0:.3f}".format(accuracy_synthetic))

Dataset: SyntheticControl
Accuracy on the testing set: 0.973
