## Unsupervised tests on codebook

In [6]:
from src.preprocessing.preprocess_ucr import UCRDatasetImporter
from src.preprocessing.data_pipeline import build_data_pipeline
from src.utils import load_yaml_param_settings
from src.models.vqvae_classification import BaseVQVAE, remap_clusters
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier

from sklearn import metrics
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np

In [7]:
config_dir = 'src/configs/config.yaml' #dir to config file

config = load_yaml_param_settings(config_dir)

# data pipeline
dataset_importer = UCRDatasetImporter(**config['dataset'])
batch_size = config['dataset']['batch_sizes']['vqvae']
train_data_loader, test_data_loader = [build_data_pipeline(batch_size, dataset_importer, config, kind) for kind in ['train', 'test']]

input_length = train_data_loader.dataset.X.shape[-1]

vqvae = BaseVQVAE(input_length, config)

self.X_train.shape: (500, 128)
self.X_test.shape: (625, 128)
# unique labels (train): [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]
# unique labels (test): [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


Grabbind codebook data:

In [10]:
zqs_train, _ = vqvae.run_through_codebook(train_data_loader)
y_train = train_data_loader.dataset.Y.flatten().astype(int)

zqs_test, _ = vqvae.run_through_codebook(test_data_loader)
y_test = test_data_loader.dataset.Y.flatten().astype(int)

#scaling:
scaler = StandardScaler()
zqs_train_scaled = scaler.fit_transform(zqs_train)
zqs_test_scaled = scaler.fit_transform(zqs_test)

test_size = 0.2
Z_train, Z_test, Y_train, Y_test = train_test_split(zqs_test, y_test, test_size=test_size)
Z_train_scaled, Z_test_scaled, Y_train, Y_test = train_test_split(zqs_test_scaled, y_test, test_size=test_size)

## KNN

In [13]:
k = len(np.unique(y_train))

neigh = KNeighborsClassifier(n_neighbors=k)

neigh.fit(zqs_train, y_train)

y_pred_knn = neigh.predict(zqs_test)

print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred_knn))

Accuracy: 0.0576


on splitted dataset

In [14]:
neigh = KNeighborsClassifier(n_neighbors=k)

neigh.fit(Z_train, Y_train)

Y_pred_knn = neigh.predict(Z_test)

print("Accuracy:", metrics.accuracy_score(y_true=Y_test, y_pred=Y_pred_knn))

Accuracy: 0.072


## SVM

In [15]:
svm = SVC(kernel='linear')
svm.fit(zqs_train, y_train)

In [16]:
y_pred_svm = svm.predict(zqs_test)
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred_svm))

Accuracy: 0.0592


On splitted dataset

In [17]:
svm = SVC(kernel='linear')
svm.fit(Z_train, Y_train)
Y_pred = svm.predict(Z_test)

print("Accuracy:", metrics.accuracy_score(y_true=Y_test, y_pred=Y_pred))

Accuracy: 0.072


## UMAP-SVM

In [3]:
import umap

def UMAP_SVM(x_train, y_labs):
    dens_mapper = umap.UMAP(densmap=True).fit(x_train)
    embs = dens_mapper.embedding_

    svm = SVC(kernel='linear')
    svm.fit(embs, y_labs)

    return svm

In [4]:
umap_svm = UMAP_SVM(zqs_train, y_train)

NameError: name 'zqs_train' is not defined

## Linear discriminant analysis

In [36]:
clf = LinearDiscriminantAnalysis()
clf.fit(zqs_train, y_train)

y_pred_lda = clf.predict(zqs_test)

print("accuracy:", metrics.accuracy_score(y_pred=y_pred_lda, y_true=y_test))

accuracy: 0.4266666666666667


on splitted dataset

In [37]:
clf2 = LinearDiscriminantAnalysis()
clf2.fit(Z_train, Y_train)

Y_pred_lda = clf2.predict(Z_test)

print("accuracy:", metrics.accuracy_score(y_pred=Y_pred_lda, y_true=Y_test))

accuracy: 1.0
