In [4]:
%load_ext autoreload
%autoreload 2

from typing import Optional

import torch
import numpy as np
import numpy.typing as npt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from tqdm import tqdm

from moment.utils.config import Config
from moment.utils.utils import parse_config
from moment.data.dataloader import get_timeseries_dataloader
from moment.data.classification_datasets import get_classification_datasets
from moment.models.base import BaseModel
from moment.models.moment import MOMENT
from moment.models.statistical_classifiers import fit_svm

### TODOs
- [x] Download evaluation results from TS2Vec
- [ ] Handle multivariate time-series
- [ ] Fine-tune models on classification datasets

In [4]:
def get_embeddings_and_labels(model : torch.nn.Module, 
                              dataloader : torch.utils.data.DataLoader,
                              device : torch.device, 
                              dimension_reduction_method : str = 'tsne', 
                              n_components : Optional[int] = 320,
                              enable_embedding_pbar : bool = False):
    """Embed every batch of `dataloader` with `model` and collect the labels.

    The model is moved to `device` and switched to eval mode (both side effects
    persist after the call). Each batch must expose `timeseries`, `input_mask`
    and `labels`; the model must expose
    `embed(x_enc=..., input_mask=..., reduction='mean')` returning an object
    with an `embeddings` tensor.

    Args:
        model: Model providing the `embed` method described above.
        dataloader: Iterable of batches. `len()` is only required when the
            progress bar is enabled.
        device: Device on which the forward passes run.
        dimension_reduction_method: 'tsne', 'pca' or 'none'.
        n_components: Target dimensionality handed to TSNE / PCA; ignored for
            'none'.
        enable_embedding_pbar: Show a tqdm progress bar over the batches.

    Returns:
        Tuple `(embeddings, labels)` of numpy arrays. `embeddings` has one row
        per sample (after the optional reduction). `labels` keeps its leading
        sample axis; only trailing singleton axes are squeezed away.

    Raises:
        ValueError: If `dimension_reduction_method` is not supported, or if the
            dataloader yields no batches.
    """
    # Fail fast: reject a bad method name before paying for the embedding pass.
    if dimension_reduction_method not in ('tsne', 'pca', 'none'):
        raise ValueError(f"Dimension reduction method {dimension_reduction_method} not supported.")

    model = model.to(device)
    model.eval()

    # Only wrap in tqdm (and only require len()) when a bar is actually wanted;
    # a disabled bar would just iterate the dataloader anyway.
    if enable_embedding_pbar:
        batches = tqdm(dataloader, total=len(dataloader))
    else:
        batches = dataloader

    embeddings = []
    labels = []

    with torch.no_grad():
        for batch_x in batches:
            timeseries = batch_x.timeseries.float().to(device)
            input_mask = batch_x.input_mask.long().to(device)

            outputs = model.embed(x_enc=timeseries, input_mask=input_mask, reduction='mean')

            embeddings.append(outputs.embeddings.detach().cpu().numpy())

            # Labels may arrive as tensors (possibly on an accelerator) or arrays.
            batch_labels = batch_x.labels
            if isinstance(batch_labels, torch.Tensor):
                batch_labels = batch_labels.detach().cpu().numpy()
            labels.append(np.asarray(batch_labels))

    if not embeddings:
        raise ValueError("Cannot compute embeddings: the dataloader yielded no batches.")

    embeddings = np.concatenate(embeddings, axis=0)
    labels = np.concatenate(labels, axis=0)

    # Squeeze singleton axes but never the sample axis: a bare .squeeze() turns
    # the labels of a one-sample split into a 0-d array.
    singleton_axes = tuple(axis for axis in range(1, labels.ndim) if labels.shape[axis] == 1)
    if singleton_axes:
        labels = labels.squeeze(axis=singleton_axes)

    if dimension_reduction_method == 'tsne':
        # scikit-learn's default Barnes-Hut solver only supports fewer than 4
        # output dimensions; larger targets (such as the default 320) need the
        # exact solver.
        use_exact = n_components is not None and n_components >= 4
        tsne_method = 'exact' if use_exact else 'barnes_hut'
        embeddings = TSNE(n_components=n_components, n_jobs=5, method=tsne_method).fit_transform(embeddings)
    elif dimension_reduction_method == 'pca':
        embeddings = PCA(n_components=n_components).fit_transform(embeddings)
    # 'none': embeddings are returned as produced by the model.

    return embeddings, labels

In [2]:
# Ask the project helper for the classification datasets of the "UCR" collection
# (presumably a list of dataset file paths — TODO confirm against the helper).
# NOTE(review): `classification_datasets` is not referenced by the cells shown below.
classification_datasets = get_classification_datasets(collection="UCR")

In [3]:
# Raw settings for the classification dataloaders; handed to parse_config() in
# the next cell.
arguments = dict(
    task_name="classification",
    # NOTE(review): the directory is FaceFour but the file name is Wine_TEST.ts —
    # confirm which dataset is intended.
    full_file_path_and_name='/XXXX-14/project/public/XXXX-9/TimeseriesDatasets/classification/UCR/FaceFour/Wine_TEST.ts',
    batch_size=512,
    shuffle=False,
    num_workers=5,
    pin_memory=True,
    seq_len=512,
    # Initial value only: the dataloader cell overwrites args.data_split once
    # per split before building each loader.
    data_split='train',
    scale=True,
    # Split fractions.
    train_ratio=0.6,
    val_ratio=0.1,
    test_ratio=0.3,
    random_seed=13,
    # Resampling behaviour.
    upsampling_pad_direction="backward",
    upsampling_type="interpolate",  # overrides the default, which is "pad"
    downsampling_type="interpolate",
    pad_mode="constant",
    pad_constant_values=0,
)

In [4]:
# Turn the raw settings into an attribute-style config and point the loader at
# the single dataset file.
args = parse_config(arguments)
args.dataset_names = args.full_file_path_and_name

# Build one dataloader per split. The same `args` object is mutated before each
# call, in the order train -> test -> val, so it is left at 'val' afterwards.
_split_dataloaders = {}
for _split_name in ('train', 'test', 'val'):
    args.data_split = _split_name
    _split_dataloaders[_split_name] = get_timeseries_dataloader(args=args)

train_dataloader = _split_dataloaders['train']
test_dataloader = _split_dataloaders['test']
val_dataloader = _split_dataloaders['val']

# Show the dataset summary of every split (train, val, test).
for _split_dataloader in (train_dataloader, val_dataloader, test_dataloader):
    print(_split_dataloader.dataset)

ClassificationDataset(dataset_name=FaceFour,n_timeseries=20,dataset_size=20,length_of_each_timeseries=350,n_channels=1,seq_len=512,data_split=train,scale=True,task_name=classification,train_ratio=0.6,val_ratio=0.1,test_ratio=0.3,output_type=univariate)
ClassificationDataset(dataset_name=FaceFour,n_timeseries=4,dataset_size=4,length_of_each_timeseries=350,n_channels=1,seq_len=512,data_split=val,scale=True,task_name=classification,train_ratio=0.6,val_ratio=0.1,test_ratio=0.3,output_type=univariate)
ClassificationDataset(dataset_name=FaceFour,n_timeseries=88,dataset_size=88,length_of_each_timeseries=350,n_channels=1,seq_len=512,data_split=test,scale=True,task_name=classification,train_ratio=0.6,val_ratio=0.1,test_ratio=0.3,output_type=univariate)


In [8]:
# Inspect the raw array behind the training split. The recorded output is
# (350, 20) — presumably (series length, number of series), matching the
# dataset summary printed above; TODO confirm the axis order.
train_dataloader.dataset.data.shape

(350, 20)

In [7]:
# Load the model
# YAML file passed as both the run config and the default config when the
# model configuration is built.
DEFAULT_CONFIG_PATH = "../../configs/default.yaml"
# Index of the CUDA device to use when CUDA is available.
GPU_ID = 7
# Name of the pretrained run whose checkpoint is loaded.
# Other run names kept for reference:
#   "avid-moon-55", "proud-dust-41", "curious-blaze-53",
#   "laced-firebrand-51", "prime-music-50", "fast-pyramid-63"
run_name = "fast-pyramid-63"

In [25]:
# Fetch the stored weights of `run_name` (presumably the checkpoint written
# after 20,000 optimisation steps — TODO confirm the meaning of opt_steps).
checkpoint = BaseModel.load_pretrained_weights(run_name=run_name, 
                                               opt_steps=20000)

# Parse the model configuration; the same YAML serves as config and as defaults.
config = Config(config_file_path=DEFAULT_CONFIG_PATH, default_config_file_path=DEFAULT_CONFIG_PATH).parse()
# Use the selected GPU index when CUDA is available, otherwise run on the CPU.
config['device'] = GPU_ID if torch.cuda.is_available() else 'cpu'

# NOTE(review): this rebinds `args`, which previously held the dataloader settings.
args = parse_config(config)
model = MOMENT(configs=args)
# Kept as the last expression so the notebook displays load_state_dict's result.
model.load_state_dict(checkpoint["model_state_dict"])

In [11]:
from moment.models.statistical_classifiers import fit_svm

In [9]:
# torch.device(GPU_ID) always targets CUDA and fails on a CPU-only machine.
# Fall back to the CPU instead, mirroring how config['device'] is chosen when
# the model is built.
embedding_device = torch.device(f"cuda:{GPU_ID}" if torch.cuda.is_available() else "cpu")


def _embed_split(split_dataloader):
    """Return (embeddings, labels) for one split, without dimensionality reduction."""
    return get_embeddings_and_labels(model=model,
                                     dataloader=split_dataloader,
                                     device=embedding_device,
                                     dimension_reduction_method='none',
                                     n_components=None)


# Embed each split with identical settings.
train_embeddings, train_labels = _embed_split(train_dataloader)
val_embeddings, val_labels = _embed_split(val_dataloader)
test_embeddings, test_labels = _embed_split(test_dataloader)

In [None]:
# Fold the validation split into the training set (np.concatenate joins along
# axis 0 by default).
# NOTE: not idempotent — train_* is overwritten, so re-running this cell appends
# the validation rows again. Re-run the embedding cell first.
train_embeddings = np.concatenate((train_embeddings, val_embeddings))
train_labels = np.concatenate((train_labels, val_labels))

In [None]:
# Sanity check: embeddings and labels of each split must agree on the number of rows.
print(*(array.shape for array in (train_embeddings, train_labels)))
print(*(array.shape for array in (test_embeddings, test_labels)))

(23, 768) (23,)
(88, 768) (88,)


In [48]:
import pickle as pkl

# Precomputed embeddings of the selected run on AllGestureWiimoteZ. The run
# directory follows `run_name` instead of repeating the name as a literal (the
# original f-string had no placeholder at all).
results_path = (
    "/home/extra_scratch/XXXX-2/moment_results/unsupervised_representation_learning/"
    f"{run_name}/results_AllGestureWiimoteZ_TEST.pkl"
)
# SECURITY: unpickling can execute arbitrary code — only load result files that
# were produced by a trusted run.
with open(results_path, "rb") as f:
    results_object = pkl.load(f)

In [49]:
# Replace the in-notebook embeddings with the precomputed ones from the loaded
# results object.
train_embeddings, train_labels = results_object.train_embeddings, results_object.train_labels
test_embeddings, test_labels = results_object.test_embeddings, results_object.test_labels
print(*(array.shape for array in (train_embeddings, train_labels)))
print(*(array.shape for array in (test_embeddings, test_labels)))

# Disabled: integer-encode the labels.
# from sklearn.preprocessing import LabelEncoder
# label_encoder = LabelEncoder()
# train_labels = label_encoder.fit_transform(train_labels)    
# test_labels = label_encoder.transform(test_labels)
# print(f"Number of classes:", len(np.unique(train_labels)), label_encoder.classes_)

# Feature scaling hook. With both with_mean and with_std switched off the scaler
# neither centres nor rescales, so X_train / X_test are effectively the raw
# embeddings; flip the flags to actually standardise.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler(with_mean=False, with_std=False)
scaler.fit(train_embeddings)
X_train = scaler.transform(train_embeddings)
X_test = scaler.transform(test_embeddings)

(300, 768) (300,)
(700, 768) (700,)


In [51]:
# Target 320 dimensions, capped by what the training matrix can support
# (PCA cannot return more components than min(n_samples, n_features)).
n_samples, n_features = X_train.shape
embedding_size = 320
n_components = min(n_samples, n_features, embedding_size)

# Fit the projection on the training features only, then apply it to both splits.
# NOTE: X_train / X_test are overwritten, so re-running this cell projects the
# already-projected data again.
pca = PCA(n_components=n_components)
pca.fit(X_train)
X_train, X_test = pca.transform(X_train), pca.transform(X_test)

In [52]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

# Classifier from the project's SVM helper. Alternatives that were tried:
# classifier = SVC(C=1e8, gamma='scale')
# classifier = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=13)
classifier = fit_svm(features=X_train, y=train_labels)
# Explicit fit kept so the alternatives above can be swapped in unchanged.
classifier.fit(X_train, train_labels)

# Mean accuracy on each split.
train_accuracy = classifier.score(X_train, train_labels)
test_accuracy = classifier.score(X_test, test_labels)

print(classifier)
print("Train accuracy:", train_accuracy)
print("Test accuracy:", test_accuracy)

SVC(C=10, coef0=0, max_iter=10000000)
Train accuracy: 1.0
Test accuracy: 0.5757142857142857


In [14]:
from sklearn.metrics import confusion_matrix, classification_report

# Predict the held-out split once and reuse the predictions for both reports.
y_preds = classifier.predict(X_test)

# Both metrics take (y_true, y_pred) as their first two positional arguments.
print(confusion_matrix(test_labels, y_preds))
print(classification_report(test_labels, y_preds))

[[14 13]
 [15 12]]
              precision    recall  f1-score   support

           0       0.48      0.52      0.50        27
           1       0.48      0.44      0.46        27

    accuracy                           0.48        54
   macro avg       0.48      0.48      0.48        54
weighted avg       0.48      0.48      0.48        54



In [2]:
import pandas as pd
# Load the summary table of the univariate datasets (one row per problem, with
# train/test sizes, series length and class count, per the column names shown
# in the output) and preview the first rows.
summary = pd.read_csv("../../assets/data/summaryUnivariate.csv")
summary.head()

Unnamed: 0,problem,numTrainCases,numTestCases,seriesLength,numClasses
0,ACSF1,100,1001460,10,
1,Adiac,390,391176,37,
2,AllGestureWiimoteX,300,700500,10,
3,AllGestureWiimoteY,300,700500,10,
4,AllGestureWiimoteZ,300,700500,10,


In [14]:
# Count the datasets whose series length is at most 512 (presumably chosen to
# match the seq_len of 512 used for the dataloaders — TODO confirm).
# NOTE(review): the head() preview above shows rows ending in an empty field;
# check that `seriesLength` really holds lengths (columns not shifted) before
# relying on this count.
(summary["seriesLength"] <= 512).sum()

128