In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from time import time
from os import listdir, path

from numpy.random import choice, shuffle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from scipy.stats import zscore
from dtaidistance import dtw, dtw_c, dtw_ndim
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, random_split


from IPython.display import clear_output

In [None]:
# Enable IPython's autoreload extension. Mode 1 reloads only the modules that
# are registered with %aimport, each time before a cell is executed.
%load_ext autoreload
%autoreload 1

# Helper modules imported by bare name (presumably project-local files that sit
# next to this notebook -- confirm).
import preprocessing
import utils
from models import Encoder, DecoderLinear, DecoderLSTM, Sequence2Sequence

# Register the modules above for autoreload so edits to them are picked up
# without restarting the kernel.
%aimport preprocessing
%aimport models
%aimport utils

In [None]:
# Prefer the GPU, but fall back to the CPU so this cell still yields a usable
# device on hosts without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Root folder of the archive; every sub-directory is treated as one dataset.
BASE_PATH = "../data/Univariate_arff"
# NOTE(review): presumably filename suffixes of the train/test split files;
# neither constant is referenced in the visible cells.
TRAIN = "_TRAIN"
TEST = "_TEST"

# NOTE(review): not referenced in the visible cells.
BIG_CONST = 100000

# listdir() returns entries in arbitrary order; sort them so the sweep visits
# the datasets in the same order on every run and every machine.
datasets = sorted(
    name for name in listdir(BASE_PATH) if path.isdir(path.join(BASE_PATH, name))
)

In [None]:
# Load the lookup table that is indexed by dataset name further down
# (`lengths[name]`) to obtain the length value used for that dataset.
with open("../data/length_Univariate_arff.json") as lengths_file:
    lengths = json.loads(lengths_file.read())

In [None]:
# Settings shared by every dataset in the sweep.
w = 3
k = 3
hidden_dim = 3
sample_size = 150  # only the first `sample_size` rows of X / y are used
max_length = 200   # datasets whose `lengths` entry is >= this value are skipped
RESULTS_PATH = "results_ae_l.json"


def _to_jsonable(obj):
    """Fallback for ``json.dump``: convert NumPy / torch values to plain Python.

    Raises:
        TypeError: if ``obj`` is of a type this helper does not know how to convert.
    """
    if isinstance(obj, (np.ndarray, np.generic, torch.Tensor)):
        return obj.tolist()
    if isinstance(obj, bytes):
        return obj.decode("utf-8", "replace")
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _save_results():
    """Write everything collected in ``results`` so far to RESULTS_PATH."""
    with open(RESULTS_PATH, "w") as f:
        json.dump(results, f, default=_to_jsonable)


results = {name: {} for name in datasets}
try:
    for problem in (name for name in datasets if lengths[name] < max_length):
        length = lengths[problem]
        data_path = path.join(BASE_PATH, problem, problem)

        # Build a fresh encoder / linear-decoder pair for this dataset.
        enc = Encoder(length, 2 * k, hidden_dim).to(device)
        dec = DecoderLinear(length, 2 * k, hidden_dim).to(device)
        model = Sequence2Sequence(enc, dec)

        X, y = preprocessing.get_dataset(data_path, length=length)
        X = X[:sample_size]
        y = y[:sample_size]

        train_set, test_set, valid_set = preprocessing.prepare_data(X, y, k, w, device)

        opt = torch.optim.Adam(model.parameters())
        loss_fn = nn.MSELoss()
        utils.train(model, train_set, opt, loss_fn, valid_set, 4000, problem)

        # Take the first validation batch. `batch` must stay a tensor because it
        # is fed to the encoder below (and `.permute` does not exist on NumPy
        # arrays); only `timeseries` is converted for the classifier.
        valid_it = iter(valid_set)
        batch, timeseries, labels = next(valid_it)
        # detach().cpu() makes the conversion work wherever the tensor lives.
        timeseries = timeseries.detach().cpu().numpy()

        # Encode the batch; no gradients are needed for evaluation.
        with torch.no_grad():
            hiddens, state = enc(batch.permute(1, 0, 2))
        hiddens = hiddens.permute(1, 0, 2).cpu().detach().numpy()

        print(problem)
        scores_ts, scores_hidden = utils.classify(timeseries, hiddens, labels)
        results[problem]["ts"] = scores_ts
        results[problem]["hidden"] = scores_hidden
        results[problem]["shape"] = X.shape
        results[problem]["balance"] = np.unique(y, return_counts=True)
        print()

        # Checkpoint after every dataset so a later crash loses nothing.
        _save_results()
finally:
    # Persist whatever was collected, even when the sweep is interrupted or a
    # dataset fails; any in-flight exception keeps propagating afterwards.
    _save_results()

In [None]:
# Train a fresh encoder / linear-decoder pair and inspect its reconstructions.
# Relies on `length`, `train_set`, `valid_set`, `batch`, `timeseries` and
# `labels` as left behind by the sweep cell (i.e. the last dataset it processed).
enc = Encoder(length, 2 * k, hidden_dim).to(device)
dec = DecoderLinear(length, 2 * k, hidden_dim).to(device)
model = Sequence2Sequence(enc, dec)

opt = torch.optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
# NOTE(review): the sweep cell passes a seventh argument (`problem`) to
# utils.train; confirm that it is optional.
utils.train(model, train_set, opt, loss_fn, valid_set, 4000)

# `batch` may arrive as a NumPy array or as a tensor; normalise it to a tensor
# on `device` so that `.permute` / `.cpu()` below are always available.
batch_t = torch.as_tensor(batch, device=device)

# Encode and reconstruct the batch; no gradients are needed for inspection.
with torch.no_grad():
    hiddens, state = enc(batch_t.permute(1, 0, 2))
    x_out = dec(hiddens, state).permute(1, 0, 2).cpu().detach().numpy()
hiddens = hiddens.permute(1, 0, 2).cpu().detach().numpy()

_ = utils.classify(timeseries, hiddens, labels)
utils.plot(timeseries, batch_t.cpu(), hiddens, x_out, 20)

In [None]:
# Train a fresh encoder / LSTM-decoder pair and inspect its reconstructions.
# Relies on `length`, `train_set`, `valid_set`, `batch`, `timeseries` and
# `labels` as left behind by the sweep cell (i.e. the last dataset it processed).
enc = Encoder(length, 2 * k, hidden_dim).to(device)
dec = DecoderLSTM(length, 2 * k, hidden_dim).to(device)
model = Sequence2Sequence(enc, dec)

opt = torch.optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
# NOTE(review): the sweep cell passes a seventh argument (`problem`) to
# utils.train; confirm that it is optional.
utils.train(model, train_set, opt, loss_fn, valid_set, 4000)

# `batch` may arrive as a NumPy array or as a tensor; normalise it to a tensor
# on `device` so that `.permute` / `.cpu()` below are always available.
batch_t = torch.as_tensor(batch, device=device)

# Encode and reconstruct the batch; no gradients are needed for inspection.
with torch.no_grad():
    hiddens, state = enc(batch_t.permute(1, 0, 2))
    x_out = dec(hiddens, state).permute(1, 0, 2).cpu().detach().numpy()
hiddens = hiddens.permute(1, 0, 2).cpu().detach().numpy()

_ = utils.classify(timeseries, hiddens, labels)
utils.plot(timeseries, batch_t.cpu(), hiddens, x_out, 20)