In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import array
from keras.models import Sequential
from keras.models import Model
from keras.layers import LSTM, GRU
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
import warnings
from sklearn.metrics.pairwise import euclidean_distances
warnings.filterwarnings("ignore")
from sklearn.neighbors import KNeighborsClassifier

In [17]:
# Dataset folder names (under UCRArchive_2018/) processed by this notebook.
UNIVARIATE_DATASET_NAMES_2018 = [
    'Adiac',
    'Beef',
    'CBF',
    'ChlorineConcentration',
    'CinCECGTorso',
    'Coffee',
    'DiatomSizeReduction',
]

In [19]:
def get_encoded_distances(name):
    """Train a GRU autoencoder on a dataset's TEST split and return pairwise
    Euclidean distances between the encoded series.

    Reads ``UCRArchive_2018/<name>/<name>_TEST.tsv`` (tab-separated, no header
    row). Column 0 is ignored here; every other column is treated as one time
    step of a univariate series.

    Parameters
    ----------
    name : str
        Dataset folder name, also used as the file prefix.

    Returns
    -------
    numpy.ndarray
        Square matrix of Euclidean distances between the outputs of the third
        layer (``model.layers[2]``) for every pair of rows in the file.

    Notes
    -----
    Prints the dataset name together with the rounded reconstruction and the
    rounded input so the fit can be eyeballed.
    """
    path = 'UCRArchive_2018/' + name + '/' + name + '_TEST.tsv'
    try:
        # `error_bad_lines` was removed in pandas 2.0; `on_bad_lines='skip'`
        # is its replacement (available since pandas 1.3).
        df = pd.read_csv(path, header=None, sep='\t', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`; fall back to the old flag.
        df = pd.read_csv(path, header=None, sep='\t', error_bad_lines=False)

    # Drop column 0 and add a trailing feature axis: (rows, time steps, 1).
    X = df.drop(0, axis=1).values.reshape((len(df), len(df.columns) - 1, 1))
    n_in = X.shape[1]

    # Encoder: three stacked GRUs; the last one returns a single 100-d vector.
    model = Sequential()
    model.add(GRU(n_in, activation='relu', input_shape=(n_in, 1), return_sequences=True))
    model.add(GRU(200, activation='relu', return_sequences=True))
    model.add(GRU(100, activation='relu', return_sequences=False))
    # Decoder: repeat the vector once per time step and mirror the encoder.
    model.add(RepeatVector(n_in))
    model.add(GRU(100, activation='relu', return_sequences=True))
    model.add(GRU(200, activation='relu', return_sequences=True))
    model.add(GRU(n_in, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(1)))
    model.compile(optimizer='adam', loss='mse')

    # Autoencoder objective: the input is also the target.
    model.fit(X, X, epochs=1000, batch_size=80, verbose=2)

    # demonstrate reconstruction
    yhat = model.predict(X, verbose=0)
    print(name)
    print('---Predicted---')
    print(np.round(yhat, 3))
    print('---Actual---')
    print(np.round(X, 3))

    # Sub-model ending at the third layer (index 2), whose output is the encoding.
    model1 = Model(inputs=model.inputs, outputs=model.layers[2].output)

    pred = model1.predict(X)

    return euclidean_distances(pred)

In [85]:
def get_trained_autoencoder(X, ep = 300):
    """Build a GRU sequence autoencoder and fit it to reconstruct ``X``.

    Parameters
    ----------
    X : array-like
        Training series. Each sample must have shape ``(n_in, 1)``, where
        ``n_in = len(X[0])`` is the number of time steps.
    ep : int, optional
        Number of training epochs (default 300).

    Returns
    -------
    keras.models.Sequential
        The fitted model. Layer index 2 is the GRU that returns a single
        100-unit vector per series.
    """
    n_in = len(X[0])

    stack = [
        # Encoder
        GRU(n_in, activation='relu', input_shape=(n_in, 1), return_sequences=True),
        GRU(200, activation='relu', return_sequences=True),
        GRU(100, activation='relu', return_sequences=False),
        # Decoder: repeat the encoded vector once per time step, then mirror.
        RepeatVector(n_in),
        GRU(100, activation='relu', return_sequences=True),
        GRU(200, activation='relu', return_sequences=True),
        GRU(n_in, activation='relu', return_sequences=True),
        TimeDistributed(Dense(1)),
    ]

    autoencoder = Sequential()
    for layer in stack:
        autoencoder.add(layer)
    autoencoder.compile(optimizer='adam', loss='mse')

    # Input doubles as target; training runs silently (verbose=0).
    autoencoder.fit(X, X, epochs=ep, batch_size=80, verbose=0)
    return autoencoder

In [63]:
def reformat_data_train(name):
    """Load the TRAIN split of a dataset as a 3-D array plus its label column.

    Reads ``UCRArchive_2018/<name>/<name>_TRAIN.tsv`` (tab-separated, no header
    row). Malformed lines are skipped.

    Parameters
    ----------
    name : str
        Dataset folder name, also used as the file prefix.

    Returns
    -------
    X : numpy.ndarray
        Columns 1..end reshaped to ``(n_rows, n_columns - 1, 1)``.
    Y : pandas.Series
        Column 0 of the file, named ``'Y'``.
    """
    path = 'UCRArchive_2018/' + name + '/' + name + '_TRAIN.tsv'
    try:
        # `error_bad_lines` was removed in pandas 2.0; `on_bad_lines='skip'`
        # is its replacement (available since pandas 1.3).
        df = pd.read_csv(path, header=None, sep='\t', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`; fall back to the old flag.
        df = pd.read_csv(path, header=None, sep='\t', error_bad_lines=False)

    labels = df[0].rename('Y')
    # Everything except column 0, with a trailing feature axis added.
    X = df.drop(0, axis=1).values.reshape((len(df), len(df.columns) - 1, 1))
    return X, labels

In [64]:
def reformat_data_test(name):
    """Load the TEST split of a dataset as a 3-D array plus its label column.

    Reads ``UCRArchive_2018/<name>/<name>_TEST.tsv`` (tab-separated, no header
    row). Malformed lines are skipped.

    Parameters
    ----------
    name : str
        Dataset folder name, also used as the file prefix.

    Returns
    -------
    X : numpy.ndarray
        Columns 1..end reshaped to ``(n_rows, n_columns - 1, 1)``.
    Y : pandas.Series
        Column 0 of the file, named ``'Y'``.
    """
    path = 'UCRArchive_2018/' + name + '/' + name + '_TEST.tsv'
    try:
        # `error_bad_lines` was removed in pandas 2.0; `on_bad_lines='skip'`
        # is its replacement (available since pandas 1.3).
        df = pd.read_csv(path, header=None, sep='\t', on_bad_lines='skip')
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`; fall back to the old flag.
        df = pd.read_csv(path, header=None, sep='\t', error_bad_lines=False)

    labels = df[0].rename('Y')
    # Everything except column 0, with a trailing feature axis added.
    X = df.drop(0, axis=1).values.reshape((len(df), len(df.columns) - 1, 1))
    return X, labels

In [88]:
# For every dataset: fit an autoencoder on the TRAIN split, encode both splits
# with it, save the encodings to CSV and score a 1-nearest-neighbour classifier.
for name in UNIVARIATE_DATASET_NAMES_2018:
    print(name)

    X_train, Y_train = reformat_data_train(name)
    X_test, Y_test = reformat_data_test(name)

    # The autoencoder only ever sees the training series.
    model = get_trained_autoencoder(X_train)

    # Sub-model ending at layer index 2 (the GRU with return_sequences=False),
    # whose output is used as the fixed-length encoding of a series.
    model1 = Model(inputs=model.inputs, outputs=model.layers[2].output)
    encoded_train, encoded_test = model1.predict(X_train), model1.predict(X_test)

    # Persist the encodings in the current working directory.
    pd.DataFrame(encoded_train).to_csv(name + '_encoded_TRAIN.csv')
    pd.DataFrame(encoded_test).to_csv(name + '_encoded_TEST.csv')

    # 1-NN on the encodings; fit() returns the estimator itself.
    knn = KNeighborsClassifier(n_neighbors = 1).fit(encoded_train, Y_train)
    print('Score on training set', knn.score(encoded_train, Y_train))
    print('Score on testing set', knn.score(encoded_test, Y_test))

Adiac
Score on training set 1.0
Score on testing set 0.26854219948849106
Beef
Score on training set 1.0
Score on testing set 0.36666666666666664
CBF
Score on training set 1.0
Score on testing set 0.9011111111111111
ChlorineConcentration
Score on training set 1.0
Score on testing set 0.42552083333333335
CinCECGTorso
Score on training set 1.0
Score on testing set 0.32318840579710145
Coffee
Score on training set 1.0
Score on testing set 0.2857142857142857
DiatomSizeReduction
Score on training set 1.0
Score on testing set 0.5915032679738562
