In [1]:
dataset_path = './datasets/'

In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from sktime.highlevel.tasks import TSCTask
from sktime.highlevel.strategies import TSCStrategy

from sktime.transformers.compose import RowwiseTransformer
from sktime.transformers.compose import ColumnTransformer
from sktime.transformers.compose import Tabulariser
from sktime.transformers.segment import RandomIntervalSegmenter

from sktime.pipeline import Pipeline
from sktime.pipeline import FeatureUnion
from sktime.classifiers.distance_based import ProximityForest 

from sktime.classifiers.compose import TimeSeriesForestClassifier
from sktime.classifiers.distance_based import KNeighborsTimeSeriesClassifier

from sktime.datasets import load_gunpoint
from sktime.utils.time_series import time_series_slope

from statsmodels.tsa.stattools import acf
from statsmodels.tsa.ar_model import AR

from sklearn.preprocessing import FunctionTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

## Datasets

In [4]:
from sktime.utils.load_data import load_from_tsfile_to_dataframe

In [5]:
# ACSF1
acsf1_train_x, acsf1_train_y = load_from_tsfile_to_dataframe(dataset_path + '/ACSF1/ACSF1_TRAIN.ts')
acsf1_test_x, acsf1_test_y = load_from_tsfile_to_dataframe(dataset_path + '/ACSF1/ACSF1_TEST.ts')

In [6]:
# ECG5000
ecg5k_train_x, ecg5k_train_y = load_from_tsfile_to_dataframe(dataset_path + '/ECG5000/ECG5000_TRAIN.ts') 
ecg5k_test_x, ecg5k_test_y = load_from_tsfile_to_dataframe(dataset_path + '/ECG5000/ECG5000_TEST.ts')

In [7]:
# Wafer
wafer_train_x, wafer_train_y = load_from_tsfile_to_dataframe(dataset_path + '/Wafer/Wafer_TRAIN.ts') 
wafer_test_x, wafer_test_y = load_from_tsfile_to_dataframe(dataset_path + '/Wafer/Wafer_TEST.ts')

In [8]:
# UWaveGestureLibrary
uwgl_train_x, uwgl_train_y = load_from_tsfile_to_dataframe(dataset_path + '/UWaveGestureLibrary/UWaveGestureLibrary_TRAIN.ts') 
uwgl_test_x, uwgl_test_y = load_from_tsfile_to_dataframe(dataset_path + '/UWaveGestureLibrary/UWaveGestureLibrary_TEST.ts')

In [9]:
# StandWalkJump
swj_train_x, swj_train_y = load_from_tsfile_to_dataframe(dataset_path + '/StandWalkJump/StandWalkJump_TRAIN.ts') 
swj_test_x, swj_test_y = load_from_tsfile_to_dataframe(dataset_path + '/StandWalkJump/StandWalkJump_TEST.ts')

In [10]:
# FDC
from sklearn.model_selection import train_test_split

fdc_data = pd.read_csv(dataset_path + '/FDC/data.txt', sep='\t')
fdc_data['sensor'] = fdc_data['sensor'].apply(lambda x: int(x.replace('sensor_','')))
fdc_data['recipe_step'] = fdc_data['recipe_step'].apply(lambda x: int(x.replace('rs_','')))
fdc_data['unit_name'] = fdc_data['unit_name'].apply(lambda x: int(x.replace('unit_','')))
fdc_data['lot_id'] = fdc_data['lot_id'].apply(lambda x: int(x.replace('lot_','')))

fdc_train_x, fdc_test_x, fdc_train_y, fdc_test_y = train_test_split(fdc_data.drop(columns=['y']), fdc_data['y'], test_size = 0.15, stratify=fdc_data['y'], random_state=7)

## Preprocessing

In [11]:
def flatten_ts(df):
    new_list = []
    for _, row in df.iterrows():
        new_dict = {}
        for i in row.index:
            for j in range(len(row[i])):
                new_dict[i + '_' + str(j)] = row[i][j]
        new_list.append(new_dict)
    return pd.DataFrame(new_list)

## Train Models

In [12]:
from tensorflow.keras.layers import LSTM, GRU, Bidirectional, TimeDistributed, RepeatVector, Dense, Attention, Input, Embedding

In [13]:
def rounded_accuracy(y_true, y_pred):
    return keras.metrics.binary_accuracy(tf.round(y_true), tf.round(y_pred))

### RNN AE

In [88]:
def RNN_AE(n_step, n_dim):   
    recurrent_encoder = keras.models.Sequential([
        GRU(100, return_sequences=True, input_shape=[n_step, n_dim]),
        GRU(50)
    ])

    recurrent_decoder = keras.models.Sequential([
        RepeatVector(n_step, input_shape=[50]),
        GRU(100, return_sequences=True),
        TimeDistributed(Dense(n_dim, activation="sigmoid"))
    ])

    return keras.models.Sequential([recurrent_encoder, recurrent_decoder])

### BRNN AE

In [None]:
recurrent_encoder = keras.models.Sequential([
    Bidirectional(GRU(100, return_sequences=True, input_shape=[28, 28])),
    Bidirectional(GRU(30))
])

recurrent_decoder = keras.models.Sequential([
    RepeatVector(28, input_shape=[30]),
    GRU(100, return_sequences=True),
    TimeDistributed(Dense(28, activation="sigmoid"))
])

recurrent_ae = keras.models.Sequential([recurrent_encoder, recurrent_decoder])
recurrent_ae.compile(loss="binary_crossentropy", optimizer=keras.optimizers.SGD(0.1), metrics=[rounded_accuracy])

In [None]:
# history = recurrent_ae.fit(X_train, X_train, epochs=10, validation_data=[X_valid, X_valid])

### Attention BRNN AE

In [None]:
embed_size = 512; max_steps = 500; vocab_size = 10000
encoder_inputs = Input(shape=[None], dtype=np.int32)
decoder_inputs = Input(shape=[None], dtype=np.int32)
embeddings = Embedding(vocab_size, embed_size)
encoder_embeddings = embeddings(encoder_inputs)
decoder_embeddings = embeddings(decoder_inputs)
positional_encoding = PositionalEncoding(max_steps, max_dims=embed_size)
encoder_in = positional_encoding(encoder_embeddings)
decoder_in = positional_encoding(decoder_embeddings)

In [None]:
Z = encoder_in
for N in range(6):
    Z = Attention(use_scale=True)([Z, Z])

encoder_outputs = Z
Z = decoder_in
for N in range(6):
    Z = Attention(use_scale=True, causal=True)([Z, Z])
    Z = Attention(use_scale=True)([Z, encoder_outputs])

outputs = TimeDistributed(Dense(vocab_size, activation="softmax"))(Z)

### RNN VAE

In [None]:
class Sampling(keras.layers.Layer):
    def call(self, inputs):
        mean, log_var = inputs
        return K.random_normal(tf.shape(log_var)) * K.exp(log_var / 2) + mean

In [None]:
codings_size = 10

inputs = Input(shape=[28, 28])
z = keras.layers.Flatten()(inputs)
z = Dense(150, activation="selu")(z)
z = Dense(100, activation="selu")(z)
codings_mean = Dense(codings_size)(z)
codings_log_var = Dense(codings_size)(z)
codings = Sampling()([codings_mean, codings_log_var])
variational_encoder = keras.models.Model(inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])

decoder_inputs = Input(shape=[codings_size])
x = Dense(100, activation="selu")(decoder_inputs)
x = Dense(150, activation="selu")(x)
x = Dense(28 * 28, activation="sigmoid")(x)
outputs = keras.layers.Reshape([28, 28])(x)
variational_decoder = keras.models.Model(inputs=[decoder_inputs], outputs=[outputs])

_, _, codings = variational_encoder(inputs)
reconstructions = variational_decoder(codings)
variational_ae = keras.models.Model(inputs=[inputs], outputs=[reconstructions])

latent_loss = -0.5 * K.sum(
    1 + codings_log_var - K.exp(codings_log_var) - K.square(codings_mean),
    axis=-1)
variational_ae.add_loss(K.mean(latent_loss) / 784.)
variational_ae.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=[rounded_accuracy])
history = variational_ae.fit(X_train, X_train, epochs=25, batch_size=128, validation_data=[X_valid, X_valid])

### BRNN VAE

### Attention BRNN VAE

## Evaluation

In [15]:
# TO-DO
# Make a dictionary-based iterable evaluation list (key=datasets' names [train, test])
# Using CV (K=5) to find the average performance with s.d. scors
# Export as a summary table

In [14]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import f1_score, accuracy_score

In [15]:
rf_clf = RandomForestClassifier(n_jobs=-1)
svm_clf = SVC()

### ACSF1

In [16]:
acsf1_train_x = acsf1_train_x.values
acsf1_test_x = acsf1_test_x.values

In [None]:
rf_clf.fit(flatten_ts(acsf1_train_x), acsf1_train_y)
pred = rf_clf.predict(flatten_ts(acsf1_test_x))
'ACC:', accuracy_score(acsf1_test_y, pred), 'F1:', f1_score(acsf1_test_y, pred, average='weighted')

In [None]:
svm_clf.fit(flatten_ts(acsf1_train_x), acsf1_train_y)
pred = svm_clf.predict(flatten_ts(acsf1_test_x))
'ACC:', accuracy_score(acsf1_test_y, pred), 'F1:', f1_score(acsf1_test_y, pred, average='weighted')

In [17]:
acsf1_train_x = acsf1_train_x.reshape(1, acsf1_train_x.shape[0], acsf1_train_x.shape[1])
acsf1_test_x = acsf1_test_x.reshape(1, acsf1_test_x.shape[0], acsf1_test_x.shape[1])

In [18]:
recurrent_encoder = keras.models.Sequential([
    GRU(100, return_sequences=True, input_shape=[100, 1]),
    GRU(50)
])

recurrent_decoder = keras.models.Sequential([
    RepeatVector(100, input_shape=[50]),
    GRU(100, return_sequences=True),
    TimeDistributed(Dense(1))
])

rnn_ae = keras.models.Sequential([recurrent_encoder, recurrent_decoder])

In [19]:
# model = RNN_AE(acsf1_train_x.shape[1], acsf1_train_x.shape[2])

In [20]:
rnn_ae.compile(loss="binary_crossentropy", optimizer=keras.optimizers.SGD(0.1), metrics=[rounded_accuracy])

In [21]:
history = rnn_ae.fit(acsf1_train_x, acsf1_train_x, epochs=10, validation_data=[acsf1_test_x, acsf1_test_x])

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type Series).

In [None]:
codings = encoder.predict(X_train)

In [23]:
tf.test.is_gpu_available()

False

### ECG5000

In [None]:
rf_clf.fit(flatten_ts(ecg5k_train_x), ecg5k_train_y)
pred = rf_clf.predict(flatten_ts(ecg5k_test_x))
'ACC:', accuracy_score(ecg5k_test_y, pred), 'F1:', f1_score(ecg5k_test_y, pred, average='weighted')

In [None]:
svm_clf.fit(flatten_ts(ecg5k_train_x), ecg5k_train_y)
pred = svm_clf.predict(flatten_ts(ecg5k_test_x))
'ACC:', accuracy_score(ecg5k_test_y, pred), 'F1:', f1_score(ecg5k_test_y, pred, average='weighted')

### Wafer

In [None]:
rf_clf.fit(flatten_ts(wafer_train_x), wafer_train_y)
pred = rf_clf.predict(flatten_ts(wafer_test_x))
'ACC:', accuracy_score(wafer_test_y, pred), 'F1:', f1_score(wafer_test_y, pred, average='weighted')

In [None]:
svm_clf.fit(flatten_ts(wafer_train_x), wafer_train_y)
pred = svm_clf.predict(flatten_ts(wafer_test_x))
'ACC:', accuracy_score(wafer_test_y, pred), 'F1:', f1_score(wafer_test_y, pred, average='weighted')

### UWaveGestureLibrary

In [None]:
rf_clf.fit(flatten_ts(uwgl_train_x), uwgl_train_y)
pred = rf_clf.predict(flatten_ts(uwgl_test_x))
'ACC:', accuracy_score(uwgl_test_y, pred), 'F1:', f1_score(uwgl_test_y, pred, average='weighted')

In [None]:
svm_clf.fit(flatten_ts(uwgl_train_x), uwgl_train_y)
pred = svm_clf.predict(flatten_ts(uwgl_test_x))
'ACC:', accuracy_score(uwgl_test_y, pred), 'F1:', f1_score(uwgl_test_y, pred, average='weighted')

### StandWalkJump

In [None]:
rf_clf.fit(flatten_ts(swj_train_x), swj_train_y)
pred = rf_clf.predict(flatten_ts(swj_test_x))
'ACC:', accuracy_score(swj_test_y, pred), 'F1:', f1_score(swj_test_y, pred, average='weighted')

In [None]:
svm_clf.fit(flatten_ts(swj_train_x), swj_train_y)
pred = svm_clf.predict(flatten_ts(swj_test_x))
'ACC:', accuracy_score(swj_test_y, pred), 'F1:', f1_score(swj_test_y, pred, average='weighted')

### FDC

In [None]:
rf_clf.fit(fdc_train_x, fdc_train_y)
pred = rf_clf.predict(fdc_test_x)
'ACC:', accuracy_score(fdc_test_y, pred), 'F1:', f1_score(fdc_test_y, pred, average='weighted')

In [None]:
svm_clf.fit(fdc_train_x, fdc_train_y)
pred = svm_clf.predict(fdc_test_x)
'ACC:', accuracy_score(fdc_test_y, pred), 'F1:', f1_score(fdc_test_y, pred, average='weighted')