In [None]:
# Root folder under which every experiment writes its outputs
# (one sub-folder per model name, then one per dataset).
base_results_path = "../results"

# Folder that holds the UCR time-series datasets, one sub-folder per dataset.
ucr_ts_datasets_dir = "../datasets/UCR_TS_Archive_2015/"

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

# Fixed seed for NumPy's global random generator. It is set here, before the
# Keras imports that follow, so the position of these two lines matters.
# NOTE(review): only NumPy is seeded; the Keras backend's own random generator
# is not seeded in this notebook — confirm whether that is sufficient for
# reproducible runs.
SEED = 813306
np.random.seed(SEED)

from keras.utils import np_utils
from keras.models import Sequential, Model
import keras.layers as L 
from keras.callbacks import ReduceLROnPlateau, CSVLogger, ModelCheckpoint
from keras_tqdm import TQDMNotebookCallback

%matplotlib inline

In [None]:
# Names of all datasets available in the archive folder: one entry per
# non-hidden sub-directory. Plain files are skipped because every dataset is
# read from "<archive>/<name>/<name>_TRAIN", so a stray file can never be a
# valid dataset. The list is sorted because os.listdir returns entries in
# arbitrary order, which would make the order of experiments differ between
# machines and runs.
dataset_names = sorted(
    fname for fname in os.listdir(ucr_ts_datasets_dir)
    if not fname.startswith(".")
    and os.path.isdir(os.path.join(ucr_ts_datasets_dir, fname)))

### A set of methods for running experiments

In [None]:
def read_ucr_dataset(dataset_name, 
                     base_path=ucr_ts_datasets_dir,
                     convert_labels_to_categorical=True,
                     normalize=True):
    """
    Load the train and test splits of one UCR dataset.

    The splits are read from "<base_path>/<dataset_name>/<dataset_name>_TRAIN"
    and "..._TEST" as comma-separated text. In each file the first column is
    the class label and the remaining columns are the time-series values.

    Parameters
    ----------
    dataset_name : str
                   Name of the dataset (also the name of its sub-folder).

    base_path : str
                Folder that contains the dataset sub-folders.

    convert_labels_to_categorical : boolean (default True)
                If True, labels are mapped to class indices 0..(num_classes - 1)
                (in ascending order of the original label values found in the
                training split) and then one-hot encoded.

    normalize : boolean (default True)
                If True, both splits are shifted and scaled with a single mean
                and standard deviation computed over the whole training matrix.

    Returns
    -------
    ((x_train, y_train), (x_test, y_test)) : tuple of tuples of numpy arrays

    Raises
    ------
    ValueError
        If labels are converted and the test split contains a label that does
        not occur in the training split.
    """
    def _load_split(suffix):
        # Read one split and separate the label column from the series values.
        filepath = os.path.join(base_path, dataset_name, dataset_name + suffix)
        # ndmin=2 keeps a file with a single series two-dimensional.
        data = np.loadtxt(filepath, delimiter=",", ndmin=2)
        return data[:, 1:], data[:, 0]

    x_train, y_train = _load_split("_TRAIN")
    x_test, y_test = _load_split("_TEST")

    if convert_labels_to_categorical:
        # Sorted distinct labels of the training split define the class indices.
        classes = np.unique(y_train)
        num_classes = len(classes)

        unseen = np.setdiff1d(y_test, classes)
        if unseen.size:
            raise ValueError(
                "Test split of %s contains labels absent from the train split: %s"
                % (dataset_name, unseen))

        # Look the labels up instead of rescaling them arithmetically.
        # Rescaling with (y - y_min) / (y_max - y_min) * (num_classes - 1)
        # produces floats that are truncated when cast to integer class
        # indices (e.g. (1 / 49) * 49 evaluates to 0.9999999999999999, i.e.
        # class 0 instead of class 1), is wrong for unevenly spaced labels and
        # divides by zero when there is only one class.
        y_train = np_utils.to_categorical(
            np.searchsorted(classes, y_train), num_classes)
        y_test = np_utils.to_categorical(
            np.searchsorted(classes, y_test), num_classes)

    if normalize:
        # Statistics come from the training split only, so that no
        # information about the test data leaks into preprocessing.
        x_train_mean = x_train.mean()
        x_train_stdev = x_train.std()
        if x_train_stdev == 0:
            # Constant training data: only centre it, avoid dividing by zero.
            x_train_stdev = 1.0

        x_train = (x_train - x_train_mean) / x_train_stdev
        x_test = (x_test - x_train_mean) / x_train_stdev

    return ((x_train, y_train), (x_test, y_test))

In [None]:
def train_nn_and_save_results(dataset_name, model_name,
                        x_train, y_train, 
                        x_test, y_test,
                        batch_size,
                        model, 
                        opt_params,
                        base_path=base_results_path,
                        load_weights_if_exist=True):
    """
    Compile and fit a Keras model on one dataset and store all artefacts.

    Everything is written to "<base_path>/<model_name>/<dataset_name>/" with
    the file prefix "<model_name>_<dataset_name>": the saved model, the
    per-epoch fit log, periodic checkpoints, the Keras history of this call
    and the true/predicted class indices for the train and test data.

    Parameters
    ----------
    dataset_name, model_name : str
        Used to build the output folder and the file prefix.

    x_train, y_train, x_test, y_test : numpy arrays
        Inputs and one-hot encoded labels (class indices are recovered with
        argmax over axis 1). The test data is also used as validation data
        during fitting.

    batch_size : int
        Batch size passed to 'model.fit'.

    model : keras model
        An uncompiled model; it is compiled here.

    opt_params : dict
        Optional settings, each with a fallback: "loss", "optimizer",
        "metrics", "epochs", "reduce_lr_monitor", "reduce_lr_factor",
        "reduce_lr_patience", "reduce_lr_min_lr".

    base_path : str
        Root folder for results.

    load_weights_if_exist : boolean (default True)
        If True and a previously saved model file exists, its weights are
        loaded and training continues from the epoch count found in the
        fit log.
    """
    result_path = os.path.join(base_path, model_name, dataset_name)
    file_prefix = "%s_%s" %(model_name, dataset_name)
    
    weights_filename = os.path.join(result_path, file_prefix+"_weights.h5")
    history_filename = os.path.join(result_path, file_prefix+"_history.csv")
    log_filename = os.path.join(result_path, file_prefix+"_fit_log.csv")
    
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    
    # Resume only when the saved model file is really there. The folder alone
    # is not enough: it is created before training starts, so an interrupted
    # run leaves a folder without the final weights file.
    initial_epoch = 0
    resumed = load_weights_if_exist and os.path.isfile(weights_filename)
    if resumed:
        model.load_weights(weights_filename)
        # One log row per finished epoch -> number of epochs already trained.
        if os.path.isfile(log_filename) and os.path.getsize(log_filename) > 0:
            initial_epoch = len(pd.read_csv(log_filename))
  
    model.compile(loss=opt_params.get("loss", "categorical_crossentropy"), 
                  optimizer=opt_params.get("optimizer", "Adam"),
                  metrics=opt_params.get("metrics", ["accuracy"]))  
    reduce_lr = ReduceLROnPlateau(
        monitor=opt_params.get("reduce_lr_monitor", "loss"),              
        factor=opt_params.get("reduce_lr_factor", 0.5),
        patience=opt_params.get("reduce_lr_patience", 200), 
        min_lr=opt_params.get("reduce_lr_min_lr", 0.01))   
    
    checkpoint_template = os.path.join(result_path, file_prefix) + \
        "_weights_checkpoint_{epoch:02d}-{val_loss:.2f}.h5"
       
    model_learning_history = model.fit(
        x_train, y_train, 
        batch_size=batch_size, 
        epochs=opt_params.get("epochs", 500),
        verbose=0, 
        validation_data=(x_test, y_test), 
        initial_epoch=initial_epoch,
        callbacks = [
            reduce_lr, 
            TQDMNotebookCallback(), 
            ModelCheckpoint(checkpoint_template, period=50),
            # The log length is used as the epoch counter when resuming, so
            # it is appended to only when training continues from saved
            # weights; a fresh training run starts a fresh log.
            CSVLogger(log_filename, append=resumed)])    
    
    # Save the model (model.save stores the whole model, not only the
    # weights; load_weights above reads the weights back from this file).
    model.save(weights_filename)
    
    # Save the learning history of this call. When no epoch was run (training
    # had already reached the requested number of epochs) the history is
    # empty, and an existing history file is left untouched.
    if model_learning_history.history:
        pd.DataFrame(model_learning_history.history)\
            .to_csv(history_filename)
    
    # Save true and predicted class indices for the train and the test data.
    for split_name, x_split, y_split in (("train", x_train, y_train),
                                         ("test", x_test, y_test)):
        pd.DataFrame(
            {"y_true":      np.argmax(y_split, axis=1), 
             "y_predicted": np.argmax(model.predict(x_split), axis=1)})\
            .to_csv(os.path.join(
                result_path, 
                file_prefix+"_"+split_name+"_labels_and_preds.csv"))

In [None]:
def run_experiments_on_datasets(model_name, model_creation_function, 
                                model_opt_params, add_dim=False, 
                                dataset_names=dataset_names):
    """
    Train and evaluate one model architecture on each of the given datasets.

    For every dataset the data is loaded, a fresh model is created, and the
    model is trained with all results saved to disk.

    Parameters
    ----------
    model_name : str
                 A name of the studied model. Will be used as a folder name and as a prefix for resulting files.
    
    model_creation_function : (ts_length : int, num_classes : int) -> keras.Model 
                              A function that creates a Keras model using the values of input time series length and the number of classes.
    
    model_opt_params : dict
                       A dictionary which contains the value of optimization parameters for Keras' 'model.compile' method and learning callbacks. For example, 'optimizer' or 'epochs'. 
    
    add_dim : boolean (default False)
              Shows if it is necessary to add additional dummy dimension of size 1 to input tensor.
    
    dataset_names : list of str
                    A list of names of UCR Datasets which will be used for the experiment.
    
    """
    for dataset_name in dataset_names:
        (x_train, y_train), (x_test, y_test) = read_ucr_dataset(dataset_name)
        
        # A tenth of the training set, capped at 16. The lower bound of 1
        # keeps the batch size valid for training sets with fewer than 10
        # series, where the integer division alone would give 0.
        batch_size = max(1, min(x_train.shape[0] // 10, 16))
        
        # The model is created from the series length and the number of
        # classes (width of the one-hot label matrix) before any reshaping.
        model = model_creation_function(x_train.shape[1], y_train.shape[1])
        
        if add_dim:
            # Append a trailing axis of size 1 to the inputs.
            x_train = x_train.reshape(x_train.shape + (1,))
            x_test = x_test.reshape(x_test.shape + (1,))
        
        train_nn_and_save_results(dataset_name, model_name,
                                  x_train, y_train, 
                                  x_test, y_test,
                                  batch_size, 
                                  model, 
                                  model_opt_params)

### Model generation functions and the definition of learning parameters

In [None]:
def build_mlp_model(ts_length, num_classes):
    """
    Build a multilayer perceptron for inputs of length 'ts_length'.

    The network is three ReLU dense layers of 500 units followed by a softmax
    layer with 'num_classes' outputs. Every dense layer is preceded by dropout
    (rates 0.1, 0.2, 0.2 and 0.3, from input to output).
    """
    hidden_units = 500
    
    model = Sequential()
    
    # Input block: dropout applied directly to the raw series.
    model.add(L.Dropout(0.1, input_shape=(ts_length,)))
    model.add(L.Dense(hidden_units, activation="relu"))
    
    # Two further hidden blocks with identical settings.
    for dropout_rate in (0.2, 0.2):
        model.add(L.Dropout(dropout_rate))
        model.add(L.Dense(hidden_units, activation="relu"))
    
    # Output block.
    model.add(L.Dropout(0.3))
    model.add(L.Dense(num_classes, activation="softmax"))
    
    return model

# Optimisation settings used for the MLP experiments.
mlp_opt_params = dict(
    optimizer="adadelta",
    reduce_lr_patience=200,
    reduce_lr_min_lr=0.01,
    epochs=5000,
)

In [None]:
def build_fcn_model(ts_length, num_classes):
    """
    Build a fully convolutional network for inputs of shape (ts_length, 1).

    Three blocks of Conv1D ("same" padding) -> batch normalization -> ReLU
    with 128, 256 and 128 filters and kernel sizes 8, 5 and 3, followed by
    global average pooling and a softmax layer with 'num_classes' outputs.
    """
    # (number of filters, kernel size) for each convolutional block.
    conv_blocks = [(128, 8), (256, 5), (128, 3)]
    
    model = Sequential()
    
    for position, (n_filters, kernel_size) in enumerate(conv_blocks):
        # Only the first layer of the model declares the input shape.
        extra = {"input_shape": (ts_length, 1)} if position == 0 else {}
        model.add(L.Conv1D(n_filters, kernel_size, padding="same", **extra))
        model.add(L.normalization.BatchNormalization())
        model.add(L.Activation("relu"))
    
    model.add(L.GlobalAveragePooling1D())
    model.add(L.Dense(num_classes, activation="softmax"))
    
    return model

# Optimisation settings used for the FCN experiments.
fcn_opt_params = dict(
    optimizer="adam",
    reduce_lr_patience=50,
    reduce_lr_min_lr=0.0001,
    epochs=2000,
)

In [None]:
def build_simple_rnn_rnn_model(ts_length, num_classes, 
                               layers=[128, 128, 128]):
    """
    Build a stack of simple recurrent layers with a recurrent output layer.

    Every entry of 'layers' becomes one tanh SimpleRNN layer of that width
    that returns its full output sequence. The output layer is a SimpleRNN
    with 'num_classes' units and softmax activation. The input shape is
    (None, 1); 'ts_length' is accepted for interface compatibility but not
    used.
    """
    hidden_kwargs = dict(activation="tanh", return_sequences=True)
    
    model = Sequential()
    
    # The first hidden layer additionally declares the input shape.
    model.add(L.SimpleRNN(layers[0], input_shape=(None, 1), **hidden_kwargs))
    
    for units in layers[1:]:
        model.add(L.SimpleRNN(units, **hidden_kwargs))

    model.add(L.SimpleRNN(num_classes, activation="softmax"))
    return model

# Optimisation settings used for the recurrent (SimpleRNN and LSTM) experiments.
# NOTE(review): the minimum learning rate of 0.01 is above the learning rate
# Keras' Adam presumably starts with by default (0.001), in which case the
# learning-rate reduction callback would never lower it — confirm intent.
simple_rnn_opt_params = dict(
    optimizer="adam",
    reduce_lr_patience=10,
    reduce_lr_min_lr=0.01,
    epochs=500,
)

In [None]:
def build_simple_rnn_dense_model(ts_length, num_classes, 
                                 layers=[128, 128, 128]):
    """
    Build a stack of simple recurrent layers with a dense softmax output.

    Every entry of 'layers' becomes one tanh SimpleRNN layer of that width.
    All recurrent layers except the last one return their full output
    sequence; the last one returns only its final output, which feeds a
    dense softmax layer with 'num_classes' units. The input shape is
    (None, 1); 'ts_length' is accepted for interface compatibility but not
    used.
    """
    last_index = len(layers) - 1
    
    model = Sequential()
    
    # First recurrent layer: declares the input shape and passes a sequence
    # on only when more recurrent layers follow.
    model.add(L.SimpleRNN(layers[0], 
                          input_shape=(None, 1),
                          activation="tanh", 
                          return_sequences=last_index > 0))
    
    for index, units in enumerate(layers[1:], 1):
        model.add(L.SimpleRNN(units, 
                              activation="tanh", 
                              return_sequences=index != last_index))

    model.add(L.Dense(num_classes, activation="softmax"))
    return model

In [None]:
def build_lstm_model(ts_length, num_classes, 
                     layers=[128, 128, 128]):
    """
    Build a stack of LSTM layers with a dense softmax output.

    Every entry of 'layers' becomes one tanh LSTM layer of that width. All
    LSTM layers except the last one return their full output sequence; the
    last one returns only its final output, which feeds a dense softmax layer
    with 'num_classes' units. The input shape is (None, 1); 'ts_length' is
    accepted for interface compatibility but not used.
    """
    last_index = len(layers) - 1
    
    model = Sequential()
    
    # First LSTM layer: declares the input shape and passes a sequence on
    # only when more LSTM layers follow.
    model.add(L.LSTM(layers[0], 
                     input_shape=(None, 1),
                     activation="tanh", 
                     return_sequences=last_index > 0))
    
    for index, units in enumerate(layers[1:], 1):
        model.add(L.LSTM(units, 
                         activation="tanh", 
                         return_sequences=index != last_index))

    model.add(L.Dense(num_classes, activation="softmax"))
    return model

### Experiments code execution

##### Feedforward networks

In [None]:
# Multilayer perceptron on all datasets (inputs are used without an extra axis).
run_experiments_on_datasets(
    model_name="mlp",
    model_creation_function=build_mlp_model,
    model_opt_params=mlp_opt_params)

In [None]:
# Fully convolutional network on all datasets; the convolutional input needs
# the trailing axis of size 1.
run_experiments_on_datasets(
    model_name="fcn",
    model_creation_function=build_fcn_model,
    model_opt_params=fcn_opt_params,
    add_dim=True)

##### The networks with simple recurrent hidden layers and a dense output layer

In [None]:
# Simple RNN: one hidden layer of 128 units, dense output layer.
run_experiments_on_datasets(
    model_name="rnn_128_dense",
    model_creation_function=lambda length, n_classes: build_simple_rnn_dense_model(
        length, n_classes, layers=[128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# Simple RNN: two hidden layers of 128 units, dense output layer.
run_experiments_on_datasets(
    model_name="rnn_128_128_dense",
    model_creation_function=lambda length, n_classes: build_simple_rnn_dense_model(
        length, n_classes, layers=[128, 128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# Simple RNN: three hidden layers of 128 units, dense output layer.
run_experiments_on_datasets(
    model_name="rnn_128_128_128_dense",
    model_creation_function=lambda length, n_classes: build_simple_rnn_dense_model(
        length, n_classes, layers=[128, 128, 128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# Simple RNN: one hidden layer of 256 units, dense output layer.
run_experiments_on_datasets(
    model_name="rnn_256_dense",
    model_creation_function=lambda length, n_classes: build_simple_rnn_dense_model(
        length, n_classes, layers=[256]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

##### The networks with LSTM hidden layers and a dense output layer

In [None]:
# LSTM: one hidden layer of 128 units, dense output layer.
# Uses the same optimisation settings as the simple RNN experiments.
run_experiments_on_datasets(
    model_name="lstm_128_dense",
    model_creation_function=lambda length, n_classes: build_lstm_model(
        length, n_classes, layers=[128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# LSTM: two hidden layers of 128 units, dense output layer.
# Uses the same optimisation settings as the simple RNN experiments.
run_experiments_on_datasets(
    model_name="lstm_128_128_dense",
    model_creation_function=lambda length, n_classes: build_lstm_model(
        length, n_classes, layers=[128, 128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# LSTM: one hidden layer of 256 units, dense output layer.
# Uses the same optimisation settings as the simple RNN experiments.
run_experiments_on_datasets(
    model_name="lstm_256_dense",
    model_creation_function=lambda length, n_classes: build_lstm_model(
        length, n_classes, layers=[256]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

##### The networks with simple recurrent hidden layers and a recurrent output layer

In [None]:
# Simple RNN: one hidden layer of 128 units, recurrent output layer.
run_experiments_on_datasets(
    model_name="rnn_128_rnn",
    model_creation_function=lambda length, n_classes: build_simple_rnn_rnn_model(
        length, n_classes, layers=[128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# Simple RNN: two hidden layers of 128 units, recurrent output layer.
run_experiments_on_datasets(
    model_name="rnn_128_128_rnn",
    model_creation_function=lambda length, n_classes: build_simple_rnn_rnn_model(
        length, n_classes, layers=[128, 128]),
    model_opt_params=simple_rnn_opt_params,
    add_dim=True,
    dataset_names=dataset_names)

In [None]:
# Simple RNN: three hidden layers of 128 units, recurrent output layer.
# The experiment name must be unique per architecture because it determines
# the results folder and file prefix; it previously duplicated the name of
# the two-layer experiment ("rnn_128_128_rnn"), so both runs shared one
# folder and one set of saved weights.
run_experiments_on_datasets(
    "rnn_128_128_128_rnn", 
    lambda ts_length, num_classes: build_simple_rnn_rnn_model(
                                       ts_length, num_classes, 
                                       layers=[128, 128, 128]), 
    simple_rnn_opt_params, 
    add_dim=True, 
    dataset_names=dataset_names)