# Going deep into Human Activity Recognition

**Elia Bonetto, Filippo Rigotto.**

Department of Information Engineering, University of Padova, Italy.

Human Data Analytics, a.y. 2018/2019

## Part 2 - Training of DL models

TEST TO DO:
- augmented
- not-normalized

TO DO:
- plot f1 vs cum probability (see hammerla paper)
- test various loss functions/lr/decay
- lr finder [link](https://medium.com/octavian-ai/how-to-use-the-learning-rate-finder-in-tensorflow-126210de9489)

NETWORKS:

DONE:
- FFNN (2,3 layers, with/without L2, with/without dropout)
- CNN (2,3 layers, with/without L2, with/without dropout, paper networks)
- LSTM (2 layers)
- GRU (1 layer)

DOING / to test:
- CNN - test epoch
- LSTM - test lr, dropout
- RNN - test lr, dropout
- CNN + LSTM
- AE CNN/LSTM

TO DO:
- AE as above, with previous models as classifier

AUGMENTATION:
- random shuffle

In [0]:
!nvidia-smi | grep T4

In [0]:
# Notebook helpers: clear_output is used to hide noisy cell output.
from IPython.display import Image, clear_output
import os
from google.colab import drive
# Mount Google Drive in the Colab runtime, then hide the mount log.
drive.mount('/content/drive/')
clear_output()
# Work from the project folder so that the relative paths used by later cells
# (e.g. 'dataset/...' and 'output/...') resolve inside it.
os.chdir("/content/drive/My Drive/hda-project")
#!ls

In [0]:
!pip install telepot
clear_output()

from pprint import pprint
import json
from datetime import datetime
import pytz

import math
import h5py
import numpy as np
import scipy as sp
import scipy.io

import pandas as pd
pd.set_option('display.precision',3)
pd.set_option('display.float_format', '{:0.3f}'.format)

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
mpl.rcParams['figure.figsize'] = (10,6)
mpl.rcParams['axes.grid'] = True

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Dropout
from tensorflow.keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, LSTM, GRU
from tensorflow.keras.layers import TimeDistributed, RepeatVector, UpSampling1D, UpSampling2D, ZeroPadding2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, LambdaCallback
from tensorflow.keras.utils import to_categorical, plot_model
from tg_callback import TelegramCallback

#import logging
#logging.getLogger('tensorflow').disabled = True

import tensorflow.keras.backend as K
K.set_image_data_format('channels_last')

## Data loading

Start from previously preprocessed data, already split into train and test parts.

In [0]:
# Integer class label -> activity name.
# The position of a name in the tuple is its class label.
map_decode = dict(enumerate((
    'running',
    'walking',
    'jumping',
    'standing',
    'sitting',
    'lying',
    'falling',
)))
num_classes = len(map_decode)

In [0]:
# Load the preprocessed train/test split from the HDF5 file.
# X_* hold the IMU data w.r.t. body frame, Y_* hold the activities (labels).
with h5py.File('dataset/ARS-train-test-body-framed-aug-rot-per-norm.h5','r') as h5f:
    X_train = h5f['X_train'][:] # training inputs
    X_test  = h5f['X_test'][:]  # test inputs
    Y_train = h5f['Y_train'][:] # training labels
    Y_test  = h5f['Y_test'][:]  # test labels

num_data = len(X_train)
print("X_train shape: " + str(X_train.shape))
print("Y_train shape: " + str(Y_train.shape))
print("X_test shape:  " + str(X_test.shape))
print("Y_test shape:  " + str(Y_test.shape))

# categorical structures are needed for the loss function to work properly
# original test classes are needed for prediction steps
# (copies are taken before Y_train / Y_test are rebound to their one-hot versions)
Y_train_orig = Y_train.copy()
Y_test_orig  = Y_test.copy()
Y_train = to_categorical(Y_train, num_classes=num_classes, dtype=np.uint8)
Y_test  = to_categorical(Y_test,  num_classes=num_classes, dtype=np.uint8)

## Training and evaluation

Precision, recall and F1 score are implemented referring to Tensorflow backend and are passed as custom metrics to track during training and evaluation of models.

We further define three metrics to save best models:

$sm1 = accuracy+precision+recall \quad sm2 = \frac{accuracy+precision+recall}{loss} \quad sm3 = \frac{accuracy}{loss}$

where $accuracy$ is `keras.metrics.categorical_accuracy` and $loss$ is `keras.losses.categorical_crossentropy`.

The `run_model` function takes care of bootstrap, training and evaluation processes for a given Keras model and configuration.

In [0]:
def recall(y_true, y_pred):
    """Batch-wise recall: rounded true positives over rounded actual positives.

    K.epsilon() in the denominator avoids a division by zero when the batch
    contains no positive entries.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual + K.epsilon())

def precision(y_true, y_pred):
    """Batch-wise precision: rounded true positives over rounded predicted positives.

    K.epsilon() in the denominator avoids a division by zero when nothing
    is predicted as positive.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (predicted + K.epsilon())

def f1(y_true, y_pred):
    """Batch-wise F1 score, computed from the `precision` and `recall` metrics."""
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    # epsilon keeps the ratio defined when both precision and recall are zero
    harmonic_half = (p*r)/(p+r+K.epsilon())
    return 2*harmonic_half

# -----

def sum_metric_1(y_true, y_pred):
    """sm1 = accuracy + precision + recall."""
    # accuracy term: argmax match cast to float, as done in the Keras source code
    predicted_class = K.argmax(y_pred, axis=-1)
    actual_class = K.argmax(y_true, axis=-1)
    match = K.cast(K.equal(actual_class, predicted_class), K.floatx())
    return match + precision(y_true, y_pred) + recall(y_true, y_pred)

def sum_metric_2(y_true, y_pred):
    """sm2 = (accuracy + precision + recall) / loss.

    The loss is the mean categorical cross-entropy of the batch; it is used
    as-is in the denominator (no epsilon is added).
    """
    numerator = sum_metric_1(y_true, y_pred)
    crossentropy = K.mean(K.categorical_crossentropy(y_true, y_pred))
    return numerator / crossentropy

def sum_metric_3(y_true, y_pred):
    """sm3 = accuracy / loss.

    Accuracy is the argmax match cast to float; the loss is the mean
    categorical cross-entropy of the batch (no epsilon is added).
    """
    predicted_class = K.argmax(y_pred, axis=-1)
    actual_class = K.argmax(y_true, axis=-1)
    match = K.cast(K.equal(actual_class, predicted_class), K.floatx())
    crossentropy = K.mean(K.categorical_crossentropy(y_true, y_pred))
    return match / crossentropy
    
def save_high_acc_low_loss(epoch,logs):
    """Epoch-end hook that saves a model with near-best accuracy and a new lowest loss.

    The model is written to '<out_folder>/model-bestal.h5' when the validation
    accuracy is strictly below the best seen so far but within 0.01 of it, and
    the validation loss is lower than every previous one. The running best
    accuracy and lowest loss are then updated.

    Relies on module-level state: `out_folder`, `max_accu` and `min_loss` must
    be set before training starts, and `model` is resolved as a module-level
    name at call time.
    NOTE(review): `model` is therefore whatever the notebook currently binds to
    that global name, not necessarily the model being trained - confirm.

    Args:
        epoch: index of the epoch that just ended (unused).
        logs: metrics dict of the epoch; must contain 'val_acc' and 'val_loss'.
    """
    # the folder actually read below is `out_folder`
    global out_folder
    global max_accu
    global min_loss

    vaccu = logs['val_acc']
    vloss = logs['val_loss']

    near_best_accuracy = max_accu - 0.01 < vaccu < max_accu
    if near_best_accuracy and vloss < min_loss:
        model.save(os.path.join(out_folder, 'model-bestal.h5'))

    # update the running records only after the check above
    if vaccu > max_accu:
        max_accu = vaccu
    if vloss < min_loss:
        min_loss = vloss

# -----

def per_class_accuracy(y_true, y_preds, class_labels):
    """Return, for each class label, the fraction of its samples predicted correctly.

    Kept for reference: the diagonal of the confusion matrix is used instead.
    A sample counts as correct when its true label equals the rounded prediction.
    Labels are converted with int() before being compared to `y_true`.
    """
    accuracies = []
    for class_label in class_labels:
        wanted = int(class_label)
        hits = []
        for idx, y_pred in enumerate(y_preds):
            if y_true[idx] == wanted:
                hits.append(y_true[idx] == np.round(y_pred))
        accuracies.append(np.mean(hits))
    return accuracies

def halfLRafterEpoch(epoch):
    """Step-decay schedule: start at 0.1 and halve the rate every 10 epochs.

    Kept for reference: a lambda function is used instead.

    Args:
        epoch: zero-based epoch index.

    Returns:
        The learning rate to use for the given epoch.
    """
    initial_lrate = 0.1
    drop_rate = 0.5
    epochs_drop = 10.0
    # (1+epoch) makes the first drop happen at the 10th epoch (index 9)
    return initial_lrate * math.pow(drop_rate, math.floor((1+epoch)/epochs_drop))


In [0]:
def _build_optimizer(config):
    """Return the optimizer described by `config`.

    Without an 'lr' entry the optimizer name itself is returned, so that Keras
    builds it with its default hyper-parameters.

    Raises:
        ValueError: if 'lr' is supplied and the optimizer name is not recognised.
    """
    opt_name = config['optimizer']
    if 'lr' not in config:
        # use default values: pass the string
        return opt_name

    # decay is optional for every optimizer
    kwargs = {'lr': config['lr']}
    if 'decay' in config:
        kwargs['decay'] = config['decay']

    # 'sgdm' has to be tested before 'sgd', which is a substring of it
    if 'sgdm' in opt_name:
        return SGD(momentum=config['momentum'], **kwargs)
    if 'sgd' in opt_name:
        return SGD(**kwargs)
    if 'adam' in opt_name:
        return Adam(**kwargs)
    if 'rmsprop' in opt_name:
        return RMSprop(**kwargs)
    raise ValueError("Unknown optimizer '{}': expected one of sgd, sgdm, adam, rmsprop".format(opt_name))


def _save_current_figure(folder, stem):
    """Save the active matplotlib figure in `folder` as both PNG and PDF."""
    fname = os.path.join(folder, stem)
    plt.savefig(fname+'.png')
    plt.savefig(fname+'.pdf', format='pdf')


def run_model(model, config, 
              x_train = X_train, y_train = Y_train, 
              x_test = X_test, y_test = Y_test, y_test_orig = Y_test_orig):
    """Compile, train and evaluate a Keras model, saving every artifact to disk.

    A folder 'output/<timestamp>_<model name>' is created (its path is also
    stored in the global `out_folder`) and filled with: model summary and plot,
    the configuration, the checkpoints ('best', 'bestsm1', 'bestsm2', 'bestsm3',
    'bestal', 'final'), the training history, loss/accuracy/metrics plots and,
    for every saved checkpoint, an evaluation report and confusion matrix.

    Args:
        model: uncompiled Keras model; its name is used in the folder name.
        config: dict of settings. Required keys: 'optimizer', 'loss', 'epochs',
            'batch_size'. Optional keys: 'lr', 'decay', 'momentum' (required by
            'sgdm' when 'lr' is given), 'shuffle' (default True),
            'use_validation', 'lr_step', 'early_stop' (both default 0, i.e.
            disabled) and 'tg' (default False).
        x_train, y_train: training inputs and one-hot labels.
        x_test, y_test: test inputs and one-hot labels.
        y_test_orig: integer test labels, used for the per-class report.

    The data defaults are bound to the notebook globals when this function is
    defined: re-run this cell after reloading the dataset.

    Raises:
        ValueError: if 'lr' is supplied together with an unknown optimizer name.
    """
    global out_folder
    # kept as globals, as before, to hold the state of the 'bestal' callback
    global max_accu
    global min_loss

    # validate the optimizer first, so that a bad config does not leave an output folder behind
    opt = _build_optimizer(config)

    run_folder = os.path.join('output', datetime.now(pytz.timezone('Europe/Rome')).strftime('%y%m%d-%H%M%S')+'_'+model.name)
    out_folder = run_folder
    # makedirs also creates the parent 'output' folder when it is missing
    os.makedirs(run_folder, exist_ok=True)

    # print and save model summary
    print('Summary')
    model.summary(line_length=100)
    with open(os.path.join(run_folder, 'summary.txt'),'w') as sfile:
        model.summary(line_length=100, print_fn=lambda x: sfile.write(x+'\n'))
    plot_model(model, to_file=os.path.join(run_folder, 'model.png'), show_shapes=True)

    # save config
    with open(os.path.join(run_folder, 'config.json'),'w') as cfile:
        json.dump(config, cfile, indent=2)

    # compile model
    model.compile(optimizer=opt,
                  loss=config['loss'],
                  metrics=['accuracy', precision, recall, f1, sum_metric_1, sum_metric_2, sum_metric_3])

    def _save_near_best(epoch, logs):
        # Same rule as save_high_acc_low_loss, but bound to the model being
        # trained here instead of a notebook-level `model` variable.
        global max_accu
        global min_loss
        vaccu = logs['val_acc']
        vloss = logs['val_loss']
        if max_accu - 0.01 < vaccu < max_accu and vloss < min_loss:
            model.save(os.path.join(run_folder, 'model-bestal.h5'))
        if vaccu > max_accu:
            max_accu = vaccu
        if vloss < min_loss:
            min_loss = vloss

    # add requested callbacks for model, starting from checkpointing
    callbacks = [
        ModelCheckpoint(os.path.join(run_folder, 'model-best.h5'), 
                        monitor='val_acc', mode='max', save_best_only=True, verbose=0),
        ModelCheckpoint(os.path.join(run_folder, 'model-bestsm1.h5'), 
                        monitor='val_sum_metric_1', mode='max', save_best_only=True, verbose=0),
        ModelCheckpoint(os.path.join(run_folder, 'model-bestsm2.h5'), 
                        monitor='val_sum_metric_2', mode='max', save_best_only=True, verbose=0),
        ModelCheckpoint(os.path.join(run_folder, 'model-bestsm3.h5'), 
                        monitor='val_sum_metric_3', mode='max', save_best_only=True, verbose=0),
        LambdaCallback(on_epoch_end=_save_near_best)
    ]

    lr_step = config.get('lr_step', 0)
    if lr_step > 0:
        # halves lr every lr_step epochs (starting lr = 0.01)
        # NOTE: the schedule always starts from 0.01, whatever config['lr'] is
        callbacks.append(LearningRateScheduler(
            lambda epoch: 0.01 * math.pow(0.5, math.floor((1+epoch)/lr_step)),
            verbose=1))

    early_stop = config.get('early_stop', 0)
    if early_stop > 0:
        # stop if val_loss has not diminished after this number of epochs
        callbacks.append(EarlyStopping(patience=early_stop))

    if config.get('tg', False):
        # telegram notification when training stops
        callbacks.append(TelegramCallback(name=model.name))

    # train model, save final state and history
    print('\nTraining')

    # by default, use test set also as validation set
    # NOTE: in that case the checkpoints are selected on the test data
    x_val = x_test
    y_val = y_test
    if config.get('use_validation'):
        # generate validation set excluding it from the training set
        x_train, x_val, y_train, y_val = \
            train_test_split(x_train, y_train, test_size=0.2, random_state=1, stratify=y_train)

    # reset the state read and updated by the 'bestal' callback
    max_accu = 0.0
    min_loss = 1e10
    history = model.fit(x=x_train, y=y_train,
                        shuffle=config.get('shuffle', True),
                        epochs=config['epochs'],
                        batch_size=config['batch_size'],
                        callbacks=callbacks,
                        validation_data=(x_val,y_val))

    model.save(os.path.join(run_folder, 'model-final.h5'))

    with open(os.path.join(run_folder, 'history.json'),'w') as hfile:
        # native json module can't handle float32 objects
        # pandas can and is used as a preprocessor to json module
        hpd = pd.DataFrame(history.history)
        json.dump(json.loads(hpd.to_json()), hfile, indent=2)

    # plot and save loss, accuracy and metrics (precision, recall, f1)
    print('\nLoss, accuracy and metrics plots')
    plt.figure()
    plt.plot(history.history['loss'], label='Training')
    plt.plot(history.history['val_loss'], label='Validation')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.tight_layout()
    _save_current_figure(run_folder, 'plot-loss')

    plt.figure()
    plt.plot(history.history['acc'], label='Training')
    plt.plot(history.history['val_acc'], label='Validation')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim((0,1))
    plt.tight_layout()
    _save_current_figure(run_folder, 'plot-accuracy')

    plt.figure()
    for key, label in (('precision', 'Precision'), ('recall', 'Recall'), ('f1', 'F1')):
        plt.plot(history.history[key], label=label+' Tr')
        plt.plot(history.history['val_'+key], label=label+' Val')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Metrics')
    plt.tight_layout()
    _save_current_figure(run_folder, 'plot-metrics')

    # evaluate models, save results
    custom_objects = {'precision': precision, 'recall': recall, 'f1': f1,
                      'sum_metric_1': sum_metric_1, 'sum_metric_2': sum_metric_2, 'sum_metric_3': sum_metric_3}
    class_ids = list(range(num_classes))
    classes_num = list(map(str, class_ids)) # classes list as str integers
    classes = list(map_decode.values())

    for model_suffix in ['final','best','bestsm1','bestsm2','bestsm3','bestal']:
        model_name = os.path.join(run_folder, 'model-{}.h5'.format(model_suffix))
        if not os.path.exists(model_name): continue

        # a separate name keeps the `model` argument bound to the trained model
        loaded = load_model(model_name, custom_objects=custom_objects)

        print(f"\nEvaluation of {model_suffix}")
        metrics = loaded.evaluate(x=x_test, y=y_test)
        metrics = dict(zip(loaded.metrics_names, metrics)) # build a dict adding names
        metrics['name'] = loaded.name
        metrics['type'] = model_suffix

        # get predictions
        preds = loaded.predict(x=x_test)
        y_pred = np.argmax(preds, axis=1)

        metrics['classes'] = classes

        # build per-class metrics and confusion matrix
        # explicit labels keep one row/entry per class even when a class is
        # missing from both the test labels and the predictions
        cr = classification_report(y_test_orig, y_pred, labels=class_ids, output_dict=True)

        cm = confusion_matrix(y_test_orig, y_pred, labels=class_ids)
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        # normalization; rows of classes without samples stay at 0 instead of NaN
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype=float), where=row_sums > 0)

        metrics['acc-class'] = np.diag(cm).tolist()
        metrics['precision-class'] = [cr[cl]['precision'] for cl in classes_num] # exclude avgs
        metrics['recall-class'] = [cr[cl]['recall'] for cl in classes_num]
        metrics['f1-class'] = [cr[cl]['f1-score'] for cl in classes_num]
        metrics['averages'] = cr['macro avg']
        metrics['weighted-averages'] = cr['weighted avg']
        del metrics['averages']['support']
        del metrics['weighted-averages']['support']
        print()
        pprint(metrics)

        # conversion to pure python float before saving to json
        for key, value in metrics.items():
            if isinstance(value, np.floating):
                metrics[key] = float(value)

        # save evaluation dict, confusion matrix and its plot
        with open(os.path.join(run_folder, f"evaluation-{model_suffix}.json"),'w') as efile:
            json.dump(metrics, efile, indent=2)

        np.save(os.path.join(run_folder, f"confusion-{model_suffix}.npy"), cm)

        plt.figure()
        sns.heatmap(cm, annot=True, cmap='Blues', xticklabels=classes, yticklabels=classes)
        plt.xlabel('Predicted class')
        plt.ylabel('True class')
        plt.tight_layout()
        _save_current_figure(run_folder, f"plot-confusion-{model_suffix}")

## Standard models

Keras models for simple or standard architectures.

### Fully connected

In [0]:
def TwoDense_model(input_shape, num_classes, l2_reg=None, dropout_rate=None):
    """Flattened input -> Dense(512, relu) -> softmax classifier.

    A truthy `l2_reg` adds L2 kernel regularization to the hidden layer, a
    truthy `dropout_rate` adds dropout after it; both are reflected in the
    model name.
    """
    suffix = ''
    if l2_reg: suffix += '-reg{}'.format(l2_reg)
    if dropout_rate: suffix += '-do{}'.format(dropout_rate)

    layers = [
        Flatten(input_shape=input_shape),
        Dense(512, activation='relu', kernel_regularizer=l2(l2_reg) if l2_reg else None),
    ]
    if dropout_rate:
        layers.append(Dropout(dropout_rate))
    layers.append(Dense(num_classes, activation='softmax'))

    return Sequential(layers, name='TwoDense' + suffix)

In [0]:
def ThreeDense_model(input_shape, num_classes, l2_reg=None, dropout_rate=None):
    """Flattened input -> Dense(512) -> Dense(256) -> softmax classifier.

    A truthy `l2_reg` adds L2 kernel regularization to the hidden layers, a
    truthy `dropout_rate` adds dropout after each of them; both are reflected
    in the model name.
    """
    suffix = ''
    if l2_reg: suffix += '-reg{}'.format(l2_reg)
    if dropout_rate: suffix += '-do{}'.format(dropout_rate)

    layers = [Flatten(input_shape=input_shape)]
    for units in (512, 256):
        # every hidden layer gets its own regularizer instance
        layers.append(Dense(units, activation='relu', kernel_regularizer=l2(l2_reg) if l2_reg else None))
        if dropout_rate:
            layers.append(Dropout(dropout_rate))
    layers.append(Dense(num_classes, activation='softmax'))

    return Sequential(layers, name='ThreeDense' + suffix)

In [0]:
def FiveDense_model(input_shape, num_classes, l2_reg=None, dropout_rate=None):
    """Flattened input -> Dense 512/256/128/64/32 (relu) -> softmax classifier.

    A truthy `l2_reg` adds L2 kernel regularization to the hidden layers, a
    truthy `dropout_rate` adds dropout after each of them; both are reflected
    in the model name.
    """
    suffix = ''
    if l2_reg: suffix += '-reg{}'.format(l2_reg)
    if dropout_rate: suffix += '-do{}'.format(dropout_rate)

    layers = [Flatten(input_shape=input_shape)]
    for units in (512, 256, 128, 64, 32):
        # every hidden layer gets its own regularizer instance
        layers.append(Dense(units, activation='relu', kernel_regularizer=l2(l2_reg) if l2_reg else None))
        if dropout_rate:
            layers.append(Dropout(dropout_rate))
    layers.append(Dense(num_classes, activation='softmax'))

    return Sequential(layers, name='FiveDense' + suffix)

### Convolutional

In [0]:
def Conv1D_1C1D_model(input_shape, num_classes, l2_reg=None):
    """One Conv1D block (64 filters, kernel 5) followed by the softmax classifier.

    A truthy `l2_reg` adds L2 kernel regularization to the convolution and is
    appended to the model name.
    """
    suffix = '-reg{}'.format(l2_reg) if l2_reg else ''
    model = Sequential(name='Conv1D-1C1D' + suffix)

    # input shape == (batch, steps, channels)
    model.add(Conv1D(64, 5, input_shape=input_shape,
                     kernel_regularizer=l2(l2_reg) if l2_reg else None))
    # NOTE(review): with that layout axis=1 is the steps axis - confirm this is intended
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(2))

    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def Conv1D_1C2D_model(input_shape, num_classes, l2_reg=None):
    """One Conv1D block (64 filters, kernel 5), Dense(128, relu) and the softmax classifier.

    A truthy `l2_reg` adds L2 kernel regularization to the convolution and to
    the hidden dense layer, and is appended to the model name.
    """
    suffix = '-reg{}'.format(l2_reg) if l2_reg else ''
    model = Sequential(name='Conv1D-1C2D' + suffix)

    model.add(Conv1D(64, 5, input_shape=input_shape,
                     kernel_regularizer=l2(l2_reg) if l2_reg else None))
    # NOTE(review): axis=1 is presumably the steps axis of a (batch, steps, channels) input - confirm
    model.add(BatchNormalization(axis=1))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu',
                    kernel_regularizer=l2(l2_reg) if l2_reg else None))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def Conv1D_2C1D_model(input_shape, num_classes, l2_reg=None, dropout_rate=None):
    """Two Conv1D blocks (64 then 32 filters, kernel 5) and the softmax classifier.

    Each block is Conv1D -> BatchNormalization -> relu -> [Dropout] -> MaxPooling1D(2).
    A truthy `l2_reg` adds L2 kernel regularization to the convolutions, a
    truthy `dropout_rate` enables the dropout layers; both are reflected in
    the model name.
    """
    name = 'Conv1D-2C1D' \
        + ('-reg{}'.format(l2_reg) if l2_reg else '') \
        + ('-do{}'.format(dropout_rate) if dropout_rate else '')

    layers = []
    for n_filters in (64, 32):
        # only the first layer of the network declares the input shape
        extra = {} if layers else {'input_shape': input_shape}
        layers.append(Conv1D(n_filters, 5,
                             kernel_regularizer=l2(l2_reg) if l2_reg else None, **extra))
        # NOTE(review): axis=1 is presumably the steps axis of a (batch, steps, channels) input - confirm
        layers.append(BatchNormalization(axis=1))
        layers.append(Activation('relu'))
        if dropout_rate:
            layers.append(Dropout(dropout_rate))
        layers.append(MaxPooling1D(2))

    layers.append(Flatten())
    layers.append(Dense(num_classes, activation='softmax'))
    return Sequential(layers, name=name)

In [0]:
def Conv1D_2C2D_model(input_shape, num_classes, l2_reg=None, dropout_rate=None):
    """Two Conv1D blocks (64 then 32 filters, kernel 5), Dense(128, relu) and the softmax classifier.

    Each block is Conv1D -> BatchNormalization -> relu -> [Dropout] -> MaxPooling1D(2).
    A truthy `l2_reg` adds L2 kernel regularization to the convolutions only
    (the hidden dense layer is not regularized), a truthy `dropout_rate` adds
    dropout inside the blocks and after the hidden dense layer; both are
    reflected in the model name.
    """
    name = 'Conv1D-2C2D' \
        + ('-reg{}'.format(l2_reg) if l2_reg else '') \
        + ('-do{}'.format(dropout_rate) if dropout_rate else '')

    layers = []
    for n_filters in (64, 32):
        # only the first layer of the network declares the input shape
        extra = {} if layers else {'input_shape': input_shape}
        layers.append(Conv1D(n_filters, 5,
                             kernel_regularizer=l2(l2_reg) if l2_reg else None, **extra))
        # NOTE(review): axis=1 is presumably the steps axis of a (batch, steps, channels) input - confirm
        layers.append(BatchNormalization(axis=1))
        layers.append(Activation('relu'))
        if dropout_rate:
            layers.append(Dropout(dropout_rate))
        layers.append(MaxPooling1D(2))

    layers.append(Flatten())
    layers.append(Dense(128, activation='relu'))
    if dropout_rate:
        layers.append(Dropout(dropout_rate))
    layers.append(Dense(num_classes, activation='softmax'))
    return Sequential(layers, name=name)

In [0]:
def Conv1D_Chen_model(input_shape, num_classes):
    """Chen and Xue, 'A DL approach to HAR based on single accelerometer'.

    Three Conv1D(kernel 2, relu) + MaxPooling1D(2) stages with 18, 36 and 24
    filters, followed by the softmax classifier.
    """
    model = Sequential(name='Conv1D-Chen')

    model.add(Conv1D(18, 2, activation='relu', input_shape=input_shape)) # depth, kernel
    model.add(MaxPooling1D(2))

    for depth in (36, 24):
        model.add(Conv1D(depth, 2, activation='relu'))
        model.add(MaxPooling1D(2))

    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def Conv1D_Rueda_model(input_shape, num_classes):
    """Moya Rueda et al., 'CNN for HAR using body-worn sensors'.

    Two stages of two Conv1D(64, kernel 5, relu) layers plus MaxPooling1D(2),
    then two Dense(512, relu) layers with 0.5 dropout and the softmax classifier.
    """
    model = Sequential(name='Conv1D-Rueda')

    # first stage: its first convolution declares the input shape
    model.add(Conv1D(64, 5, activation='relu', input_shape=input_shape))
    model.add(Conv1D(64, 5, activation='relu'))
    model.add(MaxPooling1D(2))

    # second stage
    model.add(Conv1D(64, 5, activation='relu'))
    model.add(Conv1D(64, 5, activation='relu'))
    model.add(MaxPooling1D(2))

    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(512, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def Conv2D_Bevilacqua_model(input_shape, num_classes):
    """Bevilacqua et al., 'HAR with CNNs'.

    Zero padding, three Conv2D -> BatchNormalization -> relu -> MaxPooling2D
    stages, then Dense 500/250/125 (relu) with 0.5 dropout and the softmax
    classifier.
    """
    # (depth, kernel, pool size, pool strides) of each convolutional stage
    stages = (
        (10, (3,5), (3,3), (1,1)),
        (2,  (2,4), (2,2), (1,1)),
        (2,  (2,2), (3,2), (1,2)),
    )

    model = Sequential(name='Conv2D-Bevilacqua')
    model.add(ZeroPadding2D((1,2), input_shape=input_shape))

    for depth, kernel, pool_size, pool_strides in stages:
        model.add(Conv2D(depth, kernel))
        model.add(BatchNormalization(axis=2))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size, pool_strides))

    model.add(Flatten())
    for units in (500, 250, 125):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def Conv2D_Ha_model(input_shape, num_classes):
    """Ha, Yun and Choi, 'Multi-modal CNN for AR'.

    Conv2D(32, 4x4) and Conv2D(64, 5x5), each with relu and a 3x3 max pooling
    of stride 1, then Dense(128, relu), 0.5 dropout and the softmax classifier.
    """
    model = Sequential(name='Conv2D-Ha')

    model.add(Conv2D(32, (4,4), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((3,3),(1,1)))

    model.add(Conv2D(64, (5,5), activation='relu'))
    model.add(MaxPooling2D((3,3),(1,1)))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

### Recurrent

In [0]:
def TwoLSTM_modelv1(input_shape, num_classes):
    """Two stacked LSTM layers (128 -> 64 units) without dropout, plus softmax classifier.

    `input_shape` is forwarded as `batch_input_shape`, so it presumably has to
    include the batch dimension - confirm against the callers.
    """
    model = Sequential(name='TwoLSTMv1')
    # the first LSTM returns the whole sequence to feed the second one
    model.add(LSTM(128, return_sequences=True,  stateful=False, batch_input_shape=input_shape))
    model.add(LSTM(64, return_sequences=False, stateful=False))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def TwoLSTM_modelv0(input_shape, num_classes):
    """Two stacked LSTM layers (128 -> 64 units) with 0.4 dropout in between, plus softmax classifier.

    `input_shape` is forwarded as `batch_input_shape`, so it presumably has to
    include the batch dimension - confirm against the callers.
    """
    model = Sequential(name='TwoLSTMv0')
    # the first LSTM returns the whole sequence to feed the second one
    model.add(LSTM(128, return_sequences=True,  stateful=False, batch_input_shape=input_shape))
    model.add(Dropout(0.4))
    model.add(LSTM(64, return_sequences=False, stateful=False))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def TwoLSTM_model(input_shape, num_classes):
    """Two stacked LSTM layers (128 -> 64 units) with 0.2 dropout in between, plus softmax classifier.

    `input_shape` is forwarded as `batch_input_shape`, so it presumably has to
    include the batch dimension - confirm against the callers.
    """
    model = Sequential(name='TwoLSTM')
    # the first LSTM returns the whole sequence to feed the second one
    model.add(LSTM(128, return_sequences=True,  stateful=False, batch_input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(64, return_sequences=False, stateful=False))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def TwoLSTM_modelv2(input_shape, num_classes):
    """Two stacked LSTM layers (256 -> 128 units) with 0.2 dropout in between, plus softmax classifier.

    `input_shape` is forwarded as `batch_input_shape`, so it presumably has to
    include the batch dimension - confirm against the callers.
    """
    model = Sequential(name='TwoLSTMv2')
    # the first LSTM returns the whole sequence to feed the second one
    model.add(LSTM(256, return_sequences=True,  stateful=False, batch_input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(128, return_sequences=False, stateful=False))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def TwoLSTM_modelv4(input_shape, num_classes):
    """Two stacked LSTM layers (256 -> 128 units) with 0.1 dropout in between, plus softmax classifier.

    `input_shape` is forwarded as `batch_input_shape`, so it presumably has to
    include the batch dimension - confirm against the callers.
    """
    model = Sequential(name='TwoLSTMv4')
    # the first LSTM returns the whole sequence to feed the second one
    model.add(LSTM(256, return_sequences=True,  stateful=False, batch_input_shape=input_shape))
    model.add(Dropout(0.1))
    model.add(LSTM(128, return_sequences=False, stateful=False))
    model.add(Dense(num_classes, activation='softmax'))
    return model

In [0]:
def TwoLSTM_modelv3(input_shape, num_classes):
    """Two stacked LSTM layers (256 -> 256 units) with 0.2 dropout in between, plus softmax classifier.

    `input_shape` is forwarded as `batch_input_shape`, so it presumably has to
    include the batch dimension - confirm against the callers.
    """
    model = Sequential(name='TwoLSTMv3')
    # the first LSTM returns the whole sequence to feed the second one
    model.add(LSTM(256, return_sequences=True,  stateful=False, batch_input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(256, return_sequences=False, stateful=False))
    model.add(Dense(num_classes, activation='softmax'))
    return model

### GRU (TODO delete this label)

In [0]:
def OneGRU_model(input_shape, num_classes):
    """Single GRU layer (128 units) with 0.2 dropout, plus softmax classifier."""
    model = Sequential(name='OneGRU')
    model.add(GRU(128, input_shape=input_shape))
    model.add(Dropout(0.2)) # was commented out in a previous version
    model.add(Dense(num_classes, activation='softmax'))
    return model

## Mixed models

Models composed of two or more different architecture types.

### CNN + LSTM

In [0]:
def CNN_LSTM_model(input_shape, num_classes):
    """Time-distributed Conv1D feature extractor feeding an LSTM and a softmax classifier.

    TimeDistributed applies the wrapped layer to every temporal slice of its
    input: the input should be at least 3D and the dimension of index one is
    considered to be the temporal dimension.
    """
    n_filters = 256 #1287
    lstm_units = 128

    model = Sequential(name ='CNN-LSTM')
    model.add(TimeDistributed(Conv1D(filters=n_filters, kernel_size=1, activation='relu'), input_shape=input_shape))
    model.add(TimeDistributed(Conv1D(filters=n_filters, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(Dropout(0.1)))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    # one flat feature vector per temporal slice for the recurrent layer
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(lstm_units))
    model.add(Dense(num_classes, activation='softmax'))
    return model

## AutoEncoders

### CNN AutoEncoder

In [0]:
def CNN_AE_model(input_shape, num_features):
    """Convolutional autoencoder made of kernel-size-1 Conv1D layers.

    Filters go 128 -> 64 -> 64 -> 128 and the output layer has `num_features`
    filters; every layer uses relu.
    """
    model = Sequential(name='CNN_AE')
    model.add(Conv1D(filters=128, kernel_size=1, activation='relu', input_shape=input_shape, padding='same'))
    for n_filters in (64, 64, 128):
        model.add(Conv1D(filters=n_filters, kernel_size=1, activation='relu', padding='same'))
    # output layer #TODO Try with softmax
    model.add(Conv1D(filters=num_features, kernel_size=1, activation='relu'))
    return model

### LSTM AutoEncoder

In [0]:
def LSTM_AE_model(input_shape, num_features):
    """Recurrent autoencoder: LSTM 128 -> 64 -> 128 and a per-step Dense output.

    Every LSTM returns the full sequence; the time-distributed Dense layer
    outputs `num_features` values per step.
    """
    model = Sequential(name='LSTM-AE')
    #TODO try with 64, 32/ 128,64
    model.add(LSTM(128, activation='relu', input_shape=input_shape, return_sequences=True))
    for units in (64, 128):
        model.add(LSTM(units, activation='relu', return_sequences=True))
    # output layer #TODO Try with softmax
    model.add(TimeDistributed(Dense(num_features, activation='relu')))
    return model

### CNN + LSTM AutoEncoder


In [0]:
def CNN_LSTM_AE_model(input_shape, num_features):
    """Mixed autoencoder: two kernel-size-1 Conv1D layers, an LSTM and a per-step Dense output.

    The time-distributed Dense layer outputs `num_features` values per step.
    """
    model = Sequential(name='CNN-LSTM-AE')
    model.add(Conv1D(filters=128, kernel_size=1, activation='relu', input_shape=input_shape))
    model.add(Conv1D(filters=64, kernel_size=1, activation='relu'))
    model.add(MaxPooling1D(pool_size=1))
    model.add(LSTM(128, activation='relu', return_sequences=True))
    # output layer #TODO Try with softmax
    model.add(TimeDistributed(Dense(num_features, activation='relu')))
    return model

### SVM tests to be moved below or out


In [0]:
# Exploratory cell: reuse the first three layers of an existing network as a
# frozen feature extractor for the SVM tests below.
# NOTE(review): `input_shape`, `model` and `x_tr` are not defined in this part
# of the notebook - they presumably come from a previous training cell.
DL_input = Input(input_shape)
DL_model = DL_input
# chain the first three layers of `model` on the new input
for layer in model.layers[:3]:
    DL_model = layer(DL_model)
DL_model = Model(inputs=DL_input, outputs=DL_model)
DL_model.summary()
# freeze the reused layers; the second summary shows the effect on trainable params
for layer in DL_model.layers:
    layer.trainable = False
DL_model.summary()

#DL_model.get_weights()[0]
#model.get_weights()[0]

# extracted features, reshaped to 2D by keeping only axes 0 and 2
# NOTE(review): this reshape presumably assumes axis 1 has size 1 - confirm
data = DL_model.predict(x_tr, verbose = 1)
data = data.reshape(data.shape[0],data.shape[2])

# every label is repeated 128 times along axis 0
# NOTE(review): 128 is presumably the number of feature rows per sample - confirm
flattened_y = np.repeat(Y_train_orig, 128, axis=0)
print(flattened_y.shape)

In [0]:
# still to test
from sklearn import datasets, svm
from sklearn.kernel_approximation import Nystroem
clf = svm.LinearSVC(verbose=True)
feature_map_nystroem = Nystroem(gamma=.2,
                                random_state=1,
                                n_components=3)
data_transformed = feature_map_nystroem.fit_transform(data)
print(data_transformed.shape)
clf.fit(data_transformed, flattened_y)

clf.score(data_transformed, flattened_y)

In [0]:
# np.asarray accepts both a DataFrame and an ndarray: `as_matrix()` is not
# available on ndarrays and has been removed from recent pandas versions
b = np.asarray(data)

In [0]:
# Grid search over SVM kernels and hyper-parameters on the extracted features.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

#TO BE TRIED
classifier=svm.SVC() #or LinearSVC

# two grids: RBF kernel (gamma and C) and linear kernel (C only)
parameters=[{'kernel': ['rbf'], 'gamma': [0.001, 0.0001], 'C': [1, 10, 100, 1000]}, {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]

# 4-fold cross validation using all the available cores
modelCV=GridSearchCV(classifier,parameters,n_jobs=-1,cv=4,verbose=4)
modelCV.fit(data,flattened_y)


# STILL TO BE ADAPTED
# NOTE(review): predictions come from `model`, not from the fitted `modelCV`,
# and `xtest` / `ytest` are not defined in this part of the notebook - confirm
from sklearn.metrics import accuracy_score
ypred=model.predict(xtest)
accuracy=accuracy_score(ytest,ypred)
print('Best Parameters: '+ str(modelCV.best_params_))
print('Accuracy Score: '+ str(accuracy*100) + ' %')

## TODO ENCODER + PREVIOUS CLASSIFICATION

## Tests

Here models are trained according to selected configuration on the dataset split.

The configuration is a dictionary that allows setting the model's parameters and callbacks:

- `optimizer` (str): the selected optimizer for training. One of [`sgd`, `sgdm`, `adam`, `rmsprop`].
- `lr` (float): the learning rate. If omitted, standard values are used for the optimizer.
- `decay` (float): learning rate decay. Valid for all optimizers with a supplied `lr`.
- `momentum` (float): gradient momentum. Only valid for `sgdm` optimizer.
- `loss` (str): type of loss to minimize.

- `epochs` (int) and `batch_size` (int).
- `use_validation` (bool): whether to derive validation set from training set instead of using test set to validate training.
- `shuffle` (bool): whether to shuffle data in batches or keep the same retrieval order.

- `lr_step` (int): epochs after which the learning rate is halved. Set to 0 to disable.
- `early_stop` (int): number of epochs after which to stop training if validation loss does not decrease anymore. Set to 0 to disable.
- `tg` (bool): whether to enable Telegram notification when training finishes.

Models that contain Dense or convolutional layers may use L2 regularization with the optional parameter `l2_reg` to the model function.

Some models may apply dropout if `dropout_rate` is set, some have it enabled by default as part of the network structure.

### Fully connected

Using two dense layers leads to variable accuracy in the range [72.5%, 87.5%] with growing loss if regularization is not applied.

Regularization and dropout lower accuracy values.

In [0]:
# Two dense layers, four runs: plain, L2 only, dropout only, L2 + dropout.
# Each run gets its own freshly built config; only `epochs` differs.
input_shape = (X_train.shape[1], X_train.shape[2])

# plain model
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = TwoDense_model(input_shape, num_classes)
run_model(model, config)

# L2 regularization only
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=125, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = TwoDense_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

# dropout only
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = TwoDense_model(input_shape, num_classes, dropout_rate=0.5)
run_model(model, config)

# L2 regularization and dropout
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=60, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = TwoDense_model(input_shape, num_classes, l2_reg=0.01, dropout_rate=0.5)
run_model(model, config)

Adding one dense layer, there are no substantial changes: regularization stabilizes validation loss but lowers the accuracy by 5%, from 90% to 85%.

Dropout is not effective in bounding loss and has final accuracy similar to using regularization.

In [0]:
# Three dense layers, four runs: plain, L2 only, dropout only, L2 + dropout.
# Each run gets its own freshly built config; only `epochs` differs.
input_shape = (X_train.shape[1], X_train.shape[2])

# plain model
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=50, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = ThreeDense_model(input_shape, num_classes)
run_model(model, config)

# L2 regularization only
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=125, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = ThreeDense_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

# dropout only
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=200, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = ThreeDense_model(input_shape, num_classes, dropout_rate=0.5)
run_model(model, config)

# L2 regularization and dropout
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=30, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = ThreeDense_model(input_shape, num_classes, l2_reg=0.01, dropout_rate=0.5)
run_model(model, config)

Using five dense layers, the network is too deep for the assigned task and does not achieve interesting results.

The network reaches 90% accuracy with visible overfitting; applying dropout or regularization lowers accuracy to 85% and 75%, respectively.

In [0]:
# Five dense layers. The first two runs (plain, L2 + dropout) share a
# 50-epoch config; the last two (L2 only, dropout only) share a 300-epoch one.
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=50, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

model = FiveDense_model(input_shape, num_classes)
run_model(model, config)

model = FiveDense_model(input_shape, num_classes, l2_reg=0.01, dropout_rate=0.5)
run_model(model, config)

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=300, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

model = FiveDense_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

model = FiveDense_model(input_shape, num_classes, dropout_rate=0.5)
run_model(model, config)

### Convolutional

Using 1 convolutional and 1 dense layer we obtain poor results: overfitting and growing validation loss.

Accuracy between 85 and 90%. Regularization helps but not too much.

In [0]:
# One conv. + one dense layer: plain run, then L2-regularized run,
# both with the same 200-epoch config.
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=200, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

model = Conv1D_1C1D_model(input_shape, num_classes)
run_model(model, config)

model = Conv1D_1C1D_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

Using 1 conv. and 2 dense layers, we achieve 95% accuracy. Regularization limits the growth of the loss but lowers accuracy to 90%.

In [0]:
# One conv. + two dense layers: plain run (100 epochs), then L2 run (125 epochs).
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=100, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = Conv1D_1C2D_model(input_shape, num_classes)
run_model(model, config)

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=125, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = Conv1D_1C2D_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

Two conv. layers and one final dense layer: of the four configurations, applying only dropout is the best setup and leads to 96% accuracy.

Adding regularization is not helpful. Standard model reaches 94% accuracy with growing loss.

_Selected as one of the best CNN models for the paper._

In [0]:
# Two conv. + one dense layer: four variants, all run with the same
# 200-epoch config.
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=200, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

# plain model
model = Conv1D_2C1D_model(input_shape, num_classes)
run_model(model, config)

# L2 regularization only
model = Conv1D_2C1D_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

# dropout only -- selected for report
model = Conv1D_2C1D_model(input_shape, num_classes, dropout_rate=0.3)
run_model(model, config)

# L2 regularization and dropout
model = Conv1D_2C1D_model(input_shape, num_classes, l2_reg=0.01, dropout_rate=0.3)
run_model(model, config)

In [0]:
# this contains only the selected configuration
# (two conv. + one dense layer, dropout 0.3, 200 epochs)
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=200, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

model = Conv1D_2C1D_model(input_shape, num_classes, dropout_rate=0.3)
run_model(model, config)

Two conv. layers and two dense layers: same conclusions as in previous setup.

In [0]:
# Two conv. + two dense layers: four variants, all run with the same
# 200-epoch config.
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=200, batch_size=32,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

# plain model
model = Conv1D_2C2D_model(input_shape, num_classes)
run_model(model, config)

# L2 regularization only
model = Conv1D_2C2D_model(input_shape, num_classes, l2_reg=0.01)
run_model(model, config)

# dropout only
model = Conv1D_2C2D_model(input_shape, num_classes, dropout_rate=0.3)
run_model(model, config)

# L2 regularization and dropout
model = Conv1D_2C2D_model(input_shape, num_classes, l2_reg=0.01, dropout_rate=0.3)
run_model(model, config)

Model from Chen's paper: uses only accelerometer data and overfits after 150 epochs, achieving no more than 75% accuracy.

In [0]:
# Chen's model is fed only the first three channels of every sample.
X_tr = X_train.copy()[:, :, :3]  # only take accelerometer data
print(f'{X_train.shape} > {X_tr.shape}')

X_te = X_test.copy()[:, :, :3]
print(f'{X_test.shape} > {X_te.shape}')

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150,  # tuned
    batch_size=1024,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)

input_shape = (X_tr.shape[1], X_tr.shape[2])
model = Conv1D_Chen_model(input_shape, num_classes)
run_model(model, config, x_train = X_tr, x_test = X_te)

Model from Moya-Rueda's paper: very bad performance (no more than 65% accuracy).

In [0]:
# Moya-Rueda's model: RMSprop with explicit learning rate and decay, and the
# learning rate halved every 25 epochs (`lr_step`).
# NOTE(review): the config description defines `decay` as learning-rate decay;
# 0.95 looks more like an RMSprop rho value than a learning-rate decay --
# confirm this is intended.
input_shape = (X_train.shape[1], X_train.shape[2])

config = dict(
    #optimizer='adam',
    optimizer='rmsprop',
    lr=0.01, decay=0.95,
    loss='categorical_crossentropy',
    epochs=100, batch_size=100,
    use_validation=True, shuffle=True,
    lr_step=25, early_stop=0, tg=False,
)

model = Conv1D_Rueda_model(input_shape, num_classes)
run_model(model, config)

Model from Bevilacqua's paper: achieves $\approx$ 86% accuracy using 2D convolutions.

In [0]:
# 2D-convolution input: swap axes 1 and 2 of a copy of the data and append a
# trailing axis of size 1.
X_tr = np.swapaxes(X_train.copy(), 1, 2)[..., None]  # add last dimension
print(f'{X_train.shape} > {X_tr.shape}')

X_te = np.swapaxes(X_test.copy(), 1, 2)[..., None]
print(f'{X_test.shape} > {X_te.shape}')

input_shape = (X_train.shape[2], X_train.shape[1], 1)

# first run: config without the 'use_validation' key
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150,  # tuned. visible overfit if continuing
    batch_size=1024,
    shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = Conv2D_Bevilacqua_model(input_shape, num_classes)
run_model(model, config, x_train = X_tr, x_test = X_te)

# second run: 'use_validation' enabled
config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=200,  # tuned. stable
    batch_size=1024,
    use_validation=True, shuffle=True,
    lr_step=0, early_stop=0, tg=False,
)
model = Conv2D_Bevilacqua_model(input_shape, num_classes)
run_model(model, config, x_train = X_tr, x_test = X_te)

Model from Ha's paper: achieves $\approx$ 95% accuracy padding columns to separate signals data from different sensors.

_Best CNN model, selected for paper._ 

In [0]:
# Input preparation for Ha's 2D-convolution model: swap axes 1 and 2 of a copy
# of the data, insert zero rows along axis 1, then append a trailing axis of
# size 1.
X_tr = X_train.copy()
X_tr = np.swapaxes(X_tr,1,2)
# pad three 'cols' of zeros between each sensor data
# > ax ay az 0 0 0 gx gy gz 0 0 0 mx my mz
# Indices are processed from the highest to the lowest so that earlier
# insertions do not shift the positions used by later ones.
# NOTE(review): the loop also inserts three zero rows at index 9; if axis 1
# holds 9 channels these end up after the last channel, which the layout
# sketch above does not show -- confirm this trailing padding is intended.
for index in [9,6,3]:
    for rep in range(3):
        X_tr = np.insert(X_tr, index, 0, axis=1)
X_tr = X_tr[...,None] # add dim
print(f'{X_train.shape} > {X_tr.shape}')

# same transformation for the test set
X_te = X_test.copy()
X_te = np.swapaxes(X_te,1,2)
for index in [9,6,3]:
    for rep in range(3):
        X_te = np.insert(X_te, index, 0, axis=1)
X_te = X_te[...,None]
print(f'{X_test.shape} > {X_te.shape}')

input_shape = (X_tr.shape[1], X_tr.shape[2],1)

# The string literal below is a disabled alternative run (config without the
# 'use_validation' key); it is never executed.
"""
# no validation version
config = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'epochs': 100, # tuned. slightly overfitting but stable val. accuracy
    'batch_size': 1024,
    'shuffle': True,
    'lr_step': 0,
    'early_stop': 0,
    'tg': False
}
model = Conv2D_Ha_model(input_shape, num_classes)
run_model(model, config, x_train = X_tr, x_test = X_te) # TUNED
"""

# active run: 'use_validation' enabled
config = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'epochs': 200, # tuned. slightly overfitting but stable val. accuracy
    'batch_size': 1024,
    'use_validation': True,
    'shuffle': True,
    'lr_step': 0,
    'early_stop': 0,
    'tg': False
}
model = Conv2D_Ha_model(input_shape, num_classes)
run_model(model, config, x_train = X_tr, x_test = X_te)

### Recurrent

In [0]:
# TwoLSTM_modelv1: 150 epochs, batches of 200, no shuffling,
# config without the 'use_validation' key.
input_shape = (None, X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=200,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = TwoLSTM_modelv1(input_shape, num_classes)
run_model(model, config)

In [0]:
# TwoLSTM_modelv0: 150 epochs, batches of 200, no shuffling,
# config without the 'use_validation' key.
input_shape = (None, X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=200,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = TwoLSTM_modelv0(input_shape, num_classes)
run_model(model, config)

In [0]:
# TwoLSTM_model: 150 epochs, batches of 200, no shuffling,
# config without the 'use_validation' key.
input_shape = (None, X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=200,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = TwoLSTM_model(input_shape, num_classes)
run_model(model, config)

In [0]:
# TwoLSTM_modelv2: 150 epochs, batches of 200, no shuffling,
# config without the 'use_validation' key.
input_shape = (None, X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=200,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = TwoLSTM_modelv2(input_shape, num_classes)
run_model(model, config)

In [0]:
# TwoLSTM_modelv3: 150 epochs, batches of 200, no shuffling,
# config without the 'use_validation' key.
input_shape = (None, X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=200,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = TwoLSTM_modelv3(input_shape, num_classes)
run_model(model, config)

In [0]:
# TwoLSTM_modelv4: 150 epochs, batches of 200, no shuffling,
# config without the 'use_validation' key.
input_shape = (None, X_train.shape[1], X_train.shape[2])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=150, batch_size=200,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = TwoLSTM_modelv4(input_shape, num_classes)
run_model(model, config)

### GRU (TODO delete this label)

In [0]:
#drop 0.2 softmax - bs 100 (vs 200)
# OneGRU_model: 250 epochs, batches of 100, no shuffling; the input shape
# leaves the first dimension open and takes the last axis of X_train.
input_shape = (None, X_train.shape[-1])

config = dict(
    optimizer='adam', loss='categorical_crossentropy',
    epochs=250, batch_size=100,
    shuffle=False,
    lr_step=0, early_stop=0, tg=False,
)

model = OneGRU_model(input_shape, num_classes)
run_model(model, config)

### CNN + LSTM

In [0]:
config = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'epochs': 150,
    'batch_size': 300,
    'shuffle': False,
    'lr_step': 0,
    'early_stop': 0,
    'tg': False,
}

# Every window is split into `folds` consecutive sub-sequences. The fold
# length is derived from the window length instead of being hard-coded
# (4 folds of 32 steps for the original 128-step windows).
folds = 4
window_len = X_train.shape[1]
if window_len % folds:
    raise ValueError(f'window length {window_len} is not divisible by {folds} folds')
fold_len = window_len // folds

input_shape = (folds, fold_len, X_train.shape[-1])
model = CNN_LSTM_model(input_shape, num_classes)
# each set is reshaped using its own sample count and feature dimension
run_model(model, config,
          x_train = X_train.reshape(X_train.shape[0], folds, fold_len, X_train.shape[-1]),
          x_test = X_test.reshape(X_test.shape[0], folds, fold_len, X_test.shape[-1]))

## TODO try with reshape [0], 128*9

### CNN AutoEncoder

In [0]:
config = {
    'optimizer': 'adam',
    'loss': 'mse',
    'epochs': 20,
    'batch_size': 200,
    'shuffle': False,
    'lr_step': 0,
    'early_stop': 0,
    'tg': False
}

# The autoencoder works on single timesteps: every window is unrolled into
# rows of shape (1, n_features). The feature count comes from the data
# (it was hard-coded to 9), matching the value handed to the model below.
x_tr = X_train.reshape(-1, 1, X_train.shape[2])
x_te = X_test.reshape(-1, 1, X_test.shape[2])
# reconstruction targets are the inputs themselves
y_tr = X_train.reshape(-1, 1, X_train.shape[2])
y_te = X_test.reshape(-1, 1, X_test.shape[2])

input_shape = (x_tr.shape[1], x_tr.shape[2])
model = CNN_AE_model(input_shape, X_train.shape[2])
run_model(model, config, x_train=x_tr,  y_train=y_tr, x_test=x_te, y_test=y_te, y_test_orig = x_te)

### LSTM AutoEncoder

In [0]:
config = {
    'optimizer': 'adam',
    'loss': 'mse',
    'epochs': 20,
    'batch_size': 200,
    'shuffle': False,
    'lr_step': 0,
    'early_stop': 0,
    'tg': False
}

# The autoencoder works on single timesteps: every window is unrolled into
# rows of shape (1, n_features). The feature count comes from the data
# (it was hard-coded to 9), matching the value handed to the model below.
x_tr = X_train.reshape(-1, 1, X_train.shape[2])
x_te = X_test.reshape(-1, 1, X_test.shape[2])
# reconstruction targets are the inputs themselves
y_tr = X_train.reshape(-1, 1, X_train.shape[2])
y_te = X_test.reshape(-1, 1, X_test.shape[2])

input_shape = (x_tr.shape[1], x_tr.shape[2])
model = LSTM_AE_model(input_shape, X_train.shape[2])
run_model(model, config, x_train=x_tr,  y_train=y_tr, x_test=x_te, y_test=y_te, y_test_orig = x_te)

### CNN + LSTM AutoEncoder

In [0]:
config = {
    'optimizer': 'adam',
    'loss': 'mse',
    'epochs': 20,
    'batch_size': 100,
    'shuffle': False,
    'lr_step': 0,
    'early_stop': 0,
    'tg': False
}

# The autoencoder works on single timesteps: every window is unrolled into
# rows of shape (1, n_features). The feature count comes from the data
# (it was hard-coded to 9), matching the value handed to the model below.
x_tr = X_train.reshape(-1, 1, X_train.shape[2])
x_te = X_test.reshape(-1, 1, X_test.shape[2])
# reconstruction targets are the inputs themselves
y_tr = X_train.reshape(-1, 1, X_train.shape[2])
y_te = X_test.reshape(-1, 1, X_test.shape[2])

input_shape = (x_tr.shape[1], x_tr.shape[2])
model = CNN_LSTM_AE_model(input_shape, X_train.shape[2])
run_model(model, config, x_train=x_tr,  y_train=y_tr, x_test=x_te, y_test=y_te, y_test_orig = x_te)

## Tests in pure Tensorflow

### LSTM

In [0]:
# model definition
#
# NOTE(review): the function defined below is named `LSTM`, the same name as
# the Keras layer imported at the top of the notebook. Once this cell has run,
# `LSTM` refers to this function, so the Keras model builders that call
# `LSTM(...)` must not be run afterwards without re-importing the layer.

features = 32 # number of hidden layer's features

#batch = 1500 # TODO unused vars
#n_iters = 300
#tot_iters = Y_train.shape[0] * n_iters
#disp_iter = 1000

# weights: input projection 'h' (input features -> hidden features) and
# output projection 'o' (hidden features -> Y_train.shape[1] outputs)
w = {
    'h' : tf.Variable(tf.random_normal([X_train.shape[2], features])),
    'o' : tf.Variable(tf.random_normal([features, Y_train.shape[1]], mean=1.0))
}
# biases matching the two projections above
b = {
    'h' : tf.Variable(tf.random_normal([features])),
    'o' : tf.Variable(tf.random_normal([Y_train.shape[1]]))
}

def LSTM(X, w, b):
    """Build the graph of a two-layer LSTM classifier and return its logits.

    Args:
        X: input tensor; assumed to be (batch, steps, input features).
        w: dict with the 'h' (input projection) and 'o' (output projection)
            weight variables.
        b: dict with the matching 'h' and 'o' bias variables.

    Returns:
        The output of the last timestep projected with ``w['o']`` and
        ``b['o']`` (no softmax applied here).
    """
    # input processing
    X = tf.transpose(X,[1,0,2])         # swap the first two axes: (batch, steps, input) -> (steps, batch, input)
    X = tf.reshape(X, [-1, X.shape[2]]) # (steps*batch, n_initial_"features")

    # ReLU-activated input projection, then split into X_train.shape[1]
    # equal pieces along axis 0 (one per timestep)
    X = tf.nn.relu(tf.matmul(X, w['h']) + b['h'])
    X = tf.split(X, X_train.shape[1])
    
    # model
    l_1 = tf.contrib.rnn.BasicLSTMCell(features, forget_bias=1.0, state_is_tuple=True)
    l_2 = tf.contrib.rnn.BasicLSTMCell(features, forget_bias=1.0, state_is_tuple=True)    
    lstm = tf.contrib.rnn.MultiRNNCell([l_1,l_2], state_is_tuple=True)    
    
    # output
    out, state = tf.contrib.rnn.static_rnn(lstm, X, dtype=tf.float32)
    
    # only the last element of the output list feeds the classifier
    return tf.matmul(out[-1], w['o']) + b['o']

In [0]:
# define a dataset object on input: the training set repeated endlessly and
# served in batches of 300
ds_obj = tf.data.Dataset.from_tensor_slices((X_train.astype(np.float32), Y_train.astype(np.float32))).repeat().batch(300)
# not named `iter`, which would shadow the builtin for the rest of the notebook
ds_iter = ds_obj.make_one_shot_iterator()
x, y = ds_iter.get_next()

prediction = LSTM(x, w, b)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(prediction,1), tf.argmax(y,1)),tf.float32))

# losses, optimizer
lr = 0.0025
lambda_l = 0.0015

# L2 penalty over all trainable variables, added to the cross-entropy
l2_norm = lambda_l * sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
softmax_cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction)) + l2_norm
adam = tf.train.AdamOptimizer(learning_rate=lr).minimize(softmax_cost)

# cast the test set once instead of at every iteration
X_test_f32 = X_test.astype(np.float32)
Y_test_f32 = Y_test.astype(np.float32)

# run training
test_log  = {'loss':[], 'acc':[]}
train_log = {'loss':[], 'acc':[]}
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # NOTE: each iteration consumes ONE batch of 300 samples (a training step);
    # the plots below nevertheless label the x axis 'Epoch'.
    for i in range(1000):
        _, l, a = sess.run([adam, softmax_cost, accuracy])
        # store plain Python floats: the fetched values are NumPy float32
        # scalars, which json.dump cannot serialize
        train_log['loss'].append(float(l))
        train_log['acc'].append(float(a))

        # evaluate on the whole test set by feeding it in place of the batch
        l, a = sess.run([softmax_cost, accuracy], feed_dict={x: X_test_f32, y: Y_test_f32})
        test_log['loss'].append(float(l))
        test_log['acc'].append(float(a))
print('Reached {}'.format(max(test_log['acc'])))

# save stuff and plots
out_folder = os.path.join('output', datetime.now(pytz.timezone('Europe/Rome')).strftime('%y%m%d-%H%M%S')+'_LSTM-TF')
# also creates the parent 'output' directory when it does not exist yet
os.makedirs(out_folder, exist_ok=True)

with open(os.path.join(out_folder, 'history.json'),'w') as hfile:
    json.dump({'training':train_log, 'validation':test_log}, hfile, indent=2)

# one figure per metric, saved both as PNG and PDF
for metric, ylabel, stem in (('loss', 'Loss', 'plot-loss'),
                             ('acc', 'Accuracy', 'plot-accuracy')):
    plt.figure()
    plt.plot(train_log[metric], label='Training')
    plt.plot( test_log[metric], label='Validation')
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.tight_layout()
    fname = os.path.join(out_folder, stem)
    plt.savefig(fname+'.png')
    plt.savefig(fname+'.pdf', format='pdf')