# Time series classification - MLP

# Load Python packages
Import the Python packages that we will need.

In [None]:
from pathlib import Path
import time

import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.models import Model

# General settings
# Use seaborn's 'whitegrid' plot style.
sns.set_style('whitegrid')

# User settings

In [None]:
# True: read the dataset from the GitHub URL; False: read the local copy
# under ../../data.
load_from_web = True

# Load the data

In [None]:
# Read the development set into the NumPy array `robot_data`, either from a
# local text file or from the copy hosted on GitHub.
if not load_from_web:
    data_dir = '../../data'
    data_name = 'SonyAIBORobotSurface1_IoC'
    data_filename = f'{data_dir}/{data_name}/{data_name}_DEV.txt'
    robot_data = np.loadtxt(Path(data_filename))
    print('Loaded from', data_filename)
else:
    url = 'https://raw.githubusercontent.com/Withington/deepscent/master/data/SonyAIBORobotSurface1_IoC/SonyAIBORobotSurface1_IoC_DEV.txt'
    # The hosted file is tab-separated and has no header row.
    robot_df = pd.read_csv(url, sep='\t', header=None)
    robot_data = robot_df.values
    print('Loaded from', url)

print('The shape of robot_data is', robot_data.shape)
print('robot_data:', robot_data)

In [None]:
# The first column of robot_data is used as the label; the remaining
# columns are used as the input values.
x_dev = robot_data[:, 1:]
y_dev = robot_data[:, 0]
print('The shape of x_dev is', x_dev.shape)
print('The shape of y_dev is', y_dev.shape)

# Change from classes 1 and 2 to classes 0 and 1
# (min-max scaling: the smallest label becomes 0, the largest becomes 1).
y_min, y_max = y_dev.min(), y_dev.max()
y_dev = (y_dev - y_min)/(y_max - y_min)

print('Number of samples of class 0', np.count_nonzero(y_dev == 0))
print('Number of samples of class 1', np.count_nonzero(y_dev == 1))

# Split the development dataset into training and test datasets

In [None]:
# Report the class balance of the development set, first as counts and then
# as a bar chart.
n_class0 = (y_dev == 0).sum()
n_class1 = (y_dev == 1).sum()
print('Number of samples of class 0', n_class0)
print('Number of samples of class 1', n_class1)

y_dev_df = pd.DataFrame(y_dev)
class_counts = y_dev_df[0].value_counts()
class_counts.plot.bar()

In [None]:
# Hold out 100 samples for testing. The split is stratified on the labels
# and uses a fixed random_state so that it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x_dev,
    y_dev,
    test_size=100,
    stratify=y_dev,
    random_state=21,
)

print('The shape of train_data is', x_train.shape)
print('The shape of test_data is', x_test.shape)

# Class counts for each of the two splits.
for split_title, split_labels in (('Training data:', y_train), ('Test data:', y_test)):
    print(split_title)
    print('Number of samples of class 0', (split_labels == 0).sum())
    print('Number of samples of class 1', (split_labels == 1).sum())

# Pre-process the data

In [None]:
# Mean and standard deviation are computed over all values in x_train
# (no axis argument is given).
x_train_mean, x_train_std = x_train.mean(), x_train.std()
print('x_train_mean', x_train_mean)
print('x_train_std', x_train_std)

# Optional standardisation. The test set is scaled with the training-set
# statistics.
do_standardise = False
if do_standardise:
    x_test = (x_test - x_train_mean)/x_train_std
    x_train = (x_train - x_train_mean)/x_train_std

In [None]:
# Overlay two training samples, each labelled with its class.
sample_a = 0 ### CHANGE PARAMETER HERE ###
sample_b = 4 ### CHANGE PARAMETER HERE ###
for sample_index in (sample_a, sample_b):
    plt.plot(x_train[sample_index], label='category'+str(y_train[sample_index]))
plt.legend(loc='upper right', frameon=False)

# MLP 1
Create a multilayer perceptron (MLP). This first MLP is small.

In [None]:
# Shape of a single sample: every dimension of x_train except the first one.
input_shape = x_train.shape[1:]

In [None]:
# MLP 1: two fully connected ReLU layers followed by a single sigmoid unit.
x = Input(shape=input_shape, name='InputLayer')
# Layer 1
y = Dense(16, activation='relu', name='Layer010Dense')(x) ### CHANGE PARAMETER HERE ###
# Layer 2
y = Dense(8, activation='relu', name='Layer020Dense')(y) ### CHANGE PARAMETER HERE ###
# Output layer
out = Dense(1, activation='sigmoid', name='OutputLayer')(y)

# Build model
model_mlp1 = Model(x, out)
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line after the table).
model_mlp1.summary()

## Understanding the number of parameters
TODO - exercise around the calculation that arrives at the number of parameters in each layer.

In [None]:
# Parameters of a dense layer = inputs*units weights + units biases.
# Each pair is (inputs, units) for Layer 1, Layer 2 and the output layer.
# NOTE(review): the 70 assumes each input sample has 70 values - confirm
# against input_shape.
for n_inputs, n_units in ((70, 16), (16, 8), (8, 1)):
    print(n_inputs*n_units + n_units)

## Select an optimizer and compile the model

In [None]:
# Adam with its default settings, binary cross-entropy as the loss, and
# accuracy tracked as an additional metric.
optimizer = keras.optimizers.Adam()
model_mlp1.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# TODO - can we access TensorBoard on colab? If so, add tensorboard callback

## Train MLP 1

In [None]:
# Training settings for MLP 1: number of epochs and mini-batch size.
epochs = 50
batch_size = 5

In [None]:
# Fit MLP 1, using the test split as validation data, and time the run.
start = time.time()
hist = model_mlp1.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
)
end = time.time()

# One row per epoch, one column per recorded metric.
log = pd.DataFrame(hist.history)
elapsed_seconds = round(end - start)
print('Training complete in', elapsed_seconds, 'seconds')

In [None]:
# Metrics recorded for the earliest epochs.
print('The first five rows in the log are')
log.head(n=5)

In [None]:
# Metrics recorded for the final epochs.
print('The last five rows in the log are')
log.tail(n=5)

In [None]:
# Learning curves: training and validation loss, one point per epoch
# (the index of `log` is the epoch number).
loss_ax = log[['loss', 'val_loss']].plot()
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('MLP 1 training and validation loss')

In [None]:
# The accuracy column name depends on the Keras/TensorFlow version:
# metrics=['accuracy'] is logged as 'acc' by older releases and as
# 'accuracy' by TensorFlow 2.x. Use whichever is present so the plot does
# not fail with a KeyError.
acc_key = 'accuracy' if 'accuracy' in log.columns else 'acc'
log[[acc_key, 'val_' + acc_key]].plot()

# Make predictions using MLP 1
Classify the data using MLP 1

In [None]:
# Evaluate on the held-out split. The second entry of `result` is printed
# as the accuracy.
result = model_mlp1.evaluate(
    x_test,
    y_test,
    batch_size=batch_size,
)
validation_accuracy = result[1]
print('Validation accuracy is', validation_accuracy)

In [None]:
# Model outputs for the whole test set, then rounded to the nearest
# integer to give class labels.
y_probability = model_mlp1.predict_on_batch(x_test)
rounded_probability = np.round(y_probability)
y_predicted_class = rounded_probability.flatten()

# Compare the first 23 true labels with the first 23 predictions.
n_shown = 23
print('Some of the test results:')
print('True', y_test[:n_shown])
print('Pred', y_predicted_class[:n_shown])

In [None]:
# Look at one test sample: model output, predicted class and true class.
sample = 3
sample_probability = y_probability[sample][0]
sample_prediction = y_predicted_class[sample]
sample_truth = y_test[sample]
print('The probability that sample', sample, 'belongs to class 1 is', sample_probability)
print('The model classifies sample', sample, 'as class', sample_prediction)
print('The true class of sample', sample, 'is class', sample_truth)

In [None]:
# Overlay two test samples; each legend entry shows the true and the
# predicted class.
sample_a = 0 ### CHANGE PARAMETER HERE ###
sample_b = 3 ### CHANGE PARAMETER HERE ###
for sample_index in (sample_a, sample_b):
    plt.plot(
        x_test[sample_index],
        label='True:'+str(y_test[sample_index])+' Pred:'+str(y_predicted_class[sample_index]),
    )
plt.legend(loc='upper right', frameon=False)