# Train a model and test it on the final test dataset
<a href="https://colab.research.google.com/github/jarusgnuj/ioctm358/blob/master/notebooks/time_series_classification/4_TSC_final_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1 Exercise 4a : Competition part II
In notebook 3 you used the development dataset and performed a hyperparameter search. You selected a set of hyperparameters that you believe will generate the best performing model. Now we'll test it out on previously unseen data, the final test dataset.


Copy in your build_model function where indicated in a cell below.


The competition will be judged as follows:
+ Best performing model - the highest accuracy when tested on the final test dataset.
+ Tie-breaker - the highest accuracy on the last sample in the final test dataset.

In [None]:
import time

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.models import model_from_json

# Names and plot colours for the two surface classes
class_names = ['cement', 'carpet']
class_colors = ['darkorange', 'steelblue']

# Colours for the two accuracy curves drawn after training
model_palette = ['rebeccapurple', 'mediumspringgreen']

# Seaborn theme applied to every figure
sns.set_style('whitegrid')

## 1.1 Functions
Some functions, for convenience.

In [None]:
def load_data(
    filename,
    url_root='https://raw.githubusercontent.com/jarusgnuj/ai-ml-wksh/master/data/UCR_TSC_archive/SonyAIBORobotSurface1_IoC',
):
    ''' Load a tab-separated, header-less data file and return it as a NumPy array.

    filename: name of the file to read, relative to url_root.
    url_root: URL or local directory that contains the file. Defaults to the
        workshop's GitHub data folder, so existing calls behave as before;
        pass another location to read from a mirror or a local copy.

    Returns the file contents as a 2-D array (rows are file lines, columns are
    the tab-separated fields). The location and the array shape are printed.
    '''
    url = url_root + '/' + filename
    robot_df = pd.read_csv(url, sep='\t', header=None)
    print('Loaded from', url)
    robot_data = robot_df.values
    print('The shape of robot_data is', robot_data.shape)
    return robot_data


def preprocess_data(robot_data):
    ''' Separate the class labels (first column) from the data samples (remaining columns).

    The labels are shifted down by one and cast to int, so classes 1 and 2
    become classes 0 and 1. Returns the tuple (data_samples, labels).
    '''
    # Column 0 carries the class label; every other column is sample data
    raw_labels, features = robot_data[:, 0], robot_data[:, 1:]
    print('The shape of the data matrix is', features.shape)
    print('The shape of the labels vector is', raw_labels.shape)

    # Zero-based integer classes are more convenient to use with Keras
    zero_based = (raw_labels - 1).astype(int)
    class0_count = (zero_based == 0).sum()
    class1_count = (zero_based == 1).sum()
    print('Number of samples of class 0', class0_count)
    print('Number of samples of class 1', class1_count)

    return features, zero_based

In [None]:
# Development dataset: every sample in it is used to train the model below
dev_data = load_data('SonyAIBORobotSurface1_IoC_DEV.txt')
data_train, labels_train = preprocess_data(dev_data)

# Final test dataset: used below to monitor training and to score the model
final_test_data = load_data('SonyAIBORobotSurface1_IoC_FINAL_TEST.txt')
data_final_test, labels_final_test = preprocess_data(final_test_data)

## 1.2 Paste in your build model function and other hyperparameters
+ Copy in your selected model below.
+ Set other hyperparameters - epochs and batch size.

In [None]:
# Number of input features per sample (one per column of the training data)
input_dim = data_train.shape[1]

def build_model(print_summary=False):
    ''' Return a compiled model with randomly initialised weights.

    print_summary: when True, print the layer-by-layer model summary.

    The model is compiled with binary cross-entropy loss, the Adam optimizer
    (default settings) and accuracy as the only metric.
    '''
    ### Insert code here ###
    # Copy in the code to build your selected model.
    model = Sequential([
        Dense(16, input_dim=input_dim, activation='relu', name='Layer1'),
        Dense(8, activation='relu', name='Layer2'),
        Dense(1, activation='sigmoid', name='OutputLayer')
    ])
    ### End of insert code ###
    optimizer = keras.optimizers.Adam()
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    if print_summary:
        # summary() prints the table itself and returns None, so wrapping it
        # in print() would add a stray "None" line to the output.
        model.summary()
    return model

In [None]:
# Build a fresh, untrained model and set the training hyperparameters.
# batch_size and epochs are passed to model.fit() in the training cell;
# batch_size is also reused when evaluating the model.
model = build_model()
batch_size = 5 ### CHANGE PARAMETER HERE ###
epochs = 50    ### CHANGE PARAMETER HERE ###

## 1.3 Train your model 
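Train your model using all of the development data. The final test data is passed to `fit` as validation data only so that its accuracy is logged after each epoch; it is not used to update the model's weights.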

In [None]:
# Wall-clock timestamps bracket the fit call only
start = time.time()
hist = model.fit(
    data_train,
    labels_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(data_final_test, labels_final_test),
    verbose=1,
)
end = time.time()

# Per-epoch training history as a table: one column per logged metric
log = pd.DataFrame(hist.history)
elapsed_seconds = round(end - start)
print('Training complete in', elapsed_seconds, 'seconds')

## 1.4 Evaluate your model

In [None]:
# Score the trained model on the final test dataset. The model is compiled
# with a single 'accuracy' metric, so result is [loss, accuracy].
result = model.evaluate(data_final_test, labels_final_test, batch_size=batch_size)
print('Validation accuracy is', result[1])

# Plot the log's accuracy data.
# Keras names these history columns 'acc'/'val_acc' in older releases and
# 'accuracy'/'val_accuracy' in newer ones, so use whichever pair was logged
# instead of failing with a KeyError.
if 'accuracy' in log.columns:
    accuracy_columns = ['accuracy', 'val_accuracy']
else:
    accuracy_columns = ['acc', 'val_acc']
ax = log[accuracy_columns].plot(title='Accuracy during training', color=model_palette)
ax.set_xlabel("Model training epoch")
ax.set_ylabel("Accuracy")
ax.legend(["training", "final test"]);

### 1.4.1 Tie-breaker
How does your model perform on the last sample in the final test dataset?

In [None]:
# Index -1 picks the last element of an array in Python
sample_num = -1
# Wrap the single sample in an outer array so it is passed as a batch of one
data_sample = np.array([data_final_test[sample_num]])
probability = model.predict(data_sample)
class1_probability = probability[0][0]
print('Model: probability of belonging to class 1:', class1_probability)
print('Pred class:', np.round(class1_probability).astype(int))
print('True class:', labels_final_test[sample_num])

## End of exercise 4a

# 2 Save and reload the model
This trained model can now be saved and used again on another day, or on another machine. It doesn't need to be re-trained every time.

## 2.1 Save

In [None]:
# Set to True to run the save, load and re-evaluate cells; when False those cells do nothing
do_save_and_load = False ### CHANGE PARAMETER HERE ###

In [None]:
# Save the model to a JSON file
# The architecture (JSON) and the trained weights (HDF5) go to two separate
# files, written to the current working directory.
# NOTE(review): some newer Keras releases appear to require weight files to
# end in '.weights.h5' - confirm against the installed version.
model_file = 'model.json'
model_weights_file = 'model.h5'
if do_save_and_load:
    model_json = model.to_json()
    with open(model_file, 'w') as json_file:
        json_file.write(model_json)
    # Save the model's weights to an HDF5 file
    model.save_weights(model_weights_file)
    print('Model saved to file', model_file)
    print('Model weights saved to file', model_weights_file)

## 2.2 Load

In [None]:
if do_save_and_load:
    # Read the saved architecture; the with-block closes the file even if
    # read() raises, which the manual open()/close() pair did not guarantee.
    with open(model_file, 'r') as json_file:
        loaded_json_model = json_file.read()
    # Rebuild the model architecture from its JSON description
    loaded_model = model_from_json(loaded_json_model)
    # load weights into new model
    loaded_model.load_weights(model_weights_file)
    print('Model loaded from files')

## 2.3 Compile and use the loaded model

In [None]:
if do_save_and_load:
    # Compile the reloaded model with the same loss, optimizer type and
    # metric that build_model() uses, then score it on the final test data.
    optimizer = keras.optimizers.Adam()
    loaded_model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    result = loaded_model.evaluate(data_final_test, labels_final_test, batch_size=batch_size)
    print('Loaded model: validation accuracy is', result[1])
    # Predict the whole final test set in one batch, then round each
    # probability to the nearest class (0 or 1) and flatten to a 1-D array.
    labels_probability = loaded_model.predict_on_batch(data_final_test)
    labels_predicted_class = np.round(labels_probability).flatten()
    # Show the first 23 true labels next to the first 23 predictions
    print('Some of the test results:')
    print('True', labels_final_test[:23])
    print('Pred', labels_predicted_class[:23].astype(int))