In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the project folder on the mounted Drive.
%cd /content/drive/My Drive/ANN_Challenge2

/content/drive/My Drive/ANN_Challenge2


In [None]:
!pip install visualkeras

import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import visualkeras

# short aliases for the Keras API and its layers module, used throughout the notebook
tfk = tf.keras
tfkl = tf.keras.layers
# report the TensorFlow version in use
print(tf.__version__)

In [None]:
!unzip training_dataset_homework2.zip

unzip:  cannot find or open training_dataset_homework2.zip, training_dataset_homework2.zip.zip or training_dataset_homework2.zip.ZIP.


In [None]:
# load the training samples and their labels from the extracted dataset folder
X_train, y_train = [
    np.load(npy_path)
    for npy_path in (
        '/content/drive/My Drive/ANN_Challenge2/training_dataset_homework2/x_train.npy',
        '/content/drive/My Drive/ANN_Challenge2/training_dataset_homework2/y_train.npy',
    )
]

In [None]:
# inspect data shape: display the shape of the sample array next to the shape of the label array
X_train.shape, y_train.shape

((2429, 36, 6), (2429,))

In [None]:
# set input shape of a sample: every axis of X_train except the leading (per-sample) axis
input_shape = X_train.shape[1:]

In [None]:
# inspect class distribution
# maps each integer class id found in y_train to a human-readable class name
labels = {
    0: 'Wish',
    1: 'Another',
    2: 'Comfortably',
    3: 'Money',
    4: 'Breathe',
    5: 'Time',
    6: 'Brain',
    7: 'Echoes',
    8: 'Wearing',
    9: 'Sorrow',
    10: 'Hey',
    11: 'Shine',
}

# translate every integer label into its class name
# (must run while y_train still holds the sparse integer labels)
y = [labels[class_id] for class_id in y_train]

plt.figure(figsize=(17,5))
# pass the names explicitly as `x`: recent seaborn versions interpret a positional
# argument as `data`, not as the variable to count
sns.countplot(x=y)
plt.title('Count of samples per class')
plt.show()

In [None]:
# Convert the sparse labels to categorical (one-hot) values.
# Guarded on the label rank so that re-running this cell does not one-hot encode
# labels that have already been converted.
if y_train.ndim == 1:
  y_train = tfk.utils.to_categorical(y_train)
X_train.shape, y_train.shape

((2429, 36, 6), (2429, 12))

In [None]:
# shuffle samples and labels together, using a fixed random_state so the order is repeatable
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train, random_state=0)

In [None]:
# set hyperparameters for training
classes = 12  # number of output classes (width of the final softmax layer)
batch_size = 64  # samples per gradient update, passed to model.fit
epochs = 200  # upper bound on training epochs, passed to model.fit

In [None]:
# defines a custom scaler to be applied before the actual model, both at training and prediction time
class CustomScalerLayer(tfkl.Layer):
  """Standard-scales its input with fixed, precomputed statistics.

  The layer has no trainable weights: it computes ``(inputs - mean) / std``,
  with ``mean`` and ``std`` broadcast against the input.

  Args:
    mean: per-feature mean (array-like), broadcastable to the layer input.
    std: per-feature standard deviation (array-like), broadcastable to the layer input.
    **kwargs: standard Keras ``Layer`` keyword arguments (e.g. ``name``).
  """

  def __init__(self, mean, std, **kwargs):
    # forward the standard Layer arguments so the layer can be named and
    # re-created from its config
    super().__init__(**kwargs)
    # np.asarray accepts both the arrays computed in the notebook and the plain
    # lists produced by get_config()
    self.mean = np.asarray(mean)
    self.std = np.asarray(std)

  def call(self, inputs):
    """Return the standardized inputs."""
    return (inputs - self.mean)/self.std

  def get_config(self):
    """Return a serializable config so that a saved model can rebuild this layer."""
    config = super().get_config()
    config.update({'mean': self.mean.tolist(), 'std': self.std.tolist()})
    return config

In [None]:
# compute mean and std per feature (last axis), pooling over every other axis;
# the number of features is taken from the data instead of being hard-coded
# NOTE(review): the statistics are computed on all of X_train, including the samples
# that model.fit later holds out through validation_split - confirm this is intended
feature_axes = tuple(range(X_train.ndim - 1))
data_mean = X_train.mean(axis=feature_axes).astype(np.float64)
data_std = X_train.std(axis=feature_axes).astype(np.float64)

# a constant feature would make the scaler divide by zero
assert np.all(data_std > 0), 'found a feature with zero standard deviation'

data_mean, data_std

In [None]:
def _residual_block(inputs, filters, project_shortcut=True):
    """Stack three 'same'-padded Conv1D layers (kernels 8, 5, 3) with a skip connection.

    Args:
        inputs: tensor entering the block.
        filters: number of filters of every convolution in the block.
        project_shortcut: when True, a kernel-size-1 convolution brings the shortcut
            to `filters` channels before the sum; set to False when the input
            already has `filters` channels.

    Returns:
        The ReLU-activated sum of the batch-normalized shortcut and the convolution path.
    """
    # convolution path: ReLU after the first two convolutions only
    x = tfkl.Conv1D(filters, 8, padding='same')(inputs)
    x = tfkl.Activation('relu')(x)

    x = tfkl.Conv1D(filters, 5, padding='same')(x)
    x = tfkl.Activation('relu')(x)

    x = tfkl.Conv1D(filters, 3, padding='same')(x)

    # shortcut path: optionally expand channels for the sum, then batch-normalize
    shortcut = inputs
    if project_shortcut:
        shortcut = tfkl.Conv1D(filters=filters, kernel_size=1, padding='same')(shortcut)
    shortcut = tfkl.BatchNormalization()(shortcut)

    # sum to create the skip connection
    x = tfkl.add([shortcut, x])
    return tfkl.Activation('relu')(x)


def build_model(input_shape, classes, mean, std):
    """Build and compile the residual 1D-convolutional classifier with attention.

    The input is standard-scaled by `CustomScalerLayer`, goes through three
    residual convolution blocks (64, 128 and 128 filters), and multi-head
    attention is computed between a channel-expanded copy of the scaled input
    (query) and the output of the last block (value). The result is averaged
    over the sequence axis and classified by a softmax layer.

    Args:
        input_shape: shape of a single sample, without the batch axis.
        classes: number of output classes.
        mean: per-feature mean handed to the scaling layer.
        std: per-feature standard deviation handed to the scaling layer.

    Returns:
        A compiled `tfk.Model` (categorical cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    input_layer = tfkl.Input(input_shape)

    # standard scale input
    scaler = CustomScalerLayer(mean,std)(input_layer)

    # three blocks of convolutions; the last one needs no channel expansion on its
    # shortcut because its input already has 128 channels
    output_layer_block_1 = _residual_block(scaler, 64)
    output_layer_block_2 = _residual_block(output_layer_block_1, 128)
    output_layer_block_3 = _residual_block(output_layer_block_2, 128, project_shortcut=False)

    # FINAL

    # expand channels to match the size of the output of the last block
    shortcut = tfkl.Conv1D(filters=128, kernel_size=1, padding='same')(scaler)

    # compute attention between input (query) and output of the last block (value)
    attention = tfkl.MultiHeadAttention(num_heads=10, key_dim=32)(shortcut, output_layer_block_3)

    gap_layer = tfkl.GlobalAveragePooling1D()(attention)

    output_layer = tfkl.Dense(classes, activation='softmax')(gap_layer)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # Compile the model; metrics is passed as a list, the form documented by Keras
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['accuracy'],
    )

    return model

In [None]:
# instantiate the network with the dataset statistics and print its layer summary
model = build_model(
    input_shape=input_shape,
    classes=classes,
    mean=data_mean,
    std=data_std,
)
model.summary()

In [None]:
# callbacks: stop when validation accuracy has not improved for 100 epochs (restoring the
# best weights), and halve the learning rate after 5 epochs without improvement
training_callbacks = [
    tfk.callbacks.EarlyStopping(
        monitor='val_accuracy',
        mode='max',
        patience=100,
        restore_best_weights=True,
    ),
    tfk.callbacks.ReduceLROnPlateau(
        monitor='val_accuracy',
        mode='max',
        patience=5,
        factor=0.5,
        min_lr=1e-5,
    ),
]

# train the model, holding out 20% of the data for validation; keep only the metrics history
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=.2,
    callbacks=training_callbacks,
).history

In [None]:
# persist the trained model under the 'SubmissionModel' path
model.save('SubmissionModel')

