In [None]:
import tensorflow as tf
import numpy as np
import os
import random
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rc('font', size=16) 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
import warnings
import logging


# Short aliases for the Keras API and its layers module, used in the model cells
tfk = tf.keras
tfkl = tfk.layers
print(tf.__version__)

In [None]:
# Fix the random seed of every library the notebook relies on
seed = 42

os.environ['PYTHONHASHSEED'] = str(seed)
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed, tf.compat.v1.set_random_seed):
    seed_fn(seed)

In [None]:
# Load the complete labelled dataset (features, then labels) from the Kaggle input folder
x_train_val = np.load('/kaggle/input/time-series/x_train.npy')
y_train_val = np.load('/kaggle/input/time-series/y_train.npy')

In [None]:
# Report the shapes of the loaded feature and label arrays
for caption, array in (("X_train shape", x_train_val), ("Y_train shape", y_train_val)):
    print(caption, array.shape)

In [None]:
# Class id -> class name; the position in the list is the class id
labels = dict(enumerate([
    "Wish",
    "Another",
    "Comfortably",
    "Money",
    "Breathe",
    "Time",
    "Brain",
    "Echoes",
    "Wearing",
    "Sorrow",
    "Hey",
    "Shine",
]))

In [None]:
# UNDERSTANDING THE DATA : VISUALIZE

# Each sample appears to be a time series of 36 time steps, with one value for
# each of 6 different features at every step, i.e. a (36, 6) array per sample.

def plot_example(random_index, x, y):
    """Plot every feature of one sample as its own line chart.

    Parameters
    ----------
    random_index : int
        Position of the sample to draw in ``x`` and ``y``.
    x : array-like
        Samples; ``x[random_index]`` must be a 2-D array indexed as
        ``[time_step, feature]``.
    y : array-like
        Class ids; ``y[random_index]`` must be a key of the global ``labels``.

    The panels are laid out three per row, with as many rows as the number of
    features requires (2 x 3 for six features). Nothing is returned, so a
    notebook cell ending with this call does not render the figure twice.
    """
    example = x[random_index]
    example_label = y[random_index]
    n_features = example.shape[1]

    # Size the grid from the data instead of assuming exactly six features
    n_cols = 3
    n_rows = max(1, (n_features + n_cols - 1) // n_cols)
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
    fig.suptitle('Category : ' + labels[example_label])

    # Row-major flattening keeps feature i at row i // 3, column i % 3
    flat_axes = axs.ravel()
    for i in range(n_features):
        flat_axes[i].set_title('Feature ° ' + str(i + 1))
        flat_axes[i].plot(example[:, i])

    # Hide the panels left over when n_features is not a multiple of n_cols
    for unused_ax in flat_axes[n_features:]:
        unused_ax.set_visible(False)




# Draw the index over the actual number of samples rather than a hard-coded
# size, so the call stays valid if the dataset changes.
random_index = np.random.randint(0, x_train_val.shape[0])
plot_example(random_index, x_train_val, y_train_val)


In [None]:
# CLASS REPARTITIONS

# Start every class at zero so a class absent from the data still gets a bar
class_repartitions = {class_name: 0 for class_name in labels.values()}
for class_id in y_train_val:
    class_repartitions[labels[class_id]] += 1

# Sanity check: the per-class counts must add up to the number of samples
S = sum(class_repartitions.values())

print("TOTAL : ",S)
plt.figure(figsize=(20,20))
plt.bar(class_repartitions.keys(), class_repartitions.values(), color='g')


# ===> UNBALANCED DATASET

In [None]:
n_total = x_train_val.shape[0]
n_classes = len(labels)

# Inverse-frequency weights: a class with the average number of samples
# (n_total / n_classes) gets weight 1, rarer classes get proportionally more.
# A class with zero samples raises ZeroDivisionError here.
class_loss_weights = {
    class_number: (1 / class_repartitions[labels[class_number]]) * (n_total / n_classes)
    for class_number in range(n_classes)
}

class_loss_weights

In [None]:
# PREPARING THE DATA UPSTREAM (BEFORE MODELING)

In [None]:
# PREPROCESSING

In [None]:
# Studying each feature/variable separately (statistics, distribution, boxplots ...)

# Stack every time step of every sample into one long table: one row per
# (sample, time step) pair, one column per feature. Rows are sample-major,
# i.e. all time steps of sample 0 first, then those of sample 1, and so on.
n_timesteps, n_features = x_train_val.shape[1:]
x_train_val_flattened = x_train_val.reshape(-1, n_features).copy()

# Repeat each sample's label once per time step so rows and labels stay aligned
y_train_val_flattened = np.repeat(y_train_val, n_timesteps)

df = pd.DataFrame(x_train_val_flattened)
df["class"] = y_train_val_flattened
df["class_name"] = [labels[y] for y in y_train_val_flattened]

In [None]:
# Display the per-time-step table built in the previous cell
df

In [None]:
# Summary statistics of the six feature columns (labelled 0 to 5)
df[list(range(6))].describe()

In [None]:
# Taking outliers into account for scaling : ROBUST SCALER !!!!

In [None]:
from sklearn.preprocessing import RobustScaler

# Fit a per-feature robust scaler on the flattened (row-per-time-step) table.
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split is made — confirm this is acceptable for the evaluation.
transformer = RobustScaler()
transformer.fit(x_train_val_flattened)
medians = transformer.center_
IQR = transformer.scale_
print("medians values : ", medians)
print("IQR ranges : ", IQR)

# Scale every sample in a single call: flatten to (rows, features), transform,
# then restore the original (samples, time steps, features) shape.
n_features = x_train_val.shape[-1]
x_train_val_scaled = transformer.transform(
    x_train_val.reshape(-1, n_features)
).reshape(x_train_val.shape)

print()
print("Verification on a sample: ")
print(" original : ", x_train_val[0][0])
print(" robust scaled : ", x_train_val_scaled[0][0])
# Manual recomputation of the scaling for the first time step of the first sample
should_be = (x_train_val[0][0] - medians) / IQR
print(" should be : ", np.round(should_be, 7))
print()

# Actually check the result so the success message below is only printed when true
np.testing.assert_allclose(x_train_val_scaled[0][0], should_be, rtol=1e-4, atol=1e-6)
print("Robust Scaling works well after verification.")

In [None]:
# Verification of the scaling impact on some examples

# Sample over the actual dataset size instead of a hard-coded sample count
random_index = np.random.randint(0, x_train_val.shape[0])

print("ORIGINAL :")
plot_example(random_index, x_train_val, y_train_val)

In [None]:
# Same sample as the "ORIGINAL" plot (random_index is reused), taken from the robust-scaled array
print("NORMALIZED :")
plot_example(random_index,x_train_val_scaled,y_train_val)

In [None]:
# SPLITTING

from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for validation, stratified on the class labels.
# NOTE(review): the split is made on the raw x_train_val, not on
# x_train_val_scaled, so in the visible cells the robust-scaled data is never
# fed to the model — confirm whether this is intended.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val,
    y_train_val,
    test_size=0.1,
    random_state=seed,
    stratify=y_train_val,
)

In [None]:
# Shapes of the training / validation features, then of the matching labels
for train_part, val_part in ((x_train, x_val), (y_train, y_val)):
    print(train_part.shape, val_part.shape)

In [None]:
# Checking the class distribution after the split, to confirm that the split preserves it

In [None]:
# Count the training samples per class, starting every class at zero
train_class_repartitions = {class_name: 0 for class_name in labels.values()}
for class_id in y_train:
    train_class_repartitions[labels[class_id]] += 1

S = sum(train_class_repartitions.values())

print( "TRAINING SPLIT REPARTITION")
print("TOTAL : ",S)
plt.figure(figsize=(20,20))
plt.bar(train_class_repartitions.keys(), train_class_repartitions.values(), color='g')

In [None]:
# Count the validation samples per class, starting every class at zero
val_class_repartitions = {class_name: 0 for class_name in labels.values()}
for class_id in y_val:
    val_class_repartitions[labels[class_id]] += 1

S = sum(val_class_repartitions.values())

print( "VALIDATION SPLIT REPARTITION")
print("TOTAL : ",S)
plt.figure(figsize=(20,20))
plt.bar(val_class_repartitions.keys(), val_class_repartitions.values(), color='g')

In [None]:
# Encoding target variable

# Pass the class count explicitly: without it the one-hot width is inferred from
# the largest id present in each array, so a split that happens to miss the
# last class would produce a narrower matrix than the model's output layer.
y_train_categorical = tfk.utils.to_categorical(y_train, num_classes=len(labels))
y_val_categorical = tfk.utils.to_categorical(y_val, num_classes=len(labels))

print(y_train_categorical.shape, y_val_categorical.shape)

In [None]:
# MODEL BUILDING

In [None]:
# Training hyper-parameters
batch_size = 64
epochs = 200

# Sizes read from the data: per-sample input shape and number of one-hot columns
input_shape = x_train.shape[1:]
classes = y_train_categorical.shape[-1]

In [None]:
# ResNet model definition 


def ResBs_Conv(block_input, num_filters):
    """Residual block whose shortcut is a strided convolution.

    The input first goes through a stride-2 ``Conv1D`` + batch normalisation
    that changes it to ``num_filters`` channels. That projected tensor is both
    the shortcut and the input of the main path (conv -> BN -> ReLU -> dropout
    -> conv -> BN). The two are added and passed through a final ReLU.

    Parameters
    ----------
    block_input : Keras tensor fed to the block.
    num_filters : number of filters of every convolution in the block.

    Returns
    -------
    The Keras tensor produced by the final ReLU.
    """
    # Projection shortcut: stride 2, so the block also downsamples
    shortcut = tfkl.Conv1D(num_filters, kernel_size=7, strides=2,  padding='same')(block_input)
    shortcut = tfkl.BatchNormalization()(shortcut)

    # Main path, first convolution
    x = tfkl.Conv1D(filters=num_filters, kernel_size=7, padding='same')(shortcut)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.Dropout(0.2)(x)

    # Main path, second convolution ('same' padding so its shape matches the shortcut)
    x = tfkl.Conv1D(num_filters, kernel_size=7, padding='same')(x)
    x = tfkl.BatchNormalization()(x)

    # Residual connection followed by the block's output activation
    merged = tfkl.Add()([shortcut, x])
    return tfkl.Activation('relu')(merged)

def ResBs_Identity(block_input, num_filters):
    """Residual block whose shortcut is the unchanged input.

    Main path: conv -> BN -> ReLU -> dropout -> conv -> BN. Its output is added
    to ``block_input`` itself and passed through a final ReLU, so
    ``block_input`` must already have ``num_filters`` channels for the addition
    to be valid.

    Parameters
    ----------
    block_input : Keras tensor fed to the block (also used as the shortcut).
    num_filters : number of filters of both convolutions.

    Returns
    -------
    The Keras tensor produced by the final ReLU.
    """
    # First convolution of the main path
    x = tfkl.Conv1D(filters=num_filters, kernel_size=7, padding= 'same')(block_input)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.Dropout(0.2)(x)

    # Second convolution ('same' padding so its shape matches the shortcut)
    x = tfkl.Conv1D(num_filters, kernel_size=7, padding= 'same')(x)
    x = tfkl.BatchNormalization()(x)

    # Residual connection followed by the block's output activation
    merged = tfkl.Add()([block_input, x])
    return tfkl.Activation('relu')(merged)

def resnet(N, ch, win_len, classes):
    """Build and compile a 1-D residual network classifier.

    Architecture: a stem (Conv1D with 64 filters -> BN -> ReLU -> max pooling),
    two identity residual blocks with 64 filters, then ``int((N - 2) / 2)``
    stages. Each stage doubles the filter count and applies one ``ResBs_Conv``
    followed by one ``ResBs_Identity``. A global average pooling layer (named
    ``'gmp_layer'``) and a softmax ``Dense`` layer produce the class scores.

    Parameters
    ----------
    N : controls the depth through the number of stages described above.
    ch : number of input channels (features per time step).
    win_len : number of time steps per sample.
    classes : number of output classes.

    Returns
    -------
    A ``tfk.Model`` compiled with Adam, categorical cross-entropy and accuracy.
    """
    inputs = tfkl.Input(shape=(win_len, ch))

    # Stem
    x = tfkl.Conv1D(filters=64,kernel_size=15, padding = 'same')(inputs)
    x = tfkl.BatchNormalization()(x)
    x = tfkl.Activation('relu')(x)
    x = tfkl.MaxPooling1D(pool_size=2, strides = 2)(x)

    # The first two residual blocks keep the stem's 64 filters
    x = ResBs_Identity(x, 64)
    x = ResBs_Identity(x, 64)

    # Remaining blocks come in (conv, identity) pairs, doubling the filters each time
    n_stages = int((N - 2)/2)
    width = 64
    for _ in range(n_stages):
        width = width * 2
        x = ResBs_Conv(x, width)
        x = ResBs_Identity(x, width)

    x = tfkl.GlobalAveragePooling1D(name='gmp_layer')(x)

    # Softmax activation function on the last layer
    outputs = tfkl.Dense(classes, activation='softmax')(x)

    # The model is composed by connecting inputs to outputs
    model = tfk.Model(inputs=[inputs],outputs=outputs)

    model.compile(
        optimizer = tfk.optimizers.Adam(),
        loss = tfk.losses.CategoricalCrossentropy(),
        metrics = ['accuracy'],
    )

    return model


In [None]:
# Build the network from the sizes measured on the data (input_shape is
# (time steps, features), classes is the one-hot width) instead of repeating
# them as hard-coded literals.
model = resnet(N=8, ch=input_shape[-1], win_len=input_shape[0], classes=classes)
model.summary()

In [None]:
# TRAINING PHASE

In [None]:
# Train the model

# Stop when validation accuracy has not improved for 50 epochs and roll back to the best weights
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy', mode='max', patience=50, restore_best_weights=True
)
# Halve the learning rate after 30 epochs without improvement, down to 1e-5
reduce_lr = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', mode='max', patience=30, factor=0.5, min_lr=1e-5
)

training_run = model.fit(
    x=x_train,
    y=y_train_categorical,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_val, y_val_categorical),
    callbacks=[early_stopping, reduce_lr],
    class_weight=class_loss_weights,
)
# Keep only the per-epoch metrics dictionary
history = training_run.history

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# Confusion matrix and classification metrics on the validation split
Y_pred = model.predict(x_val)
y_pred = np.argmax(Y_pred, axis=1)

# Fix the class order explicitly so the matrix always has one row/column per
# class and the tick labels line up with it, even if a class is missing from
# the validation labels or predictions.
class_ids = sorted(labels)
cm = confusion_matrix(y_val, y_pred, labels=class_ids)

# display_labels expects the class names as a sequence, not the id -> name dict
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=[labels[class_id] for class_id in class_ids],
)
disp.plot(cmap=plt.cm.Blues, xticks_rotation='vertical')

# Compute the classification metrics (y_val holds the integer ids that
# y_val_categorical one-hot encodes, so it can be used directly)
accuracy = accuracy_score(y_val, y_pred)
precision = precision_score(y_val, y_pred, average='macro')
recall = recall_score(y_val, y_pred, average='macro')
f1 = f1_score(y_val, y_pred, average='macro')
# The built-in round() works whether a metric comes back as a NumPy scalar or a plain float
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1:', round(f1, 4))



In [None]:
import shutil

# Save the trained model under the path 'resnet50', then zip that directory
# into 'resnet50.zip'.
# NOTE(review): this assumes model.save writes a directory at the given path;
# some Keras versions require a file extension instead — confirm for the
# installed version.
model.save('resnet50')
shutil.make_archive("resnet50", 'zip', './resnet50')