# Title by Owner

## Imports

In [89]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from datetime import datetime
from os import path
from utils.callbacks import SaveBestModelInMemory
from utils.submission import create_submission_zip

## Constants

In [90]:
NUM_CLASSES = 12
RANDOM_STATE = 42 # Seed for rng to make everything reproducible and deterministic af
SAVED_MODELS_PATH = "saved-models"
TENSORBOARD_LOGS_PATH = "tensorboard-logs"
SUBMISSIONS_PATH = "../submissions"

## Parameters

In [91]:
BATCH_SIZE = 64 # Number of samples in a mini batch
EPOCHS = 30 # Number of training epochs before the training is stopped
TEST_SPLIT = 0.20 # Percent of data to use for validation/testing

## Data Loading and Preprocessing

In [92]:
data = np.load(file="../dataset/x_train.npy")
labels = np.load(file="../dataset/y_train.npy")
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=TEST_SPLIT, random_state=RANDOM_STATE)

def augment_sample(sample: np.ndarray) -> np.ndarray:
    noise = np.random.normal(0, 1, (36, 6))
    aug_sample = sample + noise
    roll_amount = np.random.randint(-6, 6)
    aug_sample = np.roll(aug_sample, roll_amount, axis=0)
    if roll_amount > 0:
        aug_sample[:roll_amount, :] = np.zeros(sample.shape[-1])
    elif roll_amount < 0:
        aug_sample[roll_amount:, :] = np.zeros(sample.shape[-1])
    return aug_sample

min_samples_per_class = 250
total_added_samples = 0
for cls in range(NUM_CLASSES):
    class_data = train_data[train_labels == cls]
    samples_to_add = min_samples_per_class - len(class_data)
    if samples_to_add <= 0:
        print(f"Class {cls} has {len(class_data)} samples. Skipping.")
        continue

    print(f"Class {cls} has {len(class_data)} samples. Adding {samples_to_add} samples.")
    total_added_samples += samples_to_add
    augmented_data = []
    for _ in range(samples_to_add):
        sample = class_data[np.random.randint(0, len(class_data))]
        augmented_sample = augment_sample(sample)
        augmented_data.append(sample)
    print(f"Sample for class {cls}: {augmented_sample}")
    train_data = np.concatenate([train_data, augmented_data])
    train_labels = np.concatenate([train_labels, [cls] * samples_to_add])

train_labels = tf.keras.utils.to_categorical(train_labels, num_classes=NUM_CLASSES)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=NUM_CLASSES)

print(f"Added {total_added_samples} to the training dataset, now all classes have at least {min_samples_per_class} samples")



# Make sure everything was loaded correctly:
print(f"All samples shape: {data.shape}, all labels shape: {labels.shape}")
print(f"Train samples shape: {train_data.shape}, Train labels shape: {train_labels.shape}")
print(f"Test samples shape: {test_data.shape}, Test labels shape: {test_labels.shape}")
print("Samples per class:")
print("Train: ", np.sum(train_labels, axis=0))
print("Test: ", np.sum(test_labels, axis=0))


Class 0 has 31 samples. Adding 219 samples.
Sample for class 0: [[ 9.59983400e+00 -1.10478433e+01 -1.66227252e+01 -8.43365371e+00
   3.23975729e+00  1.16222910e+02]
 [ 3.42807602e+00 -1.75795532e+01 -1.21322519e+01 -1.22816938e+01
  -7.15637381e-01  3.91681583e+01]
 [ 6.48184389e-01 -5.72613108e+00  4.95543343e+00 -8.80766280e+00
  -1.69701374e+01 -1.61447644e+01]
 [-1.19001360e+00  1.75094122e+01  1.16953203e+01 -1.30189412e+00
  -4.55337142e+01  2.30821128e+01]
 [-1.78242986e+00  3.06796502e+01  5.19276359e+00  5.28650543e+00
  -2.56747754e+01 -1.28537716e+01]
 [-1.86205383e+00  1.03890198e+01  7.42321686e+00  1.03456406e+01
  -1.44985800e+01 -7.98430939e+00]
 [-2.82147805e+00 -2.05982954e+01  5.05722421e+00  5.50738364e+00
  -2.62880856e+01 -5.69585517e+01]
 [-5.34773386e+00 -3.43835535e+01 -1.25061749e+01 -4.80960991e+00
  -1.85766958e+01 -6.08033821e+01]
 [-7.73585265e+00  7.53257972e-01 -9.84096085e+00 -2.90500470e+00
  -3.53107507e+01  4.16993399e+01]
 [-1.44076807e+01  5.353703

## Model Definition

In [93]:
def recall_m(y_true, y_pred):
    true_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_true * y_pred, 0, 1)))
    possible_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + tf.keras.backend.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_true * y_pred, 0, 1)))
    predicted_positives = tf.keras.backend.sum(tf.keras.backend.round(tf.keras.backend.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + tf.keras.backend.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+tf.keras.backend.epsilon()))

# Create the model inside the function this is a dummy implementation
def build_model(name: str, input_shape: tuple[int,...], classes: int) -> tf.keras.Model:
    # Build the neural network layer by layer
    input_layer = tf.keras.layers.Input(shape=input_shape, name="Input")

    # Feature extractor
    lstm = tf.keras.layers.LSTM(128, return_sequences=True)(input_layer)
    lstm = tf.keras.layers.LSTM(128)(lstm)
    dropout = tf.keras.layers.Dropout(.5, seed=RANDOM_STATE)(lstm)

    # Classifier
    classifier = tf.keras.layers.Dense(128, activation="relu")(dropout)
    output_layer = tf.keras.layers.Dense(classes, activation="softmax")(classifier)

    # Connect input and output through the Model class
    model = tf.keras.Model(inputs=input_layer, outputs=output_layer, name=name)

    # Compile the model
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=tf.keras.optimizers.Adam(), metrics=["accuracy",f1_m,precision_m, recall_m])

    # Return the model
    return model

## Training

In [94]:
input_shape = train_data.shape[1:]
classes = NUM_CLASSES
model_name = "awesome-model-balanced-classes" # Give your model an awesome name for a 2% percent accuracy increase.

model = build_model(model_name, input_shape, classes)
model.summary()

run_id = datetime.utcnow().strftime("%Y-%m-%d-%H-%M-%S")
current_tensorboard_log_dir = f"{TENSORBOARD_LOGS_PATH}/{model_name}/{run_id}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=current_tensorboard_log_dir)
print(f"Run tensorboard in a separate process with:\n"
      f"tensorboard --logdir {path.abspath(TENSORBOARD_LOGS_PATH)}\nor\n"
      f"tensorboard --logdir {path.abspath(current_tensorboard_log_dir)}")

best_weights_callback = SaveBestModelInMemory(metric="val_loss")

model.fit(x=train_data, y=train_labels, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(test_data, test_labels), callbacks=[tensorboard_callback, best_weights_callback])

Model: "awesome-model-balanced-classes"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input (InputLayer)          [(None, 36, 6)]           0         
                                                                 
 lstm_14 (LSTM)              (None, 36, 128)           69120     
                                                                 
 lstm_15 (LSTM)              (None, 128)               131584    
                                                                 
 dropout_7 (Dropout)         (None, 128)               0         
                                                                 
 dense_14 (Dense)            (None, 128)               16512     
                                                                 
 dense_15 (Dense)            (None, 12)                1548      
                                                                 
Total params: 218,764
Trainable para



Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2bf2a4370>

## Optional: Save model in memory

In [95]:
model.set_weights(best_weights_callback.best_weights)
saved_model_path = f"{SAVED_MODELS_PATH}/{model_name}/{run_id}"
model.save(saved_model_path)



INFO:tensorflow:Assets written to: saved-models/awesome-model-balanced-classes/2022-12-15-15-11-02/assets


INFO:tensorflow:Assets written to: saved-models/awesome-model-balanced-classes/2022-12-15-15-11-02/assets


## Optional: Create submission ZIP

In [96]:
submission_path = f"{SUBMISSIONS_PATH}/{model_name}/{run_id}"
create_submission_zip(submission_path, saved_model_path)

print(f"Created submission: {submission_path}.zip")

Created submission: ../submissions/awesome-model-balanced-classes/2022-12-15-15-11-02.zip
