In [None]:
# Execution-environment switch read by later cells: True selects the Google
# Drive paths and mounts Drive; False selects the local paths and enables file
# logging and the CSV export of the split.
colab = False

In [None]:
import logging
import os
from datetime import datetime

# File logging is only configured for local (non-Colab) runs.
if not colab:
    current_file_name = "15_Neural_Net_Model"

    # One log file per run, named after the moment this cell was executed.
    dt_string = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = f"logs/{current_file_name}/{dt_string}.log"

    # basicConfig opens the log file straight away and does not create missing
    # parent folders, so make sure the folder exists first.
    os.makedirs(os.path.dirname(log_file), exist_ok=True)
    logging.basicConfig(level=logging.INFO, filename=log_file, filemode="w", format="%(asctime)s %(levelname)s %(message)s")

    # https://blog.sentry.io/logging-in-python-a-developers-guide/

In [None]:
!pip install wandb
!pip install tqdm

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

import plotly.express as px

from tqdm.notebook import tqdm

import wandb
from wandb.integration.keras import WandbCallback

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Masking, Dropout
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall, TruePositives, TrueNegatives, FalsePositives, FalseNegatives, F1Score
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import load_model

In [None]:
# Show up to 500 columns when a DataFrame is displayed.
pd.options.display.max_columns = 500

In [None]:
# On Colab, mount Google Drive at /content/drive; the Colab data and model
# paths used later live below that mount point. Skipped for local runs.
if colab:
  from google.colab import drive
  drive.mount('/content/drive')

In [None]:
# Location of the prepared trajectories CSV for the current environment.
if colab:
    path_to_data = "/content/drive/MyDrive/2 DP/3 DP Riešenie/Google Colab/trajectories_data.csv"
else:
    # Build the relative path with os.path.join so it resolves on every OS;
    # on Windows this yields the same backslash-separated path as before.
    path_to_data = os.path.join("data", "13_Mouse_Data_Preparation", "trajectories_data.csv")

In [None]:
# Load the trajectories table and preview its first rows.
# Later cells read the columns variant, respondent, page_name, x, y,
# indicator_fg and seconds from this frame.
df = pd.read_csv(path_to_data)
df.head()

In [None]:
# Preview the columns the model pipeline relies on. Leaving the frame as the
# cell's last expression uses the notebook's rich table display instead of the
# plain-text dump produced by print().
df[["variant", "respondent", "page_name", "x", "y", "indicator_fg"]]

In [None]:
# Axis limits used for the plot (a 1920 x 1080 area).
normal_width = 1920
normal_height = 1080

# Rows of a single respondent on a single page, used as a visual spot check.
test_respondent = df[(df["respondent"] == "respondent_26") & (df["page_name"] == "page_5")]

# Animate the cursor position, one frame per value of the 'seconds' column.
# NOTE(review): if y is measured from the top of the screen, this plot is
# vertically mirrored relative to what the respondent saw — confirm.
fig = px.scatter(
    test_respondent,
    x='x',
    y='y',
    animation_frame='seconds',
    range_x=[0, normal_width],
    range_y=[0, normal_height],
)
fig.show()

## Train-Test Split

In [None]:
# Fixed respondent assignment for the train/test split, listed separately for
# the "FG" and "H" variants. The lists are combined with the `variant` column
# when the split is built, so a respondent is assigned per variant.
# NOTE(review): within a variant the train and test lists are disjoint, but
# respondent_9, respondent_22 and respondent_50 are in train (FG) and test (H),
# and respondent_45 and respondent_48 are in train (H) and test (FG). If
# subject-level independence between train and test is required, confirm this
# overlap is intended.
train_fg_respondents = ['respondent_43', 'respondent_26', 'respondent_35', 'respondent_31', 'respondent_53', 'respondent_21', 'respondent_22', 'respondent_50', 'respondent_42', 'respondent_55', 'respondent_54', 'respondent_16', 'respondent_9', 'respondent_105', 'respondent_37', 'respondent_58', 'respondent_38', 'respondent_51', 'respondent_106', 'respondent_15', 'respondent_52', 'respondent_25', 'respondent_12', 'respondent_56', 'respondent_46', 'respondent_36']
train_h_respondents = ['respondent_8', 'respondent_24', 'respondent_42', 'respondent_17', 'respondent_29', 'respondent_108', 'respondent_30', 'respondent_39', 'respondent_58', 'respondent_10', 'respondent_19', 'respondent_53', 'respondent_45', 'respondent_52', 'respondent_33', 'respondent_16', 'respondent_21', 'respondent_32', 'respondent_23', 'respondent_35', 'respondent_47', 'respondent_48', 'respondent_31', 'respondent_20']
test_fg_respondents = ['respondent_104', 'respondent_18', 'respondent_34', 'respondent_40', 'respondent_45', 'respondent_48', 'respondent_49']
test_h_respondents = ['respondent_107', 'respondent_110', 'respondent_22', 'respondent_27', 'respondent_50', 'respondent_57', 'respondent_9']

In [None]:
def _rows_for(variant, respondents):
    """Return the rows of `df` recorded under `variant` for any of `respondents`."""
    in_variant = df["variant"] == variant
    in_list = df["respondent"].isin(respondents)
    return df[in_variant & in_list]


# Per-variant slices of the train and test splits.
train_fg = _rows_for("FG", train_fg_respondents)
train_h = _rows_for("H", train_h_respondents)
test_fg = _rows_for("FG", test_fg_respondents)
test_h = _rows_for("H", test_h_respondents)

# Stack both variants into one frame per split (FG rows first, then H rows).
train_df = pd.concat([train_fg, train_h])
test_df = pd.concat([test_fg, test_h])

print(len(train_df), len(test_df))

In [None]:
# Row counts: train, test, train + test, and the full dataset.
# NOTE(review): the last two numbers only match when every (variant,
# respondent) pair present in df appears in one of the four split lists.
print(len(train_df), len(test_df), len(train_df) + len(test_df), len(df))

In [None]:
# Persist the split for local runs so it can be inspected or reused.
if not colab:
    # os.path.join keeps the paths valid on every OS, and the output folder is
    # created on demand so a fresh checkout does not fail on the first write.
    split_dir = os.path.join("data", "15_Neural_Net_Model", "data")
    os.makedirs(split_dir, exist_ok=True)

    train_df.to_csv(os.path.join(split_dir, "train_df.csv"))
    test_df.to_csv(os.path.join(split_dir, "test_df.csv"))

## Data Grouping

In [None]:
# Group the training rows by (variant, respondent, page_name); each group is
# later turned into one sequence. The cell displays the number of groups.
train_grouped = train_df.groupby(['variant', 'respondent', 'page_name'])
len(train_grouped)

In [None]:
# Same grouping for the test rows; the cell displays the number of groups.
test_grouped = test_df.groupby(['variant', 'respondent', 'page_name'])
len(test_grouped)

## Sequence Preparation

In [None]:
def prepare_sequences(df):
    """Turn grouped trajectory rows into per-observation sequences and labels.

    Args:
        df: An iterable of ``(key, DataFrame)`` pairs, e.g. the result of
            ``DataFrame.groupby(...)``. Despite the parameter name this is the
            grouped object, not a plain DataFrame. Every group must provide
            the columns ``x``, ``y`` and ``indicator_fg``.

    Returns:
        A tuple ``(X, y)``:
            X: 1-D object array with one entry per group; entry ``i`` is the
               ``(n_rows_i, 2)`` array of that group's ``x``/``y`` values.
            y: 1-D float array holding the first ``indicator_fg`` value of
               each group.
    """
    sequences = []
    labels = []

    for _, group in df:
        # Each group is a DataFrame containing the rows of one observation.
        sequences.append(group[['x', 'y']].values)
        # Assumes indicator_fg is constant within a group, so the first row is
        # representative.
        labels.append(group['indicator_fg'].iloc[0])

    # Fill a pre-allocated object array element by element. Handing the list to
    # np.array(..., dtype=object) would merge equally long sequences (or a
    # single sequence) into one N-D object array instead of keeping one entry
    # per observation.
    X = np.empty(len(sequences), dtype=object)
    for position, sequence in enumerate(sequences):
        X[position] = sequence

    y = np.array(labels, dtype=float)

    return X, y

In [None]:
# Build the sequence/label arrays for both splits. The test split serves as
# the validation set in the training loop.
X_train, y_train = prepare_sequences(train_grouped)
X_val, y_val = prepare_sequences(test_grouped)

In [None]:
# Total number of sequences and of labels across both splits; the two printed
# numbers should be equal.
print(len(X_train) + len(X_val))
print(len(y_train) + len(y_val))

## Model

In [None]:
# Which recurrent stack to build (see the branches below).
architecture = 1

# Build the model.
model = Sequential()
# Batches are zero-padded to a common length; Masking tells the following
# layers to skip those padded timesteps.
# NOTE(review): a genuine sample with x == 0 and y == 0 is masked as well —
# confirm such points cannot occur in the data.
model.add(Masking(mask_value=0., input_shape=(None, 2)))  # Assuming the padding value is 0

if architecture == 1:
    # 1 generous-lion-25
    # 1 raw earnest-plasma-32
    model.add(LSTM(50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(50))
    model.add(Dropout(0.2))

elif architecture == 2:
    # 2 honest-dragon-30
    # 2 raw sleek-shape-31
    model.add(LSTM(128))
    model.add(Dropout(0.2))

else:
    # Without a recurrent layer the network would be Masking -> Dense, which is
    # not a sequence classifier; fail loudly instead of training it.
    raise ValueError(f"Unknown architecture {architecture!r}; expected 1 or 2")

# Single sigmoid unit: one probability per sequence.
model.add(Dense(1, activation='sigmoid'))

# Name every metric explicitly. Auto-generated names receive a numeric suffix
# ("precision_1", ...) once the metric classes have been instantiated before in
# the same kernel, which would break the key lookups in the training loop when
# this cell is re-run.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        BinaryAccuracy(name='binary_accuracy'),
        Precision(name='precision'),
        Recall(name='recall'),
        TruePositives(name='true_positives'),
        TrueNegatives(name='true_negatives'),
        FalsePositives(name='false_positives'),
        FalseNegatives(name='false_negatives'),
    ],
)

model.summary()

## Training with Variable Length Data

In [None]:
def batch_generator(X, y, batch_size=32):
    """Yield shuffled, zero-padded mini-batches.

    The samples are visited in a fresh random order on every call. Within a
    batch, every sequence is padded at the end with zero rows up to the length
    of the longest sequence in that batch.

    Args:
        X: Indexable collection of 2-D arrays, one per sample.
        y: NumPy array of labels aligned with X.
        batch_size: Maximum number of samples per batch; the last batch may be
            smaller.

    Yields:
        (batch_x, batch_y): the padded sequences stacked into one array and
        the matching labels.
    """
    order = np.arange(len(X))
    np.random.shuffle(order)

    for offset in range(0, len(order), batch_size):
        # Slicing clips at the end of the array, so the final batch is simply shorter.
        chosen = order[offset:offset + batch_size]
        samples = [X[i] for i in chosen]

        longest = max(map(len, samples))
        padded = [
            np.pad(sample, ((0, longest - len(sample)), (0, 0)), 'constant')
            for sample in samples
        ]

        yield np.array(padded), y[chosen]


## Initialize wandb

In [None]:
# Authenticate with Weights & Biases and start a run in the project below.
wandb.login()

wandb.init(project="mouse-movement-lie-detection")

# Hyperparameters stored on the run's config.
# NOTE(review): learning_rate is only recorded here; the model is compiled with
# optimizer='adam' and no explicit rate, so the two values are not linked —
# confirm they are meant to agree.
config = wandb.config
config.epochs = 50
config.batch_size = 64
config.learning_rate = 0.001

In [None]:
# Keep the identifiers of the active run; run_name is used later to name the
# saved model file.
run_id = wandb.run.id
run_name = wandb.run.name

## Model Training

In [None]:
# Use the hyperparameters stored on the wandb config so the values logged with
# the run are the values the loop actually uses.
epochs = config.epochs
batch_size = config.batch_size

# Confusion-matrix counts are summed over the batches of an epoch; the other
# metrics are derived per epoch (see _run_phase).
count_keys = ['true_positives', 'true_negatives', 'false_positives', 'false_negatives']
metric_keys = ['loss', 'binary_accuracy', 'precision', 'recall'] + count_keys

# Metrics placeholders: one list per "<phase>_<metric>" key, one entry per epoch.
history = {f'{phase}_{key}': [] for phase in ('train', 'val') for key in metric_keys}


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _run_phase(step_fn, X, y, batch_size, desc):
    """Run `step_fn` over every batch of (X, y) and return epoch-level metrics.

    Args:
        step_fn: ``model.train_on_batch`` or ``model.test_on_batch``.
        X, y: Sequences and labels, in the form accepted by batch_generator.
        batch_size: Number of samples per batch.
        desc: Label shown on the progress bar.

    Returns:
        Dict with one float per entry of ``metric_keys``. Loss and binary
        accuracy are averaged over samples (batches weighted by their size),
        the four counts are totals over the epoch, and precision/recall are
        computed from those totals.
    """
    sample_count = 0
    loss_sum = 0.0
    accuracy_sum = 0.0
    counts = dict.fromkeys(count_keys, 0.0)

    for X_batch, y_batch in tqdm(batch_generator(X, y, batch_size), desc=desc):
        # Clear the metric state before every step so the returned values
        # describe this batch only, independent of whether the step call resets
        # metrics itself, and so train and validation never share state.
        model.reset_metrics()
        metrics = step_fn(X_batch, y_batch, return_dict=True)

        n_batch = len(y_batch)
        sample_count += n_batch
        # Weight per-batch means by batch size: the last batch is usually smaller.
        loss_sum += float(metrics['loss']) * n_batch
        accuracy_sum += float(metrics['binary_accuracy']) * n_batch
        for key in count_keys:
            counts[key] += float(metrics[key])

    tp = counts['true_positives']
    fp = counts['false_positives']
    fn = counts['false_negatives']

    summary = {
        'loss': _ratio(loss_sum, sample_count),
        'binary_accuracy': _ratio(accuracy_sum, sample_count),
        # Pooled over the epoch; averaging per-batch ratios would be distorted by
        # batches that contain no (predicted) positives.
        'precision': _ratio(tp, tp + fp),
        'recall': _ratio(tp, tp + fn),
    }
    summary.update(counts)
    return summary


# (history prefix, batch step, data, labels, progress-bar label) for each phase.
phases = (
    ('train', model.train_on_batch, X_train, y_train, "Training batches"),
    ('val', model.test_on_batch, X_val, y_val, "Validation batches"),
)

# Training loop
for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")

    wandb_dict = {"epoch": epoch+1}

    # Training phase first, then validation on the held-out split.
    for prefix, step_fn, X_phase, y_phase, desc in phases:
        phase_metrics = _run_phase(step_fn, X_phase, y_phase, batch_size, desc)
        for key, value in phase_metrics.items():
            history[f'{prefix}_{key}'].append(value)
            wandb_dict[f'{prefix}_{key}'] = value

    # Log the metrics for this epoch
    wandb.log(wandb_dict)

    # Print the results for this epoch, one line per metric.
    for key in metric_keys:
        label = key.replace('_', ' ')
        train_value = history[f'train_{key}'][-1]
        val_value = history[f'val_{key}'][-1]
        print(f"Train {label}: {train_value}, Val {label}: {val_value}")

## Plotting the Results

In [None]:
# (history key suffix, display label) for each panel, left to right.
panels = [
    ('binary_accuracy', 'Binary Accuracy'),
    ('loss', 'Loss'),
    ('precision', 'Precision'),
    ('recall', 'Recall'),
]

fig, axes = plt.subplots(1, len(panels), figsize=(18, 5))

# One panel per metric: training and validation curves over the epochs.
for ax, (key, label) in zip(axes, panels):
    ax.plot(history[f'train_{key}'], label=f'Train {label}')
    ax.plot(history[f'val_{key}'], label=f'Validation {label}')
    ax.set_title(f'{label} over Epochs')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(label)
    ax.legend()

# Keep axis labels of neighbouring panels from overlapping, then render the
# figure explicitly so the cell does not echo the repr of its last call.
fig.tight_layout()
plt.show()


## Save the Model

In [None]:
# The model file is named after the wandb run so it can be traced back to it.
if colab:
    model_path = f"/content/drive/MyDrive/2 DP/3 DP Riešenie/Google Colab/{run_name}.keras"
else:
    # os.path.join keeps the local path valid on every OS; on Windows it is
    # the same backslash-separated path as before.
    model_path = os.path.join("data", "15_Neural_Net_Model", "models", f"{run_name}.keras")

# Make sure the target folder exists so the save does not fail on a fresh checkout.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Save the model
model.save(model_path)  # Using keras format

print(f"Model saved to {model_path}")

## Load the Model

In [None]:
# Load the model back from the file written above, as a check that the saved
# file can be read.
# NOTE(review): loaded_model is not used again in the cells shown here.
loaded_model = load_model(model_path)

print("Model loaded successfully.")


## Wandb Finish

In [None]:
# End the wandb run that was started with wandb.init().
wandb.finish()