# Model Tutorial: MultiLayer Perceptron (Simple Neural Network)

The purpose of this notebook is to demonstrate how to train a simple neural network used in this project and generate predictions with it. First, we demonstrate the basic code, and then we reproduce the results using a custom class `MLP`, which keeps the code consistent across multiple models.

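## Model Description

The model is a small feed-forward network for regression: one hidden dense layer, a dropout layer, and a single linear output unit. It takes two input features (`Ed` and `Ew`) and is trained with the Adam optimizer to minimize mean squared error.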

## Setup

In [None]:
import sys
sys.path.append('../src')
import pandas as pd
import numpy as np
import xgboost as xg
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error
import yaml
import matplotlib.pyplot as plt
# Local modules
from fmda_models import MLP
import reproducibility
from data_funcs import train_test_split_spacetime
from metrics import ros, rmse

## Read and Split Data

In [None]:
# Load the pickled DataFrame and drop every row whose 'fm' value is missing.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle("../data/raws_df.pkl").dropna(subset=['fm'])

In [None]:
# Fix the random seed first so the run is reproducible
reproducibility.set_seed(123)

# Restrict to rows whose index month is January, then build the
# train/test split with `test_days=2`
X_train, X_test, y_train, y_test = train_test_split_spacetime(
    df.loc[df.index.month == 1],
    test_days=2,
)

In [None]:
# Keep only the two predictor columns ("Ed", "Ew") in both feature sets
X_train = X_train.loc[:, ["Ed", "Ew"]]
X_test = X_test.loc[:, ["Ed", "Ew"]]

## Manually Code MLP

In [None]:
# Re-seed (same value, 123, as before the data split) right before the manual
# model is configured and built.
# NOTE(review): presumably this seeds the Python/NumPy/TensorFlow RNGs —
# confirm in the `reproducibility` module.
reproducibility.set_seed(123)

In [None]:
# Hyperparameters for the hand-written model below.
# Note: the manual model code constructs Adam directly, so the 'optimizer'
# entry is not read by it.
params = dict(
    hidden_units=10,
    activation='relu',
    optimizer='adam',
    epochs=10,
    batch_size=32,
    validation_split=0.2,
    dropout=0.2,
    learning_rate=0.001,
)

In [None]:
# Single-hidden-layer regression network:
#   Input -> Dense(hidden_units, activation) -> Dropout -> Dense(1)
# The input shape is declared with an explicit `tf.keras.Input` instead of the
# `input_shape=` argument on the first Dense layer: newer Keras releases warn
# about that argument and recommend an Input object for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    tf.keras.layers.Dense(params['hidden_units'], activation=params['activation']),
    tf.keras.layers.Dropout(params['dropout']),  # Dropout layer
    tf.keras.layers.Dense(1),  # Output layer with a single neuron for regression
])
# The optimizer is fixed to Adam here; params['optimizer'] is not consulted.
optimizer = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
model.compile(optimizer=optimizer, loss='mean_squared_error')

In [None]:
# Train the model; `history` keeps the per-epoch training and validation loss
# that is plotted afterwards.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=params['epochs'],
    batch_size=params['batch_size'],
    validation_split=params['validation_split'],
)

In [None]:
# Training vs. validation loss, one point per epoch
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# In-sample predictions (`fitted`) and out-of-sample predictions (`preds`)
fitted, preds = model.predict(X_train), model.predict(X_test)

In [None]:
# RMSE on: the training data, the test data, and the ROS-transformed test data.
# The three mean squared errors are collected first and square-rooted together.
rmse_train, rmse_test, rmse_ros = np.sqrt([
    mean_squared_error(y_train, fitted),
    mean_squared_error(y_test, preds),
    mean_squared_error(ros(y_test), ros(preds)),
])

print("RMSE for training data:", rmse_train)
print("RMSE for test data:", rmse_test)
print("RMSE for ROS test data:", rmse_ros)

## Reproduce using MLP Class

In [None]:
# Read every model's hyperparameters from the YAML file
with open('params.yaml') as file:
    all_params = yaml.safe_load(file)

# Use the "mlp" section; from here on `params` refers to it, replacing the
# dictionary defined for the manual model.
params = all_params["mlp"]
params.update(input_dim=X_train.shape[1])  # input size = number of feature columns
params

In [None]:
# Same seed as the manual run so the two results can be compared
reproducibility.set_seed(123)

# Build and train through the project's MLP wrapper
mlp = MLP(params)
mlp.fit(X_train, y_train, plot=True)

# In-sample and out-of-sample predictions
fitted, preds = mlp.predict(X_train), mlp.predict(X_test)

In [None]:
# Evaluate the fitted MLP wrapper on the held-out test set.
# NOTE(review): what `eval` reports/returns is defined in fmda_models.MLP — confirm there.
mlp.eval(X_test, y_test)

In [None]:
# RMSE on: the training data, the test data, and the ROS-transformed test data,
# computed from the MLP-class predictions with one vectorized square root.
rmse_train, rmse_test, rmse_ros = np.sqrt([
    mean_squared_error(y_train, fitted),
    mean_squared_error(y_test, preds),
    mean_squared_error(ros(y_test), ros(preds)),
])

print("RMSE for training data:", rmse_train)
print("RMSE for test data:", rmse_test)
print("RMSE for ROS test data:", rmse_ros)

In [None]:
# Show the architecture summary of the model held in the wrapper's `model` attribute.
# NOTE(review): presumably a Keras model, so this prints layers and parameter counts — confirm.
mlp.model.summary()

## Using Weighted Loss

In [None]:
# Per-sample weights w = exp(-0.01 * y): the larger the target value, the
# smaller the weight that sample receives in the loss.
weights = tf.math.exp(tf.math.multiply(-0.01, y_train))

### Built-In TensorFlow

In [None]:
reproducibility.set_seed(123)

# NOTE(review): `params` is whatever was assigned most recently. When the
# notebook is run top to bottom that is the "mlp" section loaded from
# params.yaml, not the dictionary written out for the first manual model.

# Same architecture as the unweighted manual model:
#   Input -> Dense(hidden_units, activation) -> Dropout -> Dense(1)
# The input shape is declared with an explicit `tf.keras.Input` instead of the
# `input_shape=` argument on the first Dense layer: newer Keras releases warn
# about that argument and recommend an Input object for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),  # one input per feature column
    tf.keras.layers.Dense(params['hidden_units'], activation=params['activation']),
    tf.keras.layers.Dropout(params['dropout']),  # Dropout layer
    tf.keras.layers.Dense(1),  # Output layer with a single neuron for regression
])
optimizer = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train with per-sample weights so each row's contribution to the loss is
# scaled by its entry in `weights`.
history = model.fit(
    X_train,
    y_train,
    epochs=params['epochs'],
    batch_size=params['batch_size'],
    validation_split=params['validation_split'],
    sample_weight=weights,
)

In [None]:
# Training vs. validation loss for the weighted fit, one point per epoch
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Predictions from the weighted manual model
fitted, preds = model.predict(X_train), model.predict(X_test)

# RMSE on the test data and on the ROS-transformed test data
rmse_test, rmse_ros = np.sqrt([
    mean_squared_error(y_test, preds),
    mean_squared_error(ros(y_test), ros(preds)),
])

print("RMSE for test data:", rmse_test)
print("RMSE for ROS test data:", rmse_ros)

### Custom Class

In [None]:
# Same seed as the built-in weighted run so the two results can be compared
reproducibility.set_seed(123)

# Train the MLP wrapper with the per-sample weights
mlp = MLP(params)
mlp.fit(X_train, y_train, weights=weights, plot=True)

# In-sample and out-of-sample predictions, then the wrapper's own evaluation
fitted, preds = mlp.predict(X_train), mlp.predict(X_test)
mlp.eval(X_test, y_test)

In [None]:
# RMSE on the test data and on the ROS-transformed test data,
# from the weighted MLP-class predictions
rmse_test, rmse_ros = np.sqrt([
    mean_squared_error(y_test, preds),
    mean_squared_error(ros(y_test), ros(preds)),
])

print("RMSE for test data:", rmse_test)
print("RMSE for ROS test data:", rmse_ros)