# Model Tutorial: MultiLayer Perceptron (Simple Neural Network)

The purpose of this notebook is to demonstrate how to train a simple neural network used in this project and how to make predictions with it. First, we will demonstrate the basic code, and then reproduce the results using a custom class `MLP`, which keeps the code consistent across multiple models.

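## Model Description

The model is a multilayer perceptron for regression: one fully connected hidden layer, followed by a dropout layer and a single-unit output layer with no activation. It is trained with the Adam optimizer to minimize mean squared error, using the columns `Ed` and `Ew` as inputs and `fm` as the target.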

## Setup

In [None]:
import sys
sys.path.append('../src')
import pandas as pd
import numpy as np
import xgboost as xg
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import yaml
# Local modules
# from fmda_models import XGB
import reproducibility

## Read and Split Data

In [None]:
# Load the dataset from a pickle file (the path is relative to the notebook's working directory).
# NOTE: unpickling can execute arbitrary code, so only load pickle files from a trusted source.
df = pd.read_pickle("../data/rocky_2023_06-08.pkl")

In [None]:
# Set seed for reproducibility
reproducibility.set_seed(123)

# Split into train/test sets (80% / 20%): inputs are the "Ed" and "Ew" columns, target is "fm".
# random_state pins the shuffle explicitly, so the split does not depend on
# which global RNGs the project-local set_seed helper happens to seed.
X_train, X_test, y_train, y_test = train_test_split(
    df[["Ed", "Ew"]], df["fm"], test_size=0.2, random_state=123
)

## Manually Code MLP

In [None]:
# Re-seed right before the model is built.
# NOTE(review): presumably set_seed also seeds TensorFlow so that weight
# initialization is repeatable — confirm in the reproducibility module.
reproducibility.set_seed(123)

In [None]:
# Hyperparameters for the hand-written MLP in this section.
params = dict(
    hidden_units=10,        # width of the hidden Dense layer
    activation='relu',      # activation of the hidden layer
    optimizer='adam',       # not read by the manual model cell, which constructs Adam directly
    epochs=10,              # passed to model.fit
    batch_size=32,          # passed to model.fit
    validation_split=0.2,   # passed to model.fit
    dropout=0.2,            # rate of the Dropout layer
    learning_rate=0.001,    # passed to the Adam optimizer
)

In [None]:
# Assemble the network layer by layer: hidden Dense -> Dropout -> single-unit output.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Dense(
        params['hidden_units'],
        activation=params['activation'],
        input_shape=(X_train.shape[1],),  # one input per feature column of X_train
    )
)
model.add(tf.keras.layers.Dropout(params['dropout']))  # Dropout layer
model.add(tf.keras.layers.Dense(1))  # Output layer with a single neuron for regression

# Adam with the configured learning rate, minimizing mean squared error.
optimizer = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])
model.compile(loss='mean_squared_error', optimizer=optimizer)

In [None]:
# Train the model with the epoch count, batch size and validation split from `params`.
model.fit(
    X_train,
    y_train,
    epochs=params['epochs'],
    batch_size=params['batch_size'],
    validation_split=params['validation_split'],
)

In [None]:
# Predict on both splits: `fitted` for the training rows, `preds` for the test rows.
fitted, preds = model.predict(X_train), model.predict(X_test)

In [None]:
# Training-set metrics: RMSE (square root of the mean squared error) and R-squared
rmse_train, r2_train = (
    np.sqrt(mean_squared_error(y_train, fitted)),
    r2_score(y_train, fitted),
)

# Test-set metrics, computed the same way on the held-out rows
rmse_test, r2_test = (
    np.sqrt(mean_squared_error(y_test, preds)),
    r2_score(y_test, preds),
)

# Report training metrics first, then test metrics
print("RMSE for training data:", rmse_train)
print("R-squared for training data:", r2_train)
print("RMSE for test data:", rmse_test)
print("R-squared for test data:", r2_test)

## Reproduce using MLP Class

In [None]:
# Parse the YAML config file that holds the settings for all models.
with open('params.yaml', 'r') as file:
    all_params = yaml.safe_load(file)

# Keep the "mlp" section and record the input width taken from X_train's column count.
params = all_params["mlp"]
params.update(input_dim=X_train.shape[1])
params

In [None]:
class MLP:
    """
    Wrapper class for a multilayer perceptron (MLP) regression model built with Keras.

    The network consists of one hidden Dense layer, a Dropout layer, and a
    single-unit Dense output layer. It is built and compiled on construction.

    Parameters:
    -----------
    params : dict
        Hyperparameters for the network. Keys read by this class:
        'hidden_units', 'activation', 'input_dim', 'dropout' (model structure),
        'learning_rate' (Adam optimizer),
        'epochs', 'batch_size', 'validation_split' (training),
        and optionally 'metrics' (passed to compile; no extra metrics if absent).
    loss : str or custom func., default 'mean_squared_error'
        Loss passed to the Keras ``compile`` call.

    Attributes:
    -----------
    model : tf.keras.Sequential
        Underlying neural network.
    params : dict
        Copy of the hyperparameters passed in, plus the 'loss' entry.

    Methods:
    --------
    fit(X_train, y_train):
        Train the model on the training data.
    predict(X_test):
        Make predictions using the trained model.
    summary():
        Print the summary of the underlying Keras model.
    """
    def __init__(self, params, loss='mean_squared_error'):
        """
        Initialize the MLP class: store the hyperparameters, then build and compile the network.

        Parameters:
        -----------
        params : dict
            Hyperparameters to be passed to the neural network (see class docstring).
        loss : str or custom func.
            Loss used when compiling the model.
        """
        # Work on a shallow copy so that storing 'loss' does not modify the caller's dict.
        self.params = dict(params)
        self.params['loss'] = loss
        self.model = self._build_model()
        self.compile_model()

    def _build_model(self):
        """
        Build the (uncompiled) network: hidden Dense -> Dropout -> Dense(1).

        Returns:
        --------
        tf.keras.Sequential
            The assembled model.
        """
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(self.params['hidden_units'], activation=self.params['activation'], input_shape=(self.params['input_dim'],)),
            tf.keras.layers.Dropout(self.params['dropout']),  # Dropout layer
            tf.keras.layers.Dense(1)  # Output layer with a single neuron for regression
        ])
        return model

    def compile_model(self):
        """
        Compile the model with an Adam optimizer and the configured loss.

        A fresh optimizer is created from ``params['learning_rate']`` so the
        class never depends on an ``optimizer`` variable defined elsewhere.
        Metrics come from ``params['metrics']``; when that key is absent no
        additional metrics are tracked (classification accuracy is not
        meaningful for this regression model).
        """
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        self.model.compile(optimizer=optimizer,
                           loss=self.params['loss'],
                           metrics=self.params.get('metrics'))

    def fit(self, X_train, y_train):
        """
        Train the neural network on the training data.

        The number of epochs, batch size and validation split are read from
        this instance's own ``params`` (not from any global variable).

        Parameters:
        -----------
        X_train : array-like of shape (n_samples, n_features)
            Training input samples.
        y_train : array-like of shape (n_samples,)
            Target values.
        """
        self.model.fit(X_train, y_train,
                       epochs=self.params['epochs'],
                       batch_size=self.params['batch_size'],
                       validation_split=self.params['validation_split'])

    def predict(self, X_test):
        """
        Make predictions using the trained model.

        Parameters:
        -----------
        X_test : array-like of shape (n_samples, n_features)
            Test input samples.

        Returns:
        --------
        array-like of shape (n_samples, 1)
            Predicted target values, one column for the single output unit.
        """
        return self.model.predict(X_test)

    def summary(self):
        """
        Print the summary of the underlying Keras model.

        Returns:
        --------
        Whatever ``model.summary()`` returns.
        """
        return self.model.summary()

In [None]:
# Seed right before constructing the MLP: the constructor builds and compiles the network.
reproducibility.set_seed(123)

mlp = MLP(params)

In [None]:
# Train the wrapped network; epochs, batch size and validation split come from the params dict.
mlp.fit(X_train, y_train)

In [None]:
# Predict on both splits with the wrapper: `fitted` for training rows, `preds` for test rows.
fitted, preds = mlp.predict(X_train), mlp.predict(X_test)

In [None]:
# Training-set metrics for the MLP wrapper: RMSE (square root of the MSE) and R-squared
rmse_train, r2_train = (
    np.sqrt(mean_squared_error(y_train, fitted)),
    r2_score(y_train, fitted),
)

# Test-set metrics, computed the same way on the held-out rows
rmse_test, r2_test = (
    np.sqrt(mean_squared_error(y_test, preds)),
    r2_score(y_test, preds),
)

# Report training metrics first, then test metrics
print("RMSE for training data:", rmse_train)
print("R-squared for training data:", r2_train)
print("RMSE for test data:", rmse_test)
print("R-squared for test data:", r2_test)

In [None]:
# Show the summary of the Keras model wrapped by the MLP instance.
mlp.summary()