# Neural Network MLP

This notebook will focus on the implementation of an MLP to predict student GPA.

In [2]:
import tensorflow as tf
import pandas as pd

# Import data and split into train and test sets

In [14]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load the dataset
df = pd.read_csv('database.csv')
print(df.head())

# Data Splitting & Normalization
scaler = StandardScaler()
input = df.drop(columns=['GPA'], errors='ignore')
input = scaler.fit_transform(input)
labels = df['GPA']
X_train, X_temp, Y_train, Y_temp = train_test_split(input, labels, test_size=0.3, random_state=42)
X_val, X_test, Y_val, Y_test = train_test_split(X_temp, Y_temp, test_size=0.5, random_state=42)

   StudentID  Age  Gender  Ethnicity  ParentalEducation  StudyTimeWeekly  \
0       1001   17       1          0                  2        19.833723   
1       1002   18       0          0                  1        15.408756   
2       1003   15       0          2                  3         4.210570   
3       1004   17       1          0                  3        10.028829   
4       1005   17       1          0                  2         4.672495   

   Absences  Tutoring  ParentalSupport  Extracurricular  Sports  Music  \
0         7         1                2                0       0      1   
1         0         0                1                0       0      0   
2        26         0                2                0       0      0   
3        14         0                3                1       0      0   
4        17         1                3                0       0      0   

   Volunteering       GPA  GradeClass  
0             0  2.929196         2.0  
1             0  3

# MLP Model

In [17]:
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Convert X_train and Y_train to NumPy arrays if they are not already
X_np = X_train.to_numpy() if hasattr(X_train, "to_numpy") else X_train
Y_np = Y_train.to_numpy() if hasattr(Y_train, "to_numpy") else Y_train

# Define the MLP model
mlp_model = MLPRegressor(
    hidden_layer_sizes=(64, 32),  # Two hidden layers with 64 and 32 neurons
    activation='relu',  # Rectified Linear Unit activation function
    solver='adam',  # Adam optimizer
    max_iter=50,  # Maximum number of iterations (epochs)
    random_state=42,  # Random state for reproducibility
)

# Perform cross-validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scoring_metrics = {
    'MSE': mean_squared_error,
    'MAE': mean_absolute_error,
    'R2': r2_score
}

all_scores = {metric: [] for metric in scoring_metrics}

for train_idx, val_idx in kf.split(X_np):
    X_cv_train, X_cv_val = X_np[train_idx], X_np[val_idx]
    Y_cv_train, Y_cv_val = Y_np[train_idx], Y_np[val_idx]

    mlp_model.fit(X_cv_train, Y_cv_train)
    y_cv_pred = mlp_model.predict(X_cv_val)

    for metric_name, metric_func in scoring_metrics.items():
        all_scores[metric_name].append(metric_func(Y_cv_val, y_cv_pred))

mean_scores = {metric: np.mean(scores) for metric, scores in all_scores.items()}

# Print cross-validation results
print('------ Cross-Validation Results: ------')
for metric, score in mean_scores.items():
    print(f'{metric}: {score}')

# Train the model on the full training set
mlp_model.fit(X_np, Y_np)

# Predict on the test set
mlp_pred = mlp_model.predict(X_test)

# Evaluate the model
mae_model4 = mean_absolute_error(Y_test, mlp_pred)
mse_model4 = mean_squared_error(Y_test, mlp_pred)
rmse_model4 = np.sqrt(mse_model4)
r2_model4 = r2_score(Y_test, mlp_pred)

print('------ Test Set Evaluation: ------')
print('Mean Absolute Error', mae_model4)
print('Mean Squared Error:', mse_model4)
print('Root Mean Squared Error:', rmse_model4)
print('R2 Score:', r2_model4)



------ Cross-Validation Results: ------
MSE: 0.07748722526142184
MAE: 0.22371758854943308
R2: 0.9063242582814638
------ Test Set Evaluation: ------
Mean Absolute Error 0.2012109005116432
Mean Squared Error: 0.06217013441386982
Root Mean Squared Error: 0.249339396032536
R2 Score: 0.9289550777907785




## TabNet

In [18]:
from pytorch_tabnet.tab_model import TabNetRegressor

# Reshape labels for TabNet compatibility
Y_train_tabnet = Y_train.values.reshape(-1, 1)  # Reshape to 2D
Y_val_tabnet = Y_val.values.reshape(-1, 1)      # Reshape to 2D

# Instantiate TabNet model
tabnet_model = TabNetRegressor()
tabnet_model.fit(
    X_train, Y_train_tabnet,  # Use reshaped Y_train
    eval_set=[(X_val, Y_val_tabnet)],  # Use reshaped Y_val
    eval_metric=['mse'],
    patience=5,
    max_epochs=50
)

tabnet_pred = tabnet_model.predict(X_test)

# evaluation
mae_model5 = mean_absolute_error(Y_test, tabnet_pred)
mse_model5 = mean_squared_error(Y_test, tabnet_pred)
rmse_model5 = np.sqrt(mse_model5)
r2_model5 = r2_score(Y_test, tabnet_pred)

print('Mean Absolute Error', mae_model5)
print('Mean Squared Error:', mse_model5)
print('Root Mean Squared Error:', rmse_model5)
print('R2 Score:', r2_model5)



epoch 0  | loss: 9.17376 | val_0_mse: 9.54685 |  0:00:00s
epoch 1  | loss: 7.26037 | val_0_mse: 6.72676 |  0:00:00s
epoch 2  | loss: 5.36512 | val_0_mse: 5.18418 |  0:00:00s
epoch 3  | loss: 4.36627 | val_0_mse: 4.20072 |  0:00:00s
epoch 4  | loss: 3.67885 | val_0_mse: 3.44159 |  0:00:00s
epoch 5  | loss: 3.14873 | val_0_mse: 2.84822 |  0:00:01s
epoch 6  | loss: 2.63231 | val_0_mse: 2.38738 |  0:00:01s
epoch 7  | loss: 2.22859 | val_0_mse: 1.99877 |  0:00:01s
epoch 8  | loss: 1.91147 | val_0_mse: 1.64181 |  0:00:01s
epoch 9  | loss: 1.67334 | val_0_mse: 1.40845 |  0:00:01s
epoch 10 | loss: 1.40459 | val_0_mse: 1.17822 |  0:00:01s
epoch 11 | loss: 1.08523 | val_0_mse: 1.0235  |  0:00:02s
epoch 12 | loss: 0.9216  | val_0_mse: 0.9095  |  0:00:02s
epoch 13 | loss: 0.81488 | val_0_mse: 0.82757 |  0:00:02s
epoch 14 | loss: 0.68797 | val_0_mse: 0.7607  |  0:00:02s
epoch 15 | loss: 0.6085  | val_0_mse: 0.67604 |  0:00:02s
epoch 16 | loss: 0.54757 | val_0_mse: 0.5865  |  0:00:03s
epoch 17 | los



# TabPFN Model

In [9]:
# TabPFN
from tabpfn import TabPFNRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Train and predict TabPFN
reg = TabPFNRegressor(random_state=42)
reg.fit(X_train, Y_train)
Y3_pred = reg.predict(X_test)

# evaluation
print('Mean Squared Error:', mean_squared_error(Y_test, Y3_pred))
print('Mean Absolute Error:', mean_absolute_error(Y_test, Y3_pred))

Mean Squared Error: 0.029019828192856097
Mean Absolute Error: 0.12807124210155396


## 1D CNN with Soft Ordering 

In [19]:
import torch
import torch.nn as nn
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from torchmetrics import MeanSquaredError, MeanAbsoluteError, R2Score
import numpy as np
import pandas as pd

# Loading Dataset
data = pd.read_csv("database.csv") # change path for your env
#data = pd.read_csv("SmartStudy\\notebooks\\database.csv") # change path for your env
data.head()

# Data Splitting & Normalization
scaler = StandardScaler()
input = data.drop(columns=['GPA'], errors='ignore')
input = scaler.fit_transform(input)
labels = data['GPA']
X_train, X_test, Y_train, Y_test = train_test_split(input, labels, test_size=0.2, random_state=42)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

class SoftOrdering1DCNN(pl.LightningModule):

    def __init__(self, input_dim, output_dim=1, sign_size=32, cha_input=16, cha_hidden=32,
                 K=2, dropout_input=0.2, dropout_hidden=0.2, dropout_output=0.2, learning_rate=1e-3):
        super().__init__()

        hidden_size = sign_size * cha_input
        sign_size1 = sign_size
        sign_size2 = sign_size // 2
        output_size = (sign_size2) * cha_hidden

        self.hidden_size = hidden_size
        self.cha_input = cha_input
        self.cha_hidden = cha_hidden
        self.K = K
        self.sign_size1 = sign_size1
        self.sign_size2 = sign_size2
        self.output_size = output_size
        self.dropout_input = dropout_input
        self.dropout_hidden = dropout_hidden
        self.dropout_output = dropout_output
        self.learning_rate = learning_rate

        self.batch_norm1 = nn.BatchNorm1d(input_dim)
        self.dropout1 = nn.Dropout(dropout_input)
        dense1 = nn.Linear(input_dim, hidden_size, bias=False)
        self.dense1 = nn.utils.weight_norm(dense1)

        # 1st conv layer
        self.batch_norm_c1 = nn.BatchNorm1d(cha_input)
        conv1 = nn.Conv1d(
            cha_input,
            cha_input * K,
            kernel_size=5,
            stride=1,
            padding=2,
            groups=cha_input,
            bias=False)
        self.conv1 = nn.utils.weight_norm(conv1, dim=None)

        self.ave_po_c1 = nn.AdaptiveAvgPool1d(output_size=sign_size2)

        # 2nd conv layer
        self.batch_norm_c2 = nn.BatchNorm1d(cha_input * K)
        self.dropout_c2 = nn.Dropout(dropout_hidden)
        conv2 = nn.Conv1d(
            cha_input * K,
            cha_hidden,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False)
        self.conv2 = nn.utils.weight_norm(conv2, dim=None)

        # 3rd conv layer (Output layer)
        self.batch_norm_c3 = nn.BatchNorm1d(cha_hidden)
        self.dropout_c3 = nn.Dropout(dropout_output)
        self.dense2 = nn.Linear(output_size, output_dim)

        self.mse = MeanSquaredError()
        self.mae = MeanAbsoluteError()
        self.r2 = R2Score()

    def forward(self, x):
        if x.shape[1] != self.dense1.in_features:
            raise ValueError(f"Input feature size mismatch. Expected {self.dense1.in_features}, got {x.shape[1]}.")

        x = self.batch_norm1(x)
        x = self.dropout1(x)
        x = torch.relu(self.dense1(x))

        x = x.reshape(x.shape[0], self.cha_input, self.sign_size1)

        x = self.batch_norm_c1(x)
        x = torch.relu(self.conv1(x))

        x = self.ave_po_c1(x)

        x = self.batch_norm_c2(x)
        x = self.dropout_c2(x)
        x = torch.relu(self.conv2(x))

        x = self.batch_norm_c3(x)
        x = self.dropout_c3(x)
        x = x.view(x.size(0), -1)
        x = self.dense2(x)

        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = nn.MSELoss()(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = nn.MSELoss()(y_hat, y)
        self.log('val_loss', loss)
        self.log('val_mse', self.mse(y_hat, y))
        self.log('val_mae', self.mae(y_hat, y))
        self.log('val_r2', self.r2(y_hat, y))

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train.values, dtype=torch.float32).reshape(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
Y_val_tensor = torch.tensor(Y_val.values, dtype=torch.float32).reshape(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test.values, dtype=torch.float32).reshape(-1, 1)

# Create datasets and dataloaders
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, Y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)
test_loader = DataLoader(test_dataset, batch_size=32)

# Instantiate the model
input_dim = X_train_tensor.shape[1]
CNN_model = SoftOrdering1DCNN(input_dim=input_dim)

# Configure Trainer and callbacks
early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=5)
trainer = pl.Trainer(max_epochs=50, callbacks=[early_stopping]) # callbacks=[early_stopping]

# Train the model
trainer.fit(CNN_model, train_loader, val_loader)  # Use train and validation loaders

# Make predictions and evaluate
predictions = []
CNN_model.eval()  # Set model to evaluation mode
with torch.no_grad():
    for x, _ in test_loader:
        predictions.append(CNN_model(x))
predictions = torch.cat(predictions).detach().numpy()

# Calculate and print evaluation metrics
mse_model7 = mean_squared_error(Y_test, predictions)
mae_model7 = mean_absolute_error(Y_test, predictions)
rmse_model7 = np.sqrt(mse_model7)
r2_model7 = r2_score(Y_test, predictions)

print('Mean Absolute Error', mae_model7)
print('Mean Squared Error:', mse_model7)
print('Root Mean Squared Error:', rmse_model7)
print('R2 Score:', r2_model7)

  WeightNorm.apply(module, name, dim)
You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name          | Type              | Params | Mode 
-------------------------------------------------------------
0  | batch_norm1   | BatchNorm1d       | 28     | train
1  | dropout1      | Dropout           | 0      | train
2  | dense1        | Linear            | 7.7 K  | train
3  | batch_norm_c1 | BatchNorm1d       | 32     | train
4  | conv1         | Conv1d            | 161    | train
5  | ave_po_c1     | AdaptiveAvgPool1d | 0      | train
6  | batch_norm_c2 | BatchNorm1d       | 64     | train
7  | dropout_c2    | Dropout           | 0      | train
8  | conv2         | Conv1d            | 3.1 K  | train
9  | batch_norm_c3 | BatchNorm1d       | 64     | train
10 | dropout_c3    | Dropout           

Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00, 53.11it/s]

c:\Users\eblac\anaconda3\envs\smartstudy_env\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

c:\Users\eblac\anaconda3\envs\smartstudy_env\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
c:\Users\eblac\anaconda3\envs\smartstudy_env\Lib\site-packages\pytorch_lightning\loops\fit_loop.py:310: The number of training batches (48) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 10: 100%|██████████| 48/48 [00:01<00:00, 41.14it/s, v_num=17]
Mean Absolute Error 0.2927950592641271
Mean Squared Error: 0.13432998601856344
Root Mean Squared Error: 0.3665105537615028
R2 Score: 0.8375563090156952
