# Multi-layer Artificial Neural Network

In [8]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader, TensorDataset
from feature_engineering import process_categorical_columns


In [9]:

data = pd.read_csv('StudentPerformanceFactorsCleaned.csv')

X = data.drop(columns=['Exam_Score'])
y = data['Exam_Score']













In [10]:
categorical_columns = X.select_dtypes(include=['object']).columns
numerical_columns = X.select_dtypes(include=['int64', 'float64']).columns

numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(drop='first')


preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_columns),
        ('cat', categorical_transformer, categorical_columns)
    ]
)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply transformations
X_train_transformed = preprocessor.fit_transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

In [11]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_transformed, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_transformed, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [12]:
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(64, 1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

In [13]:
# Assuming X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor are already defined
# Define model
input_size = X_train_tensor.shape[1]
model = MLP(input_size)

# Define loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop without DataLoader
batch_size = 32  # Define your batch size
epochs = 2000

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    # Manually create mini-batches
    for i in range(0, len(X_train_tensor), batch_size):
        # Select a batch
        inputs = X_train_tensor[i:i+batch_size]
        targets = y_train_tensor[i:i+batch_size]

        # Zero the gradient buffers
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Compute the loss
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    # Print loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {running_loss / (len(X_train_tensor) // batch_size):.4f}')

# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    y_pred = model(X_test_tensor).numpy()
    y_true = y_test_tensor.numpy()

mse = mean_squared_error(y_true, y_pred)
print(f'Mean Squared Error on Test Data: {mse}')

Epoch 100/2000, Loss: 56.3099
Epoch 200/2000, Loss: 39.0357
Epoch 300/2000, Loss: 29.8953
Epoch 400/2000, Loss: 21.9508
Epoch 500/2000, Loss: 15.0944
Epoch 600/2000, Loss: 10.1218
Epoch 700/2000, Loss: 6.3644
Epoch 800/2000, Loss: 4.0048
Epoch 900/2000, Loss: 3.1520
Epoch 1000/2000, Loss: 2.0397
Epoch 1100/2000, Loss: 1.7975
Epoch 1200/2000, Loss: 1.8834
Epoch 1300/2000, Loss: 1.6445
Epoch 1400/2000, Loss: 1.6109
Epoch 1500/2000, Loss: 1.7259
Epoch 1600/2000, Loss: 1.7752
Epoch 1700/2000, Loss: 1.9338
Epoch 1800/2000, Loss: 1.3812
Epoch 1900/2000, Loss: 1.2073
Epoch 2000/2000, Loss: 1.8820
Mean Squared Error on Test Data: 5.265836238861084
