<a href="https://colab.research.google.com/github/asyraffatha/Task-MachineLearning/blob/main/Week%2010/MLP_Regression_Asyraff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install torch scikit-learn pandas



In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
file_path = '/content/drive/MyDrive/StudentsPerformance.csv'


In [5]:
import pandas as pd

# Load dataset from Google Drive
data = pd.read_csv('/content/drive/MyDrive/StudentsPerformance.csv')

# Display dataset information
print("Shape of the dataset:", data.shape)
print("Columns:", data.columns)
print(data.head())


Shape of the dataset: (1000, 8)
Columns: Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score',
       'writing score'],
      dtype='object')
   gender race/ethnicity parental level of education         lunch  \
0  female        group B           bachelor's degree      standard   
1  female        group C                some college      standard   
2  female        group B             master's degree      standard   
3    male        group A          associate's degree  free/reduced   
4    male        group C                some college      standard   

  test preparation course  math score  reading score  writing score  
0                    none          72             72             74  
1               completed          69             90             88  
2                    none          90             95             93  
3                    none          47             57             44  
4

In [6]:
print(data.columns.tolist())


['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course', 'math score', 'reading score', 'writing score']


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load dataset
data = pd.read_csv('/content/drive/MyDrive/StudentsPerformance.csv')

# Target variable
target = "math score"

# Separate features and target
X = data.drop(columns=[target])
y = data[target]

# Identify categorical and numerical columns
categorical_columns = X.select_dtypes(include=["object"]).columns
numerical_columns = X.select_dtypes(include=["number"]).columns

# Encode categorical columns and scale numerical columns
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), numerical_columns),
        ("cat", OneHotEncoder(drop="first"), categorical_columns),
    ]
)

# Apply transformations
X = preprocessor.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert target to NumPy arrays
y_train = np.array(y_train)
y_test = np.array(y_test)

print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)



Shape of X_train: (800, 14)
Shape of y_train: (800,)


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Prepare DataLoader
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Define MLP Model
class MLPModel(nn.Module):
    def __init__(self, input_size, hidden_layers, activation):
        super(MLPModel, self).__init__()
        layers = []
        prev_size = input_size
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(activation())
            prev_size = hidden_size
        layers.append(nn.Linear(prev_size, 1))  # Output layer
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)

# Example: MLP with 1 hidden layer of 32 neurons and ReLU
input_size = X_train.shape[1]
hidden_layers = [32]
activation = nn.ReLU

model = MLPModel(input_size, hidden_layers, activation)


In [11]:
def train_model(model, train_loader, epochs, learning_rate):
    # Define loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            y_pred = model(X_batch).squeeze()
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(train_loader)}")

# Train the model
train_model(model, train_loader, epochs=10, learning_rate=0.01)


Epoch 1/10, Loss: 4192.9948828125
Epoch 2/10, Loss: 2254.4563134765626
Epoch 3/10, Loss: 359.83982238769534
Epoch 4/10, Loss: 143.04339294433595
Epoch 5/10, Loss: 120.70472564697266
Epoch 6/10, Loss: 110.18603637695313
Epoch 7/10, Loss: 102.13147003173827
Epoch 8/10, Loss: 93.40913421630859
Epoch 9/10, Loss: 84.79495895385742
Epoch 10/10, Loss: 76.61862106323242


In [12]:
# Define parameter combinations
hidden_layers_list = [[32], [32, 16], [64, 32, 16]]  # 1, 2, 3 hidden layers
activation_functions = [nn.ReLU, nn.Sigmoid]
epochs_list = [10, 50]
learning_rates = [0.01, 0.001]
batch_sizes = [32, 128]

# Loop through parameters
results = []
for hidden_layers in hidden_layers_list:
    for activation in activation_functions:
        for epochs in epochs_list:
            for lr in learning_rates:
                for batch_size in batch_sizes:
                    print(f"Testing: {hidden_layers}, {activation.__name__}, Epochs={epochs}, LR={lr}, Batch={batch_size}")

                    # Update DataLoader with new batch size
                    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

                    # Initialize model
                    model = MLPModel(input_size, hidden_layers, activation)

                    # Train model
                    train_model(model, train_loader, epochs, lr)


Testing: [32], ReLU, Epochs=10, LR=0.01, Batch=32
Epoch 1/10, Loss: 4373.2120703125
Epoch 2/10, Loss: 2703.905654296875
Epoch 3/10, Loss: 503.6021936035156
Epoch 4/10, Loss: 149.6594497680664
Epoch 5/10, Loss: 124.94898376464843
Epoch 6/10, Loss: 115.94307983398437
Epoch 7/10, Loss: 106.61046630859374
Epoch 8/10, Loss: 97.39236297607422
Epoch 9/10, Loss: 88.69726654052734
Epoch 10/10, Loss: 80.80475082397462
Testing: [32], ReLU, Epochs=10, LR=0.01, Batch=128
Epoch 1/10, Loss: 4561.210797991072
Epoch 2/10, Loss: 4371.184500558035
Epoch 3/10, Loss: 4067.1446358816966
Epoch 4/10, Loss: 3586.7580915178573
Epoch 5/10, Loss: 3006.585693359375
Epoch 6/10, Loss: 2282.3587472098216
Epoch 7/10, Loss: 1508.4278215680804
Epoch 8/10, Loss: 822.2852434430804
Epoch 9/10, Loss: 365.98080008370533
Epoch 10/10, Loss: 170.4018293108259
Testing: [32], ReLU, Epochs=10, LR=0.001, Batch=32
Epoch 1/10, Loss: 4613.676787109375
Epoch 2/10, Loss: 4561.2983203125
Epoch 3/10, Loss: 4491.4065234375
Epoch 4/10, Loss

In [13]:
def evaluate_model(model, X_test_tensor, y_test_tensor):
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_tensor).squeeze()
        mse = nn.MSELoss()(y_pred, y_test_tensor)
    print(f"Test MSE: {mse.item()}")

# Evaluate model
evaluate_model(model, X_test_tensor, y_test_tensor)


Test MSE: 3702.96337890625
