PyTorch Neural Net Optimization

In [10]:
import pandas as pd
import torch.nn as nn
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import KFold

In [11]:
#Load the training CSV into a DataFrame
df = pd.read_csv("math_train.csv")

#Separate the target column G3 from the frame
y = df["G3"]
#Features are every remaining column once G1, G2 and the target G3 are removed
X = df.drop(columns=["G1", "G2", "G3"])

#Preview the raw frame. This has to be the last expression of the cell:
#a bare df.head() in the middle of a cell is evaluated but never rendered.
df.head()

In [12]:
#One-hot encode the columns of X listed below; every other column is passed
#through unchanged. drop_first=True omits the first level of each encoded
#column from the generated indicator columns.
#NOTE(review): 'age' is encoded as a category here rather than kept numeric - confirm that is intended.
categorical_columns = [
    'school', 'sex', 'age', 'address',
    'famsize', 'Pstatus', 'Mjob', 'Fjob',
    'reason', 'guardian', 'schoolsup',
    'famsup', 'paid', 'activities', 'nursery',
    'higher', 'internet', 'romantic',
]
X_encode = pd.get_dummies(X, columns=categorical_columns, drop_first=True)
X_encode.head()

Unnamed: 0,Medu,Fedu,traveltime,studytime,failures,famrel,freetime,goout,Dalc,Walc,...,guardian_mother,guardian_other,schoolsup_yes,famsup_yes,paid_yes,activities_yes,nursery_yes,higher_yes,internet_yes,romantic_yes
0,3,3,1,2,0,4,2,3,1,2,...,True,False,False,False,True,True,True,True,True,True
1,2,3,2,1,0,5,3,3,1,1,...,False,False,False,False,False,False,True,True,True,False
2,1,3,1,2,3,4,3,5,1,1,...,True,False,False,False,False,True,False,True,True,True
3,4,3,1,3,0,3,4,4,2,4,...,True,False,True,True,True,True,True,True,True,False
4,2,1,2,1,0,3,3,2,1,3,...,True,False,False,False,False,True,False,True,False,False


In [13]:
#Scale X and y using standard scaler (each scaler is kept so predictions can
#later be mapped back with inverse_transform)
#NOTE(review): both scalers are fitted on the full dataset here, before
#cross_val splits it into folds, so the scaling statistics include the rows
#that are later held out for validation - confirm this leakage is acceptable.
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X_encode)
#Read the target straight from df instead of from `y`: this cell rebinds `y`
#to a tensor below, so reading `y.values` here made the cell fail when re-run.
y_scaled = y_scaler.fit_transform(df["G3"].to_numpy().reshape(-1, 1))

#Convert NumPy arrays to PyTorch tensors
X = torch.tensor(X_scaled, dtype=torch.float32)
y = torch.tensor(y_scaled, dtype=torch.float32)

#Create TensorDatasets
data = TensorDataset(X, y)

#Create dataloader for batching
batch_size = 32
loader = DataLoader(data, batch_size=batch_size, shuffle=True)

In [14]:
#Fully connected network with two ReLU hidden layers and a single linear output unit
class NeuralNet(nn.Module):
    """Feed-forward regressor mapping num_features -> h1 -> h2 -> 1.

    Args:
        num_features: size of the last dimension of the input.
        h1: number of units in the first hidden layer.
        h2: number of units in the second hidden layer.
    """

    def __init__(self, num_features, h1, h2):
        super().__init__()
        #Layers are created input-to-output; the attribute names are the
        #prefixes of the keys in state_dict()
        self.hidden1 = nn.Linear(in_features=num_features, out_features=h1)
        self.hidden2 = nn.Linear(in_features=h1, out_features=h2)
        self.output = nn.Linear(in_features=h2, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the output layer's value for x; no activation is applied after it."""
        first_hidden = self.relu(self.hidden1(x))
        second_hidden = self.relu(self.hidden2(first_hidden))
        return self.output(second_hidden)

In [15]:
#Create function that trains a model based on given inputs
def train_model(model, train_loader, optimizer, criterion, epochs=50):
    """Train `model` in place and report the loss of the final epoch.

    Args:
        model: module to optimise; it is switched to train mode at the start
            of every epoch and its parameters are updated in place.
        train_loader: iterable yielding (batch_X, batch_y) pairs. It is
            re-iterated once per epoch and does not need to support len().
        optimizer: optimizer already bound to the model's parameters.
        criterion: callable taking (outputs, batch_y) and returning a scalar
            loss tensor.
        epochs: number of passes over train_loader.

    Returns:
        The mean of the per-batch losses of the last epoch that processed at
        least one batch (every batch counts equally, whatever its size), or
        NaN when no batch was ever processed (epochs <= 0 or an empty loader).
    """
    #Bound before the loop so that epochs=0 returns a value instead of
    #raising UnboundLocalError at the return statement
    last_epoch_loss = float("nan")

    for _ in range(epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        #Counting batches (rather than calling len) avoids a division by zero
        #on an empty loader and supports iterables without __len__
        if num_batches:
            last_epoch_loss = running_loss / num_batches

    return last_epoch_loss

In [16]:
#Shared 5-fold splitter used by cross_val; rows are shuffled with a fixed random_state
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

#Create a function that cross validates
def cross_val(X, y, h1, h2, lr, batch_size, epochs=30):
    """Return the mean validation MSE over the folds of the module-level `kfold`.

    For every fold a fresh NeuralNet is trained with Adam on the training rows
    and then scored once, in a single forward pass, on the held-out rows.

    Args:
        X: feature tensor; X.shape[1] is used as the network input width.
        y: target tensor, indexed with the same row indices as X.
        h1: units in the first hidden layer.
        h2: units in the second hidden layer.
        lr: Adam learning rate.
        batch_size: batch size of the per-fold training loader.
        epochs: training epochs per fold.

    Returns:
        Unweighted average of the per-fold validation losses (a Python float).
    """
    fold_losses = []
    num_features = X.shape[1]

    for fit_rows, held_out_rows in kfold.split(X):
        fold_loader = DataLoader(
            TensorDataset(X[fit_rows], y[fit_rows]),
            batch_size=batch_size,
            shuffle=True,
        )

        #A new network and optimizer per fold, so no weights carry over between folds
        net = NeuralNet(num_features, h1, h2)
        adam = torch.optim.Adam(net.parameters(), lr=lr)
        mse = nn.MSELoss()

        train_model(net, fold_loader, adam, mse, epochs)

        #Score the held-out rows in eval mode with gradient tracking disabled
        net.eval()
        with torch.no_grad():
            fold_loss = mse(net(X[held_out_rows]), y[held_out_rows]).item()
        fold_losses.append(fold_loss)

    return sum(fold_losses) / len(fold_losses)



In [18]:
#Hyperparameter grids searched exhaustively below
learning_rates = [0.001, 0.01]
hidden1_sizes = [32, 64]
hidden2_sizes = [16, 32]
batch_sizes = [16, 32]

best_params = None
#Start from +inf rather than a fixed number: with a finite sentinel such as
#100, a run in which every CV loss is at or above it would leave best_params as None.
#(A NaN loss still never compares as smaller, so NaN results are skipped.)
best_loss = float("inf")
print("Learning Rate, Hidden Layer 1, Hidden Layer 2, Batch size, Cross Val Loss")

#iterate through hyperparameters to find best
for lr in learning_rates:
    for h1 in hidden1_sizes:
        for h2 in hidden2_sizes:
            for bs in batch_sizes:

                cv_loss = cross_val(
                    X, y, h1=h1, h2=h2, lr=lr, batch_size=bs, epochs=30)

                print(f"Lr={lr}, H1={h1}, H2={h2}, Bs={bs}, CV Loss = {cv_loss}")

                #Strict comparison: on a tie the earlier configuration is kept
                if cv_loss < best_loss:
                    best_loss = cv_loss
                    best_params = {"lr": lr, "h1": h1, "h2": h2, "batch_size": bs}

print("\n Best Parameters:", best_params)
print("Best CV Loss:", best_loss)

Learning Rate, Hidden Layer 1, Hidden Layer 2, Batch size, Cross Val Loss
Lr=0.001, H1=32, H2=16, Bs=16, CV Loss = 1.163056206703186
Lr=0.001, H1=32, H2=16, Bs=32, CV Loss = 1.0034314274787903
Lr=0.001, H1=32, H2=32, Bs=16, CV Loss = 1.153639817237854
Lr=0.001, H1=32, H2=32, Bs=32, CV Loss = 1.0847496032714843
Lr=0.001, H1=64, H2=16, Bs=16, CV Loss = 1.1218148708343505
Lr=0.001, H1=64, H2=16, Bs=32, CV Loss = 1.0870077967643739
Lr=0.001, H1=64, H2=32, Bs=16, CV Loss = 1.1239054918289184
Lr=0.001, H1=64, H2=32, Bs=32, CV Loss = 1.0762189030647278
Lr=0.01, H1=32, H2=16, Bs=16, CV Loss = 1.015072476863861
Lr=0.01, H1=32, H2=16, Bs=32, CV Loss = 1.171541655063629
Lr=0.01, H1=32, H2=32, Bs=16, CV Loss = 0.9906273126602173
Lr=0.01, H1=32, H2=32, Bs=32, CV Loss = 1.1211448550224303
Lr=0.01, H1=64, H2=16, Bs=16, CV Loss = 1.1292776703834533
Lr=0.01, H1=64, H2=16, Bs=32, CV Loss = 1.1309869766235352
Lr=0.01, H1=64, H2=32, Bs=16, CV Loss = 0.9876032590866088
Lr=0.01, H1=64, H2=32, Bs=32, CV Loss