In [1]:
# necessary imports
import torch, torch.nn as nn, torch.optim as optim
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
print("Using torch", torch.__version__)

Using torch 2.3.0+cu121


In [2]:
# Setting the seed
# replace student id (here 34xxx) with last 5 digits of your own student id
studentid = 34772
torch.manual_seed(studentid)

<torch._C.Generator at 0x7dfa041ec2b0>

In [3]:
# Load the dataset
from sklearn.datasets import load_breast_cancer
breastcancer = load_breast_cancer()
print(breastcancer.data.shape, breastcancer.target.shape)
print(breastcancer.feature_names)
print(breastcancer.target_names)

(569, 30) (569,)
['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
['malignant' 'benign']


In [4]:
X = breastcancer.data
y = breastcancer.target
print(X.shape, y.shape)

(569, 30) (569,)


In [5]:
# divide dataset into TRAIN, VALIDATION and TEST
from sklearn.model_selection import train_test_split
test_size = 0.1
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=test_size, shuffle=True, stratify=y, random_state=studentid)
print(X_train_val.shape, y_train_val.shape, X_test.shape, y_test.shape)

(512, 30) (512,) (57, 30) (57,)


In [6]:
X_train_val = torch.tensor(X_train_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train_val = torch.tensor(y_train_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
y_train_val = y_train_val.reshape(-1,1)
y_test = y_test.reshape(-1,1)
print(X_train_val.shape, X_test.shape, y_train_val.shape, y_test.shape)

torch.Size([512, 30]) torch.Size([57, 30]) torch.Size([512, 1]) torch.Size([57, 1])


In [7]:
# performs 5-fold cross validation to evaluate your models with 20% validation set
# hyperparameter selection should be done using X_train_val, y_train_val dataset
val_size = 0.2

In [7]:
# set model hyperparameters
d_in = X_train_val.shape[1] # this is fixed
d_out = 1 #this is also fixed because it is a binary classification problem

# use three different hidden layer configuration, one of which must be a network with 3-hidden layers (80, 40, 20)
h_layers = [(80, 40, 20), (60,30,15,10), (100,50,25,10,5)]

# use Sigmoid, Tanh, ReLU and LeakyReLU activation functions
# -> use the same activation function across the neural network
activation_fn = [nn.Sigmoid(), nn.ReLU(), nn.Tanh(), nn.LeakyReLU() ]

learning_rate = [0.01, 0.001, 0.0001]
n_epochs = [200,250,300]
d_out = 1

# Creating A Function for Model

In [8]:
def create_model(d_in, h_layers, activation_fn, d_out):
  layers = []

  # Input layer
  layers.append(nn.Linear(d_in, h_layers[0], bias=True))
  layers.append(activation_fn)

  # Hidden layers
  for i in range(len(h_layers) - 1):
    layers.append(nn.Linear(h_layers[i], h_layers[i+1]))
    layers.append(activation_fn)

  # Output layer
  layers.append(nn.Linear(h_layers[-1], d_out))
  layers.append(nn.Sigmoid()) #output transformation - due to being a binary classification problem we need Sigmoid function here

  model = nn.Sequential(*layers)
  return model

In [9]:
create_model(d_in, h_layers[0], nn.Sigmoid(), d_out)

Sequential(
  (0): Linear(in_features=30, out_features=80, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=80, out_features=40, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=40, out_features=20, bias=True)
  (5): Sigmoid()
  (6): Linear(in_features=20, out_features=1, bias=True)
  (7): Sigmoid()
)

# Creating Loss and Optimizer

In [10]:
# loss function and optimizer
# use binary cross entropy loss and Adam optimizer
loss_fn = nn.BCELoss()

import torch.optim as optim

# use 3 different learning rates 0.01, 0.001 and 0.0001
learning_rate = [0.01, 0.001, 0.0001]

def create_optimizer(model, learning_rate):
  optimizer = optim.Adam(model.parameters(), lr=learning_rate)
  return optimizer

# Creating Function for Model Training

In [11]:
def train_model(model, X_train, y_train, learning_rate, epochs, optimizer):
    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)

  for epoch in range(epochs):
    model.train()
    # forward pass
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)
    # backward pass
    # zero out grads
    optimizer.zero_grad()
    # compute gradients
    loss.backward()
    # update
    optimizer.step()
  #model.eval()
  #y_pred = model(X_test)
  #loss = loss_fn(y_pred, y_test)
  #print(f"Epoch {epoch} loss: {float(loss)}")
  #accuracy = accuracy_score(y_test, y_pred)
  #f1 = f1_score(y_test, y_pred)

  return model

In [12]:
def cross_validate_model(d_in, h_layers, activation_fn, d_out, learning_rate, X_train, y_train, epochs, n_splits=5):
  kf = KFold(n_splits=n_splits, shuffle=True, random_state = studentid)
  accuracies = []
  f1_scores = []
  losses = []

  for train_index, val_index in kf.split(X_train):
    X_train_cv, X_test_cv = X_train[train_index], X_train[val_index]
    y_train_cv, y_test_cv = y_train[train_index], y_train[val_index]

    model = create_model(d_in, h_layers, activation_fn, d_out)
    optimizer = create_optimizer(model, learning_rate)

    model = train_model(model, X_train_cv, y_train_cv, learning_rate, epochs, optimizer)

    model.eval()
    y_pred = model(X_test_cv)
    loss = loss_fn(y_pred, y_test_cv)

    y_pred_np = y_pred.detach().numpy()
    y_pred_binary = (y_pred_np >= 0.5).astype(int)

    accuracies.append(accuracy_score(y_test_cv, y_pred_binary))
    f1_scores.append(f1_score(y_test_cv, y_pred_binary))
    losses.append(loss.detach().numpy())
    #print(f"Epoch {epoch} loss: {float(loss)}")

  return np.mean(accuracies), np.mean(f1_scores), np.mean(losses)

In [13]:
mean_accuracy, mean_f1, losses = cross_validate_model(d_in, h_layers[0], activation_fn[0], d_out, 0.01, X_train_val, y_train_val, epochs=200)

In [14]:
np.mean(losses)

0.20041709

In [22]:
# performs 5-fold cross validation (use mean of the val-scores) and select best performing model
# Plot the training and validation accuracy/loss curves.

best_accuracy = 0
best_f1 = 0
best_params = {}
losses = []
epochs = []

for i in activation_fn:
  for j in h_layers:
    for k in learning_rate:
      for t in n_epochs:
        mean_accuracy, mean_f, mean_loss = cross_validate_model(d_in, j, i, d_out, k, X_train_val, y_train_val, epochs=t)
        print(f"Params: {i}, {j}, {k} {t} -> Accuracy: {mean_accuracy}, F1 Score: {mean_f1}")
        losses.append(mean_loss)
        epochs.append(t)


        if mean_accuracy > best_accuracy:
          best_accuracy = mean_accuracy
          best_f1 = mean_f1
          best_params = {'hidden_layers': j, 'activation': i, 'learning_rate': k, 'epochs': t}

print("Best Parameters:", best_params)
print("Best Accuracy:", best_accuracy)
print("Best F1 Score:", best_f1)

Params: Sigmoid(), (80, 40, 20), 0.01 200 -> Accuracy: 0.8767942128307634, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.01 250 -> Accuracy: 0.9238339996192652, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.01 300 -> Accuracy: 0.9413858747382449, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.001 200 -> Accuracy: 0.9141062250142775, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.001 250 -> Accuracy: 0.9355606320197982, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.001 300 -> Accuracy: 0.9199124309918142, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.0001 200 -> Accuracy: 0.626956025128498, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.0001 250 -> Accuracy: 0.626956025128498, F1 Score: 0.937929605907253
Params: Sigmoid(), (80, 40, 20), 0.0001 300 -> Accuracy: 0.626956025128498, F1 Score: 0.937929605907253
Params: Sigmoid(), (60, 30, 15, 10), 0.01 200 -> Accuracy: 0.919931

In [None]:
'''
Best parameters turned out to be:
Best Parameters: {'hidden_layers': (60, 30, 15, 10), 'activation': LeakyReLU(negative_slope=0.01), 'learning_rate': 0.01, 'epochs': 250}
Best Accuracy: 0.9512278697886922
Best F1 Score: 0.937929605907253
Now I will train the model with the best parameters
'''

In [33]:
# divide dataset into TRAIN, VALIDATION and TEST
#for avoiding any mix-ups with above functions, I run the splits again here with the same random_state to obtain the initial X test and X trains
from sklearn.model_selection import train_test_split
test_size = 0.1
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=test_size, shuffle=True, stratify=y, random_state=studentid)
print(X_train_val.shape, y_train_val.shape, X_test.shape, y_test.shape)

X_train_val = torch.tensor(X_train_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train_val = torch.tensor(y_train_val, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
y_train_val = y_train_val.reshape(-1,1)
y_test = y_test.reshape(-1,1)

(512, 30) (512,) (57, 30) (57,)


In [34]:
# train model with whole train_val dataset
model2deploy = create_model(d_in, best_params['hidden_layers'], best_params['activation'], d_out)
optimizer = optim.Adam(model2deploy.parameters(), best_params['learning_rate'])
train_model(model2deploy, X_train_val, y_train_val, best_params['learning_rate'], best_params['epochs'], optimizer)

Sequential(
  (0): Linear(in_features=30, out_features=100, bias=True)
  (1): LeakyReLU(negative_slope=0.01)
  (2): Linear(in_features=100, out_features=50, bias=True)
  (3): LeakyReLU(negative_slope=0.01)
  (4): Linear(in_features=50, out_features=25, bias=True)
  (5): LeakyReLU(negative_slope=0.01)
  (6): Linear(in_features=25, out_features=10, bias=True)
  (7): LeakyReLU(negative_slope=0.01)
  (8): Linear(in_features=10, out_features=5, bias=True)
  (9): LeakyReLU(negative_slope=0.01)
  (10): Linear(in_features=5, out_features=1, bias=True)
  (11): Sigmoid()
)

In [35]:
# Evaluate performance of the best performing model on the test dataset

model2deploy.eval()
y_pred = model2deploy(X_test)
test_loss = loss_fn(y_pred, y_test)

y_pred_np = y_pred.detach().numpy() #converting to numpy array in order to perform operations
y_pred_binary = (y_pred_np >= 0.5).astype(int)

test_accuracy = accuracy_score(y_test, y_pred_binary)

print(f"Test accuracy: {test_accuracy}, Test loss: {test_loss}")

Test accuracy: 0.9649122807017544, Test loss: 0.0848802998661995


In [38]:
# save best performing model to be deployed together with the hyperparameters
PATH = f"model_breast_cancer_{studentid}"
torch.save(model2deploy, PATH)
# save hyperparameters values for the best performing model
hyperparameters = {
    'layer_sizes': best_params['hidden_layers'],
    'activation_fn': best_params['activation'],
    'learning_rate': best_params['learning_rate'],
    'n_epoch': best_params['epochs']
}
import pickle
PATH_HYP = f"hyperparameters_{studentid}"
with open(PATH_HYP, 'wb') as f:
    pickle.dump(hyperparameters, f)

First, I defined a function for creating the model. I needed that function because for each hyperparameter combination, my model would be different.

Then, I defined my loss function. (binary cross entropy loss)

After that, I created another function for Optimizer, because there are again, different learning rates

I created a function for model training, I needed this for cross validation and hyperparameter tuning.

Lastly, I defined a function for implementing cross validations

My cross validation function included other functions in order to work.
I created for loops, for hyperparameter combinations. Each time, I kept that combinations average accuracy and F1 score (cross validation folds' average scores)

Using that average accuracy and F1 score lists, I obtained the best model and best parameters.

By using the best parameters, I trained my model on the whole X_train_val dataset.

Using this newly trained model2deploy named model, I made predictions for the test dataset and found the related accuracy and F1 scores.