In [1]:
# Pytorch Libraries and more
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold


# PyTorch NEEDS a data prep or "loader"
from torch.utils.data import DataLoader, TensorDataset

from sklearn.datasets import load_wine # studied in DATA 201
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix as cm

In [2]:
# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [5]:
## let's revisit the Wine Cultivar dataset
x = load_wine().data
y = load_wine().target

## Discussion: What is a Network Topology?

- input layer
- hidden layer with activations and connectivity
- output layer that addresses the machine learning problem to solve

In [66]:
# define a neural network model
# we care about the topology and activations; also we may include regularization such ElasticNet
class neuralnet_classfication(nn.Module): # the blueprint of the network
    def __init__(self,n_features):
        super(neuralnet_classfication, self).__init__()
        # here we defined the topology of the neural network
        # we want, for example, to have 40 neurons in the first layer
        # let's try a funnel shape with dense connections
        self.layer1 = nn.Linear(n_features, 40).double()
        self.a1 = nn.PReLU(40).double()
        self.g1 = nn.GELU().double()
        # we designed 20 neurons in the second layer
        self.layer2 = nn.Linear(40, 20).double()
        self.a2 = nn.PReLU(20).double()
        self.layer3 = nn.Linear(20,10).double()
        self.a3 = nn.PReLU(10).double()
        self.g3 = nn.GELU().double()
        # the output layer has only three neurons - the number of neurons is non-neogotiable
        self.layer_out = nn.Linear(10, 3).double()

    def forward(self, x):
        # here is where we actually apply the design to get outputs
        x = self.a1(self.layer1(x))
        x = self.a2(self.layer2(x))
        x = self.a3(self.layer3(x))
        x = self.layer_out(x)
        return x
    
    def elastic_net_regularization(self, alpha=0.01, l1_ratio=0.5):

      l1_reg = 0
      l2_reg = 0

      # Apply regularization to weights in all linear layers
      for name, param in self.named_parameters():
          if 'weight' in name:
              l1_reg += torch.norm(param, 1).double()  # L1 norm (sum of absolute values)
              l2_reg += torch.norm(param, 2).double() ** 2  # L2 norm squared (sum of squares)

      # Combine L1 and L2 regularization
      reg_loss = alpha *(l1_ratio * l1_reg + 0.5*(1-l1_ratio) * l2_reg)

      return reg_loss

In [67]:
list(model.named_parameters())

[('layer1.weight',
  Parameter containing:
  tensor([[ 0.1593, -0.0235,  0.0041, -0.1053,  0.1518,  0.2297, -0.0387,  0.0225,
            0.1275, -0.0801,  0.1013,  0.1397,  0.1318],
          [ 0.2078, -0.0763, -0.0033, -0.0346, -0.1512, -0.1727, -0.0471,  0.1098,
            0.1284,  0.2699,  0.0299, -0.2503,  0.1582],
          [-0.1821,  0.0766,  0.2101, -0.1789, -0.0852,  0.0995, -0.2194, -0.2164,
           -0.0355,  0.0049,  0.0263,  0.2563,  0.0029],
          [ 0.1207, -0.1251, -0.2289,  0.0945,  0.0161, -0.0855,  0.0930, -0.2187,
            0.0285,  0.0945,  0.2719, -0.1276, -0.1845],
          [-0.2210,  0.1306,  0.2043,  0.0736,  0.2051, -0.0037,  0.1827,  0.1189,
            0.1398,  0.0732, -0.0522, -0.1289, -0.2663],
          [-0.0907, -0.0335,  0.0729, -0.0808,  0.1710, -0.0941,  0.0036,  0.1052,
            0.0074,  0.0057, -0.1463,  0.0380,  0.0328],
          [-0.0370, -0.0104,  0.0734, -0.1045,  0.0606, -0.2407,  0.1583, -0.1512,
            0.1233,  0.1779, -0.07

In [68]:
# prep the data
# Standardize the features
scale = StandardScaler()

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.35, random_state=301)

# we need to put the data into pytorch tensors
# Convert the data to PyTorch tensors -> are the anolog of Numpy Arrays
X_train_tensor = torch.tensor(scale.fit_transform(X_train), dtype=torch.float64).to(device) # it means double precision
X_test_tensor = torch.tensor(scale.transform(X_test), dtype=torch.float64).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.long).to(device)

# i want to prepare an iterable object with mini-batches
# Create DataLoader for training and testing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
# this is for automating mini-stochastic batches
# so the method DataLoader is creating a iterable objects
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, shuffle=False)

In [69]:
# Initialize the model, loss function, and optimizer
model = neuralnet_classfication(X_train.shape[1]).to(device)
# the loss function (or criterion) is based on the problem you want to solve
criterion = nn.CrossEntropyLoss()
# we also have to pick a choice of a gradient-based method to update the weights
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [70]:
# Train the model
num_epochs = 30 # an epoch means a complete passage through all batches
for epoch in range(num_epochs):
    # a peculiar aspect of Pytorch -> you set the model in a "training" state
    model.train()
    for X_batch, y_batch in train_loader:
        # this resets the optimizer before each calculation of the direction for updating the weights
        optimizer.zero_grad() # resets the partial derivatives
        # do a forward propagation
        outputs = model(X_batch) # forward propagates
        # use the criterion to compute the loss of the batch
        main_loss = criterion(outputs, y_batch) # computes the loss
        reg_loss = model.elastic_net_regularization(alpha=0.1,l1_ratio=0.5)
        total_loss = main_loss + reg_loss
        # here we backpropagate
        total_loss.backward() # approximates the gradient
        optimizer.step() # updates the model parameters

    if (epoch+1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [20/30], Loss: 0.0000


In [60]:
# we check the accuracy
# Evaluate the model on the test set
model.eval() # here we lock against contamination the model parameters
with torch.no_grad():
    y_pred_list = [] # these are necessary tracker variables to extract the information
    y_true_list = []
    for X_batch, y_batch in test_loader:
        # here the outputs are the forward propagation of the model
        # at the learned weights
        outputs = model(X_batch)
        # here we apply the softmax
        _, y_pred = torch.max(outputs, 1) # this is picking the highest probability for each class
        y_pred_list.append(y_pred)
        y_true_list.append(y_batch)

    y_pred = torch.cat(y_pred_list)
    y_true = torch.cat(y_true_list)
    accuracy = accuracy_score(y_true.cpu().numpy(), y_pred.cpu().numpy())
    print(f'Accuracy on test set: {accuracy:.4f}')

Accuracy on test set: 0.3968


In [52]:
# for regression, test a similar design with the concrete data and also experiment with Gelu activations and ELastic Net