In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression


from torch.utils.data import DataLoader, TensorDataset

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

In [2]:
# let's import the wine/cultivar dataset
wine_data = load_wine()
X = wine_data.data
y = wine_data.target

In [8]:
pd.DataFrame(data=X,columns=wine_data.feature_names)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0


In [4]:
xtrain, xtest, ytrain, ytest = train_test_split(X,y,test_size=0.35,shuffle=True,random_state=301)

In [5]:
# perhaps a good idea is to scale the input features
scaler = StandardScaler()
xtrain_scaled = scaler.fit_transform(xtrain)
xtest_scaled = scaler.transform(xtest)

In [6]:
X_train_tensor = torch.tensor(xtrain_scaled, dtype=torch.float64)
X_test_tensor = torch.tensor(xtest_scaled, dtype=torch.float64)
y_train_tensor = torch.tensor(ytrain, dtype=torch.long)
y_test_tensor = torch.tensor(ytest, dtype=torch.long)

# Create DataLoader for training and testing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, shuffle=False)

In [12]:
# Define a simple neural network
class MyFirstNet(nn.Module):
    def __init__(self, n_features):
        super(MyFirstNet, self).__init__()
        # let's define the topology first
        self.layer1 = nn.Linear(n_features, 40).double()
        self.layer2 = nn.Linear(40, 30).double()
        self.layer3 = nn.Linear(30,20).double()
        self.layer_out = nn.Linear(20,3).double()
        # let's define the activations for each layer
        self.a1 = nn.PReLU().double()
        self.a2 = nn.PReLU().double()
        self.a3 = nn.PReLU().double()

    def forward(self, x):
        x = self.a1(self.layer1(x))  # Apply PReLU activation
        x = self.a2(self.layer2(x))
        x = self.a3(self.layer3(x))
        x = self.layer_out(x)
        return x

# Initialize the model, loss function, and optimizer
model = MyFirstNet(xtrain.shape[1])
# here we also choose what loss function to optimize for the machine learning
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [13]:
# Train the model
num_epochs = 20
for epoch in range(num_epochs):
    # a peculiar aspect of Pytorch -> you put the model in a "training" state
    model.train()
    for X_batch, y_batch in train_loader:
        # this resets the optimizer before each calculation of the direction for updating the weights
        optimizer.zero_grad()
        # do a forward propagation
        outputs = model(X_batch)
        # use the criterion to compute the loss of the batch
        loss = criterion(outputs, y_batch)
        # here we backpropagate to update the weigths
        loss.backward()# the approximation of the gradient 
        optimizer.step() # the actual update of weights with the algorithm chosen

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/20], Loss: 0.0000
Epoch [20/20], Loss: 0.0000


In [14]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    # always have tracker avriables for what you need
    y_pred_list = []
    y_true_list = []
    for X_batch, y_batch in train_loader:
        outputs = model(X_batch)
        _, y_pred = torch.max(outputs, 1)
        y_pred_list.append(y_pred)
        y_true_list.append(y_batch)

    y_pred = torch.cat(y_pred_list)
    y_true = torch.cat(y_true_list)
    accuracy = accuracy_score(y_true.numpy(), y_pred.numpy())
    print(f'Accuracy on train set: {accuracy:.4f}')

Accuracy on train set: 1.0000


In [15]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    # always have tracker variables for what you need
    y_pred_list = []
    y_true_list = []
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, y_pred = torch.max(outputs, 1)
        y_pred_list.append(y_pred)
        y_true_list.append(y_batch)

    y_pred = torch.cat(y_pred_list)
    y_true = torch.cat(y_true_list)
    accuracy = accuracy_score(y_true.numpy(), y_pred.numpy())
    print(f'Accuracy on test set: {accuracy:.4f}')

Accuracy on test set: 0.9524


## Stratified KFold Cross-Validation

In [18]:
def skf_evaluation(x,y,k=5):
    skf = StratifiedKFold(n_splits=k,shuffle=True,random_state=301)
    acc = []
    for j, (idxtrain, idxtest) in enumerate(skf.split(x,y)):
        xtrain_scaled = scaler.fit_transform(x[idxtrain])
        xtest_scaled = scaler.transform(x[idxtest])
        ytrain = y[idxtrain]
        ytest = y[idxtest]
        X_train_tensor = torch.tensor(xtrain_scaled, dtype=torch.float64)
        X_test_tensor = torch.tensor(xtest_scaled, dtype=torch.float64)
        y_train_tensor = torch.tensor(ytrain, dtype=torch.long)
        y_test_tensor = torch.tensor(ytest, dtype=torch.long)

        # Create DataLoader for training and testing
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        test_loader = DataLoader(test_dataset, shuffle=False)
        model = MyFirstNet(xtrain.shape[1])
        # here we also choose what loss function to optimize for the machine learning
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)

        # now train the model on the train data and collect the accuracy on the test
        # Train the model
        num_epochs = 20
        for epoch in range(num_epochs):
            # a peculiar aspect of Pytorch -> you put the model in a "training" state
            model.train()
            for X_batch, y_batch in train_loader:
                # this resets the optimizer before each calculation of the direction for updating the weights
                optimizer.zero_grad()
                # do a forward propagation
                outputs = model(X_batch)
                # use the criterion to compute the loss of the batch
                loss = criterion(outputs, y_batch)
                # here we backpropagate to update the weigths
                loss.backward()# the approximation of the gradient 
                optimizer.step() # the actual update of weights with the algorithm chosen
                # if (epoch+1) % 10 == 0:
                #     print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

        # use the trained model to predict on the test set
        model.eval()
        with torch.no_grad():
            # always have tracker variables for what you need
            y_pred_list = []
            y_true_list = []
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch)
                _, y_pred = torch.max(outputs, 1)
                y_pred_list.append(y_pred)
                y_true_list.append(y_batch)

            y_pred = torch.cat(y_pred_list)
            y_true = torch.cat(y_true_list)
            accuracy = accuracy_score(y_true.numpy(), y_pred.numpy())
            acc.append(accuracy)
            print(f'Accuracy on test set #{j}: {accuracy:.4f}')
    return np.mean(acc)
        
     

In [20]:
skf_evaluation(X,y,k=5)

Accuracy on test set #0: 0.9722
Accuracy on test set #1: 0.9722
Accuracy on test set #2: 0.9722
Accuracy on test set #3: 1.0000
Accuracy on test set #4: 1.0000


np.float64(0.9833333333333332)

## A Regression Neural Network Model

In [2]:
data = pd.read_csv('https://github.com/dvasiliu/AML/blob/main/Data%20Sets/housing.csv?raw=true')

In [22]:
data

Unnamed: 0,town,tract,longitude,latitude,crime,residential,industrial,river,nox,rooms,older,distance,highway,tax,ptratio,lstat,cmedv
0,Nahant,2011,-70.955002,42.255001,0.00632,18.0,2.31,no,0.538,6.575,65.199997,4.0900,1,296,15.300000,4.98,24.000000
1,Swampscott,2021,-70.949997,42.287498,0.02731,0.0,7.07,no,0.469,6.421,78.900002,4.9671,2,242,17.799999,9.14,21.600000
2,Swampscott,2022,-70.935997,42.283001,0.02729,0.0,7.07,no,0.469,7.185,61.099998,4.9671,2,242,17.799999,4.03,34.700001
3,Marblehead,2031,-70.928001,42.292999,0.03237,0.0,2.18,no,0.458,6.998,45.799999,6.0622,3,222,18.700001,2.94,33.400002
4,Marblehead,2032,-70.921997,42.298000,0.06905,0.0,2.18,no,0.458,7.147,54.200001,6.0622,3,222,18.700001,5.33,36.200001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,Winthrop,1801,-70.986000,42.231201,0.06263,0.0,11.93,no,0.573,6.593,69.099998,2.4786,1,273,21.000000,9.67,22.400000
502,Winthrop,1802,-70.990997,42.227501,0.04527,0.0,11.93,no,0.573,6.120,76.699997,2.2875,1,273,21.000000,9.08,20.600000
503,Winthrop,1803,-70.994797,42.226002,0.06076,0.0,11.93,no,0.573,6.976,91.000000,2.1675,1,273,21.000000,5.64,23.900000
504,Winthrop,1804,-70.987503,42.223999,0.10959,0.0,11.93,no,0.573,6.794,89.300003,2.3889,1,273,21.000000,6.48,22.000000


In [3]:
# to retain just the numerical variables do this
x = data.drop('river',axis=1).loc[:,'crime':'lstat'].values
y = data['cmedv'].values

In [14]:
class neuralnet_regression(nn.Module):
    def __init__(self, n_features):
        super(neuralnet_regression, self).__init__()
        # let's define the topology first
        self.layer1 = nn.Linear(n_features, 30).double()
        self.layer2 = nn.Linear(30, 20).double()
        self.layer3 = nn.Linear(10,5).double()
        self.layer_out = nn.Linear(20,1).double()
        # let's define the activations for each layer
        self.a1 = nn.PReLU().double()
        self.a2 = nn.PReLU().double()
        self.a3 = nn.GELU().double()

    def forward(self, x):
        x = self.a1(self.layer1(x))  # Apply PReLU activation
        x = self.a2(self.layer2(x))
        #x = self.a3(self.layer3(x))
        x = self.layer_out(x)
        return x
    def elastic_net_regularization(self, alpha=0.01, l1_ratio=0.5):

      l1_reg = 0
      l2_reg = 0

      # Apply regularization to weights in all linear layers
      for name, param in self.named_parameters():
          if 'weight' in name:
              l1_reg += torch.norm(param, 1).double()  # L1 norm (sum of absolute values)
              l2_reg += torch.norm(param, 2).double() ** 2  # L2 norm squared (sum of squares)

      # Combine L1 and L2 regularization
      reg_loss = alpha *(l1_ratio * l1_reg + 0.5*(1-l1_ratio) * l2_reg)

      return reg_loss

In [35]:
model = neuralnet_regression(x.shape[1])
criterion = nn.MSELoss().double()
optimizer = optim.Adam(model.parameters(),lr=0.001)

In [36]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.35,shuffle=True,random_state=301)

In [37]:
# perhaps a good idea is to scale the input features
scaler = StandardScaler()
xtrain_scaled = scaler.fit_transform(xtrain)
xtest_scaled = scaler.transform(xtest)

In [44]:
X_train_tensor = torch.tensor(xtrain_scaled, dtype=torch.float64)
X_test_tensor = torch.tensor(xtest_scaled, dtype=torch.float64)
y_train_tensor = torch.tensor(ytrain, dtype=torch.float64)
y_test_tensor = torch.tensor(ytest, dtype=torch.float64)

# Create DataLoader for training and testing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
test_loader = DataLoader(test_dataset, shuffle=False)

In [45]:
model = neuralnet_regression(x.shape[1])
# Train the model
num_epochs = 100
for epoch in range(num_epochs):
    # a peculiar aspect of Pytorch -> you put the model in a "training" state
    model.train()
    for X_batch, y_batch in train_loader:
        # this resets the optimizer before each calculation of the direction for updating the weights
        optimizer.zero_grad()
        # do a forward propagation
        outputs = model(X_batch)
        # use the criterion to compute the loss of the batch
        main_loss = criterion(outputs, y_batch)
        reg_loss = model.elastic_net_regularization(alpha=0.02,l1_ratio=0.5)
        loss = main_loss + reg_loss
        # here we backpropagate to update the weigths
        loss.backward()# the approximation of the gradient 
        optimizer.step() # the actual update of weights with the algorithm chosen

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss (MSE): {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [10/100], Loss (MSE): 370.6787
Epoch [20/100], Loss (MSE): 820.9691
Epoch [30/100], Loss (MSE): 771.1392
Epoch [40/100], Loss (MSE): 942.0142
Epoch [50/100], Loss (MSE): 529.2059
Epoch [60/100], Loss (MSE): 577.5363
Epoch [70/100], Loss (MSE): 939.5683
Epoch [80/100], Loss (MSE): 622.3169
Epoch [90/100], Loss (MSE): 571.3141
Epoch [100/100], Loss (MSE): 552.8695
