In [1]:
import numpy as np
import torch

!pip install ucimlrepo --quiet
from ucimlrepo import fetch_ucirepo

Import the wine quality dataset

In [2]:
  # fetch dataset
wine = fetch_ucirepo(id=186)

# data (as pandas dataframes)
X = wine.data.features
y = wine.data.targets

# metadata
print(X.columns)

# variable information
print(y.columns)

Index(['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar',
       'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
       'pH', 'sulphates', 'alcohol'],
      dtype='object')
Index(['quality'], dtype='object')


Remove any missing values from features and labels

In [3]:
bad = X.isna().any(axis=1)
X = X[~bad]
y = y[~bad]

In [4]:
X = X.values
y = y.values

Normalize y's values -- the minimum class for y is 3, and the maximum 9, so we will shift it downwards to min 0 and max 6, so that we only have the network predicting 7 classes instead of 10, which would be necessary if this step wasn't taken

In [5]:
y = y - 3

Turn X and y arrays into torch Tensors

In [6]:
X = torch.tensor(X).float()
y = torch.tensor(y).long().squeeze()

Divide data into train/test splits

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

Create Neural Network

In [8]:
from torch import nn

nn_model = torch.nn.Sequential(
    torch.nn.Linear(11, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 7)
)

Train Neural Network

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
nn_model.to(device)
X_train, y_train, X_test, y_test = X_train.to(device), y_train.to(device), X_test.to(device), y_test.to(device)
pass

In [10]:
# optimizer and loss for basic NN
lr = 1e-4
loss_fn = torch.nn.CrossEntropyLoss()
opt = torch.optim.SGD(nn_model.parameters(), lr=lr)

# accuracy scoring function
def accuracy(model, X, y):
  z = model(X)
  pred = torch.argmax(z, dim=1)
  return (pred==y).float().mean().item()

def train(model, loss_fn, opt):
  # training loop
  epochs = 2000
  for epoch in range(epochs):
    opt.zero_grad()
    z = model(X_train)
    loss = loss_fn(z, y_train)
    loss.backward()
    opt.step()
    if epoch % 100 == 0:
      print(f"epoch {epoch} loss = {loss:.4f} accuracy = {accuracy(model, X_test, y_test):.4f}")
    else:
      #print(f"epoch {epoch} loss = {loss}")
      pass

Evaluate Neural Network

In [11]:
train(nn_model, loss_fn, opt)
print(f"=== Train dataset accuracy: {accuracy(nn_model, X_train, y_train):.4f} ===")
print(f"=== Test dataset accuracy:  {accuracy(nn_model, X_test, y_test):.4f} ===")

epoch 0 loss = 2.5665 accuracy = 0.0338
epoch 100 loss = 1.5043 accuracy = 0.4262
epoch 200 loss = 1.4283 accuracy = 0.4215
epoch 300 loss = 1.4033 accuracy = 0.4292
epoch 400 loss = 1.3909 accuracy = 0.4338
epoch 500 loss = 1.3832 accuracy = 0.4323
epoch 600 loss = 1.3776 accuracy = 0.4354
epoch 700 loss = 1.3732 accuracy = 0.4431
epoch 800 loss = 1.3695 accuracy = 0.4446
epoch 900 loss = 1.3662 accuracy = 0.4462
epoch 1000 loss = 1.3633 accuracy = 0.4508
epoch 1100 loss = 1.3607 accuracy = 0.4508
epoch 1200 loss = 1.3582 accuracy = 0.4508
epoch 1300 loss = 1.3559 accuracy = 0.4585
epoch 1400 loss = 1.3538 accuracy = 0.4631
epoch 1500 loss = 1.3518 accuracy = 0.4631
epoch 1600 loss = 1.3499 accuracy = 0.4615
epoch 1700 loss = 1.3480 accuracy = 0.4631
epoch 1800 loss = 1.3463 accuracy = 0.4646
epoch 1900 loss = 1.3446 accuracy = 0.4631
=== Train dataset accuracy: 0.4283 ===
=== Test dataset accuracy:  0.4646 ===


After training the neural network using only basic gradient descent, we are seeing:  
training dataset accuracy of 0.4330  
test dataset accuracy of     0.4692  
  
Now we will apply some techniques learned in class

In [23]:
# new nn to avoid inheriting params from previous one
# also added an additional hidden layer
nn_model2 = torch.nn.Sequential(
    torch.nn.Linear(8, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 100),
    torch.nn.SiLU(),
    torch.nn.Linear(100, 1),
    torch.nn.Sigmoid()
)
nn_model2.to(device)
pass

Data manipulation to facilitate better training outcomes

In [13]:
X2 = X - X.mean(dim=0)
X2 /= X2.std(dim=0)

PCA transformation and new train/test splits

In [14]:
from sklearn.decomposition import PCA

pca = PCA(n_components=8, whiten=True)
pca.fit(X_train.cpu().numpy())

X2 = pca.transform(X2.cpu().numpy())

y_norm = (y + 3) / 10.0  # scale to between 0 and 1
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y_norm.cpu().numpy(), test_size=0.2, random_state=42)

# Convert the NumPy arrays back to PyTorch tensors and move to the device
X2_train, y2_train, X2_test, y2_test = \
torch.tensor(X2_train).to(device), \
torch.tensor(y2_train).to(device), \
torch.tensor(X2_test).to(device), \
torch.tensor(y2_test).to(device)

SGD optimization with L2 Regularization

In [15]:
lr2 = 1e-4
opt2 = torch.optim.SGD(nn_model2.parameters(), lr=lr2, weight_decay=0.0001)

Batch data loading

In [16]:
from torch.utils.data import TensorDataset, DataLoader

# training data loader
train_ds = TensorDataset(X2_train, y2_train)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)

# testing data loader
test_ds = TensorDataset(X2_test, y2_test)
test_dl = DataLoader(test_ds, batch_size=32)

In [21]:
loss_fn2 = torch.nn.MSELoss()

# new accuracy scoring function
def accuracy2(model2, data_loader):
  correct_preds, total_preds = 0, 0

  for x, y in data_loader:
    x, y = x.to(device), y.to(device)
    z = model2(x)
    correct_preds += (torch.round(z, decimals=1) == torch.round(y, decimals=1)).sum().item()
    total_preds += y.size(0)

  accuracy = correct_preds / total_preds
  return accuracy

def train2(model2, loss_fn2, opt2):
  # new training loop
  epochs = 2000
  for epoch in range(epochs):
    for x, y in train_dl:
      x, y = x.to(device), y.to(device)
      opt2.zero_grad()
      z = model2(x)
      loss = loss_fn2(z, y)
      loss.backward()
      opt2.step()
    if epoch % 100 == 0:
      print(f"epoch {epoch} loss = {loss:.4f} accuracy = {accuracy2(nn_model2, test_dl):.4f}")
    else:
      #print(f"epoch {epoch} loss = {loss}")
      pass

In [None]:
train2(nn_model2, loss_fn2, opt2)
print(f"=== Train dataset accuracy: {accuracy2(nn_model2, train_dl):.4f} ===")
print(f"=== Test dataset accuracy:  {accuracy2(nn_model2, test_dl):.4f} ===")

epoch 0 loss = 0.0184 accuracy = 9.8308


We were able to improve the test dataset accuracy by almost 7 percent! Interestingly, the test dataset accuracy is higher than the train dataset accuracy for both models