In [1]:
import sys
sys.path.insert(0, '..')
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from cdw_cross_entropy_loss import CDW_CELoss
from torchmetrics.classification import MulticlassCohenKappa

# Data
We are going to work with the Wine Quality Dataset: [CSV file](https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv).

In [2]:
class WineDataset(Dataset):
    def __init__(self, root_dir: str, 
                 transform=None, 
                 target_transform=None):
        self.root_dir = root_dir
        self.df = pd.read_csv(self.root_dir, sep=',')
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform

        self.targets = self.df['quality'].values
        self.samples = self.df.drop('quality', axis=1).values
        
        if self.target_transform is not None:
            self.targets = self.target_transform(self.targets)

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        target = self.targets[idx]
        sample = self.samples[idx]
    
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, target

In [3]:
class WineDataModule(pl.LightningDataModule):
    def __init__(self, batch_size: int = 4, num_workers: int = 4):
        super().__init__()
        self.batch_size = batch_size
        self.num_workers = num_workers
        
    def prepare_data(self) -> None:
        df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', 
                 sep=";")
        ## Create Test and Train Set
        train, test = train_test_split(df, test_size=0.3, random_state=42)
        train.to_csv('data/wine_train.csv', index=False)
        test.to_csv('data/wine_test.csv', index=False)
    def setup(self, stage):
        self.train = WineDataset('data/wine_train.csv', target_transform = lambda x: x - 3)
        self.test  = WineDataset('data/wine_test.csv', target_transform = lambda x: x - 3)
    def train_dataloader(self):
        return DataLoader(self.train, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=True)
    def val_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=self.num_workers)
    def test_dataloader(self):
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=self.num_workers)


Initialize the model

In [4]:
dataloader = WineDataModule(batch_size=32)
dataloader.prepare_data()
dataloader.setup(stage="fit")

## Model

In [5]:
class SimpleNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_dim)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(0.2)
    def forward(self, x):
        x = self.dropout(self.fc1(x))
        x = self.gelu(x)
        x = self.dropout(self.fc2(x))
        x = self.gelu(x)
        x = self.fc3(x)
        return x

## Simple Cross-Entropy

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleNN(input_dim=11, output_dim=7).to(device) ## 11 features, and 7 classes
optim = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss = nn.CrossEntropyLoss().to(device)

kappa_train = MulticlassCohenKappa(num_classes=7, weights="quadratic").to(device)
kappa_test = MulticlassCohenKappa(num_classes=7, weights="quadratic").to(device)


NUM_EPOCHS = 50

In [7]:
for i in range(NUM_EPOCHS):
    for batch in dataloader.train_dataloader():
        x, y = batch
        
        y_hat = model(x.to(device).float())
        pred = torch.argmax(y_hat, dim=1)
        kappa_train.update(pred, y.to(device))
        l = loss(y_hat, y.to(device))
        l.backward()
        optim.step()
        optim.zero_grad()
    if i%5 == 0:
        print("Epoch %s: %.2f Kappa Score^2" %(i, kappa_train.compute()))


Epoch 0: 0.03 Kappa Score^2
Epoch 5: 0.01 Kappa Score^2
Epoch 10: 0.01 Kappa Score^2
Epoch 15: 0.03 Kappa Score^2
Epoch 20: 0.05 Kappa Score^2
Epoch 25: 0.07 Kappa Score^2
Epoch 30: 0.09 Kappa Score^2
Epoch 35: 0.10 Kappa Score^2
Epoch 40: 0.12 Kappa Score^2
Epoch 45: 0.13 Kappa Score^2


In [8]:
model.eval()
for batch in dataloader.test_dataloader():
    x, y = batch
    y_hat = model(x.to(device).float())
    pred = torch.argmax(y_hat, dim=1)
    kappa_test.update(pred, y.to(device))
print("Test Kappa Score^2: %.2f" %kappa_test.compute())

Test Kappa Score^2: 0.19


## CDW Loss 
Class distance weighted loss with the cross entropy

We found out that CDW Loss alone does not work well, it works only as a suplement to the CE Loss (see below).

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleNN(input_dim=11, output_dim=7).to(device) ## 11 features, and 7 classes
optim = torch.optim.AdamW(model.parameters(), lr=1e-3)
loss_cdw = CDW_CELoss(num_classes=7,
                  transform="huber",
                  alpha=1.5).to(device)
loss = nn.CrossEntropyLoss().to(device)


kappa_train = MulticlassCohenKappa(num_classes=7, weights="quadratic").to(device)
kappa_test = MulticlassCohenKappa(num_classes=7, weights="quadratic").to(device)


NUM_EPOCHS = 50

In [7]:
for i in range(NUM_EPOCHS):
    for batch in dataloader.train_dataloader():
        x, y = batch
        
        y_hat = model(x.to(device).float())
        probs = F.softmax(y_hat, dim=1)
        pred = torch.argmax(y_hat, dim=1)
        kappa_train.update(pred, y.to(device))
        l = 0.2 * loss_cdw(probs, y.to(device)) + 0.8 * loss(y_hat, y.to(device))
        l.backward()
        optim.step()
        optim.zero_grad()
    if i%5 == 0:
        print("Epoch %s: %.2f Kappa Score^2" %(i, kappa_train.compute()))

Epoch 0: 0.00 Kappa Score^2
Epoch 5: 0.05 Kappa Score^2
Epoch 10: 0.07 Kappa Score^2
Epoch 15: 0.09 Kappa Score^2
Epoch 20: 0.11 Kappa Score^2
Epoch 25: 0.13 Kappa Score^2
Epoch 30: 0.14 Kappa Score^2
Epoch 35: 0.16 Kappa Score^2
Epoch 40: 0.18 Kappa Score^2
Epoch 45: 0.19 Kappa Score^2


In [8]:
model.eval()
for batch in dataloader.test_dataloader():
    x, y = batch
    y_hat = model(x.to(device).float())
    pred = torch.argmax(y_hat, dim=1)
    kappa_test.update(pred, y.to(device))
print("Test Kappa Score^2: %.2f" %kappa_test.compute())

Test Kappa Score^2: 0.32
