In [108]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import json
import warnings

In [109]:
# Silence only the known, benign PyTorch UserWarning that is emitted when
# torch.tensor() is called on an object that is already a tensor. A blanket
# "ignore" would also hide unrelated deprecation/numerical warnings.
warnings.filterwarnings(
    "ignore",
    message="To copy construct from a tensor",
    category=UserWarning,
)

In [110]:
class WineDataset(Dataset):
    """Two-class subset of scikit-learn's Wine dataset exposed as tensors.

    Only rows whose ``target`` is 0 or 1 are kept. Every feature column is
    standardised (zero mean, unit standard deviation) using statistics
    computed over all retained rows.

    NOTE(review): the normalisation statistics are computed before any
    train/test split, so held-out rows contribute to them.

    Attributes:
        X: float32 tensor of standardised features, one row per sample.
        y: int64 tensor of class labels (0 or 1), one entry per sample.
    """

    def __init__(self):
        super().__init__()
        wine = load_wine(as_frame=True)
        dataset = pd.concat([wine.data, wine.target], axis=1)
        print(f'Original Wine Dataset: Samples = {len(dataset)}, Labels = {dataset["target"].unique()}, Features = {len(dataset.columns)-1}')
        dataset = dataset[dataset['target'] < 2]
        print(f'Updated Wine Dataset: Samples = {len(dataset)}, Labels = {dataset["target"].unique()}, Features = {len(dataset.columns)-1}')

        # Select by column name rather than by a hard-coded column position.
        features = torch.tensor(dataset.drop(columns='target').values)
        self.y = torch.tensor(dataset['target'].values, dtype=torch.int64)

        # normalize X (statistics are computed in the source precision, then
        # the result is stored once as float32 so __getitem__ needs no cast)
        features = (features - features.mean(dim=0)) / features.std(dim=0)
        self.X = features.to(torch.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for ``idx``.

        The tensors are indexed directly instead of being re-wrapped with
        ``torch.tensor(...)``; re-wrapping an existing tensor copies it on
        every access and triggers PyTorch's "copy construct" UserWarning.
        The returned tensors share storage with ``self.X`` / ``self.y``.
        """
        return self.X[idx], self.y[idx]

In [111]:
class LogisticRegressionModel(nn.Module):
    """Multinomial logistic regression: one affine layer plus log-softmax.

    Args:
        feat_dim: Number of input features per sample.
        output_dim: Number of classes.
    """

    def __init__(self, feat_dim=13, output_dim=2):
        super().__init__()
        self.linear = nn.Linear(
            in_features=feat_dim,
            out_features=output_dim,
            bias=True,
        )

    def forward(self, x):
        """Return log-probabilities over the last dimension of the logits."""
        logits = self.linear(x)
        log_probs = F.log_softmax(logits, dim=-1)
        return log_probs

In [112]:
def train_model(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Module mapping ``X`` to per-class scores; predictions are
            taken as ``argmax`` over dimension 1.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        Tuple ``(average_train_loss, accuracy)`` where the loss is the
        unweighted mean of the per-batch losses and the accuracy is the
        fraction of correctly classified samples among those actually
        yielded by the loader.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    model.train()

    train_loss, correct = 0.0, 0
    # Count what the loader really yields instead of trusting
    # len(dataloader.dataset): with drop_last=True or a custom sampler the
    # loader may visit fewer samples than the dataset holds.
    num_batches, num_samples = 0, 0

    for X, y in dataloader:

        pred = model(X)
        loss = loss_fn(pred, y)
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).sum().item()
        num_batches += 1
        num_samples += len(y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches")

    average_train_loss = train_loss / num_batches
    accuracy = correct / num_samples

    return average_train_loss, accuracy

In [113]:
def eval_model(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without updating parameters.

    The model is switched to eval mode and the forward passes run under
    ``torch.no_grad()`` so no autograd graph is built.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Module mapping ``X`` to per-class scores; predictions are
            taken as ``argmax`` over dimension 1.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.

    Returns:
        Tuple ``(average_eval_loss, accuracy)`` where the loss is the
        unweighted mean of the per-batch losses and the accuracy is the
        fraction of correctly classified samples among those actually
        yielded by the loader.

    Raises:
        ValueError: If the dataloader yields no batches.
    """
    model.eval()

    eval_loss, correct = 0.0, 0
    # Count what the loader really yields instead of trusting
    # len(dataloader.dataset): with drop_last=True or a custom sampler the
    # loader may visit fewer samples than the dataset holds.
    num_batches, num_samples = 0, 0

    # Gradients are never used here; disabling them avoids graph
    # construction and the memory that comes with it.
    with torch.no_grad():
        for X, y in dataloader:

            pred = model(X)
            loss = loss_fn(pred, y)
            eval_loss += loss.item()
            correct += (pred.argmax(1) == y).sum().item()
            num_batches += 1
            num_samples += len(y)

    if num_batches == 0:
        raise ValueError("dataloader yielded no batches")

    average_eval_loss = eval_loss / num_batches
    accuracy = correct / num_samples

    return average_eval_loss, accuracy

In [None]:
def experiment(batch_size, epochs, lr, seed=None, log_every=25):
    """Train and evaluate logistic regression on the two-class Wine data.

    The dataset is split 80/20 into train and test subsets, a
    ``LogisticRegressionModel`` is trained with plain SGD and NLL loss for
    ``epochs`` epochs, and the final model is evaluated once on the test
    subset.

    Args:
        batch_size: Mini-batch size for both loaders.
        epochs: Number of passes over the training subset.
        lr: SGD learning rate.
        seed: Optional integer. When given, PyTorch's global RNG is seeded
            before the split so the split, the shuffling order and the
            initial weights are repeatable. ``None`` keeps the previous
            unseeded behaviour.
        log_every: Print training metrics every ``log_every`` epochs; a
            falsy value disables the periodic printout.

    Returns:
        List with the average training loss of every epoch, in order.
    """
    if seed is not None:
        # random_split, the shuffling DataLoader and nn.Linear's weight
        # initialisation all draw from the global generator by default.
        torch.manual_seed(seed)

    dataset = WineDataset()

    # Define sizes (80% train, 20% test)
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Take the input width from the data instead of hard-coding it.
    feat_dim = dataset.X.shape[1]
    model = LogisticRegressionModel(feat_dim=feat_dim, output_dim=2)
    loss_fn = nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    train_losses = []
    for e in range(1, epochs+1):
        train_loss, train_accuracy = train_model(train_loader, model, loss_fn, optimizer)
        train_losses.append(train_loss)
        if log_every and e % log_every == 0:
            print(f'Training Epoch {e}/{epochs}: Train Loss: {train_loss}, Accuracy: {train_accuracy:.4f}')

    test_loss, test_accuracy = eval_model(test_loader, model, loss_fn)
    print(f'Test evaluation: Loss: {test_loss}, Accuracy: {test_accuracy:.4f}')

    return train_losses

In [115]:
# Run the SGD experiment: mini-batches of 16, 300 epochs, learning rate 0.01.
losses = experiment(batch_size=16, epochs=300, lr=0.01)

Original Wine Dataset: Samples = 178, Labels = [0 1 2], Features = 13
Updated Wine Dataset: Samples = 130, Labels = [0 1], Features = 13
Training Epoch 25/300: Train Loss: 0.1829517654010228, Accuracy: 0.9423
Training Epoch 50/300: Train Loss: 0.11985122306006295, Accuracy: 0.9808
Training Epoch 75/300: Train Loss: 0.0911063626408577, Accuracy: 1.0000
Training Epoch 100/300: Train Loss: 0.07798221388033458, Accuracy: 1.0000
Training Epoch 125/300: Train Loss: 0.06328266326870237, Accuracy: 1.0000
Training Epoch 150/300: Train Loss: 0.05940225374485765, Accuracy: 1.0000
Training Epoch 175/300: Train Loss: 0.05318280043346541, Accuracy: 1.0000
Training Epoch 200/300: Train Loss: 0.04422071283417089, Accuracy: 1.0000
Training Epoch 225/300: Train Loss: 0.04329369296985013, Accuracy: 1.0000
Training Epoch 250/300: Train Loss: 0.036922618480665345, Accuracy: 1.0000
Training Epoch 275/300: Train Loss: 0.03617765421846083, Accuracy: 1.0000
Training Epoch 300/300: Train Loss: 0.033970478789082

In [117]:
# Save the per-epoch training losses returned by experiment() as
# pretty-printed JSON.
with open("losses-SGD.json", "w") as out_file:
    out_file.write(json.dumps(losses, indent=4))