In [404]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset, random_split
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import json
import warnings

In [405]:
warnings.filterwarnings("ignore")

In [406]:
class WineDataset(Dataset):
    def __init__(self):
        super().__init__()
        wine = load_wine(as_frame=True)
        dataset = pd.concat([wine.data, wine.target], axis=1)
        # print(f'Original Wine Dataset: Samples = {len(dataset)}, Labels = {dataset["target"].unique()}, Features = {len(dataset.columns)-1}')
        dataset = dataset[dataset['target'] < 2]
        # print(f'Updated Wine Dataset: Samples = {len(dataset)}, Labels = {dataset["target"].unique()}, Features = {len(dataset.columns)-1}')
        self.X = torch.tensor(dataset.iloc[:, :13].values, dtype=torch.float32)
        self.y = torch.tensor(dataset.iloc[:, 13].values, dtype=torch.float32)

        # normalize X
        self.X = (self.X - self.X.mean(dim=0)) / self.X.std(dim=0)
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        features = torch.tensor(self.X[idx], dtype=torch.float32)
        label = torch.tensor(self.y[idx], dtype=torch.int64)
        return features, label

In [407]:
def forward(w, X, b):
    return F.sigmoid((X @ w) + b)

def loss_fn(proba, truth):
    log_p = torch.log(proba)
    log_q = torch.log(1 - proba)
    return -1 * torch.mean(truth * log_p + (1-truth) * log_q)

def weight_gradients(X, y_proba, y_truth):
    grads = (y_proba - y_truth).unsqueeze(-1) * X
    return torch.mean(grads, dim=0) # mean over batch

def bias_gradient(y_proba, y_truth):
    grads = (y_proba - y_truth)
    return torch.mean(grads, dim=0) # mean over batch

In [None]:
def train_model(dataloader, w, b, lr, k):
    num_samples = len(dataloader.dataset)
    num_batches = len(dataloader)

    train_loss, correct = 0.0, 0

    for batch, (X, y) in enumerate(dataloader):

        y_proba = forward(w, X, b)
        loss = loss_fn(y_proba, y)
        train_loss += loss.item()
        correct += ((y_proba > 0.5).int() == y).float().sum().item()

        # Backpropagation
        grads_w = weight_gradients(X, y_proba, y)
        grad_b = bias_gradient(y_proba, y)

        # Update bias
        b = b - lr * (grad_b)
        
        # Choose top-k coordinates
        indices = torch.argsort(torch.abs(grads_w), descending=True)
        update_idx = indices[:k]  # Select top-k coordinates

        # Update the top-k coordinates
        w[update_idx] = w[update_idx] - lr * grads_w[update_idx]

        # Decay all other coordinates to zero
        zero_idx = indices[k:]  # Indices of the bottom d-k coordinates
        w[zero_idx] *= 0.9 
    
    average_train_loss = train_loss/num_batches
    accuracy = correct / num_samples

    return average_train_loss, accuracy

In [409]:
def eval_model(dataloader, w, b):
    num_samples = len(dataloader.dataset)
    num_batches = len(dataloader)

    eval_loss, correct = 0.0, 0

    for batch, (X, y) in enumerate(dataloader):

        y_proba = forward(w, X, b)
        loss = loss_fn(y_proba, y)
        eval_loss += loss.item()
        correct += ((y_proba > 0.5).int() == y).float().sum().item()

    average_eval_loss = eval_loss/num_batches
    accuracy = correct / num_samples

    return average_eval_loss, accuracy

In [410]:
def experiment(batch_size, epochs, lr, k):
    dataset = WineDataset()

    # Define sizes (80% train, 20% test)
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    w = torch.zeros(13)
    b = torch.zeros(1)

    train_losses = []
    for e in range(1, epochs+1):
        train_loss, train_accuracy = train_model(train_loader, w, b, lr, k)
        train_losses.append(train_loss)
        if e % 25 == 0:
            print(f'Training Epoch {e}/{epochs}: Train Loss: {train_loss}, Accuracy: {train_accuracy:.4f}')
        
    test_loss, test_accuracy = eval_model(test_loader, w, b)
    print(f'Test evaluation: Loss: {test_loss}, Accuracy: {test_accuracy:.4f}')

    return train_losses

In [411]:
def experiment2(batch_size, epochs, lr, k):
    dataset = WineDataset()

    # Define sizes (80% train, 20% test)
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

    # Data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    w = torch.zeros(13)
    b = torch.zeros(1)

    data = []
    for e in range(1, epochs+1):
        train_loss, train_accuracy = train_model(train_loader, w, b, lr, k)
        if e % 300 == 0:
            data.extend([train_loss, train_accuracy])
        
    test_loss, test_accuracy = eval_model(test_loader, w, b)
    data.extend([test_loss, test_accuracy])

    # print("Params, ", w, b)
        
    return data

In [412]:
losses = experiment(16, 300, 0.01, 10)
with open("losses-CD-KSparse.json", "w") as file:
    json.dump(losses, file, indent=4)

Training Epoch 25/300: Train Loss: 0.3034490815230778, Accuracy: 0.9904
Training Epoch 50/300: Train Loss: 0.23803700506687164, Accuracy: 1.0000
Training Epoch 75/300: Train Loss: 0.2339185561452593, Accuracy: 1.0000
Training Epoch 100/300: Train Loss: 0.23800951029573167, Accuracy: 0.9904
Training Epoch 125/300: Train Loss: 0.20268521351473673, Accuracy: 1.0000
Training Epoch 150/300: Train Loss: 0.21066935786179133, Accuracy: 0.9904
Training Epoch 175/300: Train Loss: 0.20849287935665675, Accuracy: 0.9904
Training Epoch 200/300: Train Loss: 0.20396849513053894, Accuracy: 1.0000
Training Epoch 225/300: Train Loss: 0.18715218773909978, Accuracy: 0.9904
Training Epoch 250/300: Train Loss: 0.20030975554670608, Accuracy: 1.0000
Training Epoch 275/300: Train Loss: 0.21640215814113617, Accuracy: 1.0000
Training Epoch 300/300: Train Loss: 0.18903888123376028, Accuracy: 0.9904
Test evaluation: Loss: 0.2127237468957901, Accuracy: 0.9231


In [413]:
execution_data = {}
for k in range(1, 14):
    execution_data[k] = []
    for trial in range(20):
        execution_data[k].append(experiment2(16, 300, 0.01, k))

with open("losses-CD-Ksparse-Trials2.json", "w") as file:
    json.dump(execution_data, file, indent=4)