# Initialization

## Import libraries

In [13]:
import torch
import pandas as pd
import numpy as np

import torch.nn as nn
from torchvision import models
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

# SECURITY NOTE(review): the assignment below replaces the standard library's
# default HTTPS context factory with the unverified one, so TLS certificates are
# not checked for HTTPS requests made through urllib/http.client in this kernel,
# not only for the download that was failing. Prefer fixing the local CA bundle
# and removing this workaround before sharing or deploying the notebook.
# NOTE(review): torchaudio is not imported in the visible cells; presumably the
# failing download is the torchvision pretrained-weights fetch. Confirm.
import ssl # Quickfix to torchaudio ssl error
ssl._create_default_https_context = ssl._create_unverified_context

## Import data

In [None]:
# TODO: replace this placeholder with the real data load.
# The split cell further down indexes `train` with `.iloc` and reads a 'label'
# column, so it expects a pandas DataFrame; the string assigned here will fail
# at that point.
train = "load data here"

# Preprocessing

In [14]:
class CustomDataset(Dataset):
    """Tensor-backed dataset built from a feature frame and optional labels.

    Features are stored as a float32 tensor and labels, when supplied, as an
    int64 tensor. Without labels the dataset yields feature rows only.
    """

    def __init__(self, features: pd.DataFrame, labels: pd.Series = None):
        # TODO: Format the dataset class to match with the current image format
        self.features = torch.tensor(features.values, dtype=torch.float32)
        if labels is None:
            # Unlabelled mode: __getitem__ returns features alone.
            self.labels = None
        else:
            self.labels = torch.tensor(labels.values, dtype=torch.int64)

    def __len__(self):
        """Number of rows (first dimension of the feature tensor)."""
        return self.features.shape[0]

    def __getitem__(self, idx):
        """Return `features[idx]`, paired with `labels[idx]` when labels exist."""
        sample = self.features[idx]
        if self.labels is not None:
            return sample, self.labels[idx]
        return sample

In [None]:
# Positional hold-out split (no shuffling): the first `train_frac` of the rows
# are used for training and the remaining rows for validation.
train_frac = 0.8
train_len = int(train_frac * len(train))
train_data, validation_data = train.iloc[:train_len], train.iloc[train_len:]

train_dataset = CustomDataset(train_data.drop(columns='label'), train_data['label'])
# The validation set must come from the held-out rows. Building it from
# `train_data` would make the validation metrics a re-score of the training set.
validation_dataset = CustomDataset(validation_data.drop(columns='label'), validation_data['label'])

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
# Shuffling has no effect on aggregated validation metrics, so keep the order fixed.
validation_dataloader = DataLoader(validation_dataset, batch_size=64, shuffle=False)

# Modeling

## Model construction

In [4]:
class DiabetesClassif(nn.Module):
    """MobileNetV3-Large backbone with a replaced classification layer.

    The backbone is loaded with pretrained weights and the layer at index 3 of
    its `classifier` is swapped for a fresh linear layer with `num_classes`
    outputs, keeping that layer's original input width.

    Args:
        num_classes: Number of output units of the new linear layer.
    """

    def __init__(self, num_classes):
        super().__init__()
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`. Confirm against the installed version.
        backbone = models.mobilenet_v3_large(pretrained=True)
        head_in_features = backbone.classifier[3].in_features
        backbone.classifier[3] = nn.Linear(head_in_features, num_classes)
        self.backbone = backbone

    def forward(self, x):
        """Run `x` through the backbone and return its output unchanged."""
        output = self.backbone(x)
        return output

In [8]:
# Size of the classifier's output layer; passed straight to DiabetesClassif.
num_classes = 2
model = DiabetesClassif(num_classes=num_classes)

# Training and Validation Loop

In [12]:
def training_loop(model, epochs, optimizer, loss_fn, data):
    """Train `model` for `epochs` passes over `data`.

    Each batch is a standard step: clear gradients, forward pass, loss,
    backward pass, optimizer update. Progress is shown with a tqdm bar whose
    description is the epoch counter and whose postfix is the latest batch loss.

    Args:
        model: Module called as `model(X)`; put into train mode every epoch.
        epochs: Number of full passes over `data`.
        optimizer: Optimizer providing `zero_grad()` and `step()`.
        loss_fn: Callable mapping `(pred, y)` to a scalar loss tensor.
        data: Sized iterable of `(X, y)` batches (`len(data)` is used for the bar).
    """
    for t in range(epochs):
        progress = tqdm(data, total=len(data))
        model.train()

        for inputs, targets in progress:
            optimizer.zero_grad()

            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

            # Refresh the bar with the current epoch and the latest batch loss.
            progress.set_description(f"Epoch [{t+1}/{epochs}]")
            progress.set_postfix(loss=batch_loss.item())

    print("Training completed.")

def validation_loop(model, loss_fn, data):
    """Evaluate `model` on `data`, print a summary, and return the metrics.

    Args:
        model: Module called as `model(X)`; switched to eval mode and left there.
        loss_fn: Callable mapping `(pred, y)` to a scalar loss tensor.
        data: Iterable of `(X, y)` batches, e.g. a DataLoader. `pred.argmax(1)`
            is compared with `y`, so predictions are expected to be
            `(batch, classes)` scores and `y` class indices.

    Returns:
        Tuple `(accuracy, avg_loss)`: accuracy is the fraction of evaluated
        samples predicted correctly, and avg_loss is the mean of the per-batch
        losses. Both are NaN when `data` yields no batches.
    """
    model.eval()
    # Count samples and batches as they are evaluated rather than trusting
    # len(data.dataset) / len(data): the denominators then match what was
    # actually scored, and any iterable of batches is accepted.
    size, num_batches = 0, 0
    test_loss, correct = 0.0, 0

    with torch.no_grad():
        for X, y in data:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).sum().item()
            size += len(y)
            num_batches += 1

    # Guard against empty input instead of raising ZeroDivisionError.
    test_loss = test_loss / num_batches if num_batches else float("nan")
    correct = correct / size if size else float("nan")

    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}\n")
    return correct, test_loss

In [None]:
epochs = 10
# `parameters` must be called: AdamW needs an iterable of parameters, not the
# bound method object itself.
optimizer = AdamW(params=model.parameters())
loss_fn = nn.CrossEntropyLoss()

# TODO: Prepare data processing and dataloader
training_loop(model, epochs, optimizer, loss_fn, train_dataloader)
validation_loop(model, loss_fn, validation_dataloader)