## Question 3: Imbalanced Classification

In [3]:
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

### Load the data set

In [4]:
# Load the saved dictionary from malaria.pt and pull out the class names
# together with the train/test inputs and labels it stores.
data = torch.load('malaria.pt')
class_names = data["class_names"]
Xtrain, Ytrain = data['Xtrain'], data['Ytrain']
Xtest, Ytest = data['Xtest'], data['Ytest']

 ### Question 3(a): Class Proportions

In [5]:
#Add code for 3(a) here

### Define the model

In [6]:
class MLP(nn.Module):
    """Two-hidden-layer perceptron classifier with built-in input standardization.

    Inputs are flattened to ``(N, D)``, standardized with per-feature statistics,
    and passed through ``D -> K1 -> K2 -> C`` linear layers with ReLU activations
    between them. The standardization statistics are stored as buffers (``mean``
    and ``std``) so they follow the module across devices and are included in
    ``state_dict()``. Until ``fit`` is called they are the identity transform
    (mean 0, std 1), so the model can be evaluated before training.

    Args:
        D:  number of input features after flattening.
        K1: width of the first hidden layer.
        K2: width of the second hidden layer.
        C:  number of output classes (logits).
    """

    def __init__(self, D, K1, K2, C):
        super().__init__()

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.fc1 = nn.Linear(D, K1)
        self.fc2 = nn.Linear(K1, K2)
        self.fc3 = nn.Linear(K2, C)
        self.activation = torch.relu

        # Identity normalization until fit() estimates the real statistics.
        self.register_buffer("mean", torch.zeros(1, D))
        self.register_buffer("std", torch.ones(1, D))

        # Move parameters and buffers together.
        self.to(self.device)

    def forward(self, X):
        """Return the ``(N, C)`` logits for a batch ``X`` whose first axis is N."""
        n = X.size()[0]
        # Flatten *before* normalizing so the (1, D) statistics broadcast
        # correctly, and make sure the batch lives where the weights live.
        X = X.reshape(n, -1).to(self.fc1.weight.device)
        X = (X - self.mean) / self.std
        h1 = self.activation(self.fc1(X))
        h2 = self.activation(self.fc2(h1))
        return self.fc3(h2)

    def predict_proba(self, X):
        """Return the ``(N, C)`` softmax class probabilities (no autograd graph)."""
        with torch.no_grad():
            logits = self.forward(X)
            return F.softmax(logits, dim=-1)

    def predict(self, X):
        """Return the ``(N,)`` index of the highest-scoring class per example."""
        with torch.no_grad():
            logits = self.forward(X)
            return torch.argmax(logits, dim=-1)

    def fit(self, Xtrain, Ytrain, lr=0.01, num_epochs=25, batch_size=512):
        """Train with Adam on the mean negative log-likelihood.

        Per-feature mean and standard deviation are estimated from ``Xtrain``
        and stored on the module for use by ``forward``. After every epoch the
        average per-example loss and the training error (in percent) are printed.

        Args:
            Xtrain: training inputs; first axis indexes examples.
            Ytrain: integer class labels, one per example.
            lr: Adam learning rate.
            num_epochs: number of passes over the training set.
            batch_size: mini-batch size.

        Raises:
            ValueError: if ``Xtrain`` contains no examples.
        """
        N = Xtrain.shape[0]
        if N == 0:
            raise ValueError("Xtrain must contain at least one example")

        device = self.fc1.weight.device
        Xtrain = Xtrain.reshape(N, -1).to(device)
        Ytrain = Ytrain.to(device)

        mean = Xtrain.mean(dim=0, keepdim=True)
        std = Xtrain.std(dim=0, keepdim=True)
        # Constant features give std == 0 and a single example gives NaN;
        # either would poison the network with NaN/inf, so fall back to 1.
        std = torch.where(std > 0, std, torch.ones_like(std))
        self.mean = mean
        self.std = std

        dataset = TensorDataset(Xtrain, Ytrain)
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        optimizer = optim.Adam(self.parameters(), lr=lr)

        # Start training loop
        for epoch in tqdm(range(num_epochs)):  # (loop for every epoch)

            running_loss = 0.0
            running_err = 0.0

            # Load batches
            for inputs, labels in train_loader:

                # Zero the gradient
                optimizer.zero_grad()

                # Run the forward pass
                out = self.forward(inputs)
                lp = F.log_softmax(out, dim=-1)
                loss = F.nll_loss(lp, labels, reduction='mean')
                preds = out.argmax(dim=1)

                # Get gradient and update the network weights
                loss.backward()
                optimizer.step()

                # `loss` is a per-batch mean, so weight it by the batch size
                # to make the epoch total a sum over examples.
                running_loss += loss.item() * inputs.size(0)
                running_err += torch.sum(preds != labels).item()

            epoch_loss = running_loss / N
            # Expressed in percent to match the '%' in the progress message.
            epoch_err = 100.0 * running_err / N

            # Print progress
            print(f'[Train {epoch+1}] Loss: {epoch_loss:.8f} Err: {epoch_err:.8f}%')

            

### Question 3(b): Learn the base model and compute results

In [None]:
#Add code for 3(b) here

### Question 3(c): Update model to use balanced nll and compute results

In [8]:
#Add code for 3(c) here