# Question 1

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data import Dataset, DataLoader

In [None]:
def load_dataset(path):
    """Read a pickled ``(train, test)`` pair and return it as four tensors.

    The object stored at ``path`` must unpack into exactly two items; each
    item is indexed with ``[0]`` for the features and ``[1]`` for the labels.

    Args:
        path: location of the pickle file to read.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where the features are
        ``torch.float`` tensors and the labels are ``torch.long`` tensors.
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(path, 'rb') as handle:
        train_split, test_split = pickle.load(handle)

    def to_tensors(split):
        # Position 0 holds the features, position 1 the labels.
        features = torch.tensor(split[0], dtype=torch.float)
        labels = torch.tensor(split[1], dtype=torch.long)
        return features, labels

    return (*to_tensors(train_split), *to_tensors(test_split))

In [None]:
class MnistDataset(Dataset):
    """Placeholder ``Dataset`` subclass; the body is still to be implemented.

    The comments in ``KFoldCrossValidation`` say this class is used to
    organize the train, validation and test data.
    """
    # NOTE(review): presumably needs __len__ and __getitem__ returning a
    # (features, label) pair, since Trainer unpacks DataLoader batches as
    # (X_batch, y_batch) -- confirm against the assignment text.
    ...

## (a)

## (b)

**Complete the following Python class for training/evaluation**

In [None]:
class Trainer:
    """Training / evaluation scaffold for a classification model.

    Statements written as ``...`` are placeholders that still have to be
    filled in. The code around them handles batching, per-epoch bookkeeping,
    restoring the best weights and plotting the learning curves.
    """

    def __init__(self, model, opt_method, learning_rate, batch_size, epoch, l2):
        """Store the model and training settings and pick the optimizer.

        Args:
            model: network to train; must provide ``state_dict()`` and
                ``load_state_dict()``.
            opt_method: ``"adam"`` or ``"sgdm"``.
            learning_rate: intended for the optimizer; unused until the
                optimizer placeholders are filled in.
            batch_size: batch size used by every ``DataLoader`` built here.
            epoch: number of passes over the training data.
            l2: presumably the L2 / weight-decay strength for the optimizer;
                unused until the optimizer placeholders are filled in.

        Raises:
            NotImplementedError: if ``opt_method`` is not a supported name.
        """
        self.model = model
        if opt_method == "adam":
            self.optimizer = ...
        elif opt_method == "sgdm":
            # Hint: to use SGDM in PyTorch, use torch.optim.SGD and set momentum to a non-zero value
            self.optimizer = ...
        else:
            raise NotImplementedError("This optimization is not supported")

        self.epoch = epoch
        self.batch_size = batch_size

    @staticmethod
    def _progress(iterable):
        """Wrap ``iterable`` in a tqdm progress bar when tqdm is installed."""
        # tqdm is imported here because the notebook never imports it at the
        # top; without it the loop still runs, just without a progress bar.
        try:
            from tqdm.auto import tqdm
        except ImportError:
            return iterable
        return tqdm(iterable, leave=False)

    def _snapshot_weights(self):
        """Return an independent copy of the model's current state dict."""
        # state_dict() hands back references to the live tensors, so they must
        # be cloned or later optimizer steps silently overwrite the snapshot.
        return {
            name: tensor.detach().clone()
            for name, tensor in self.model.state_dict().items()
        }

    def train(self, train_data, val_data, early_stop=True, draw_curve=True):
        """Train for ``self.epoch`` epochs and record loss/accuracy per epoch.

        Args:
            train_data: dataset iterated in shuffled mini-batches.
            val_data: dataset used for the per-epoch validation metrics.
            early_stop: when true, the weights from the epoch with the lowest
                validation loss are restored after the final epoch. Training
                itself is not cut short.
            draw_curve: when true, plot train/validation loss and accuracy.

        Returns:
            dict with keys ``train_loss_list``, ``train_acc_list``,
            ``val_loss_list`` and ``val_acc_list``, one entry per epoch.
        """
        train_loader = DataLoader(train_data, batch_size=self.batch_size, shuffle=True)

        train_loss_list, train_acc_list = [], []
        val_loss_list, val_acc_list = [], []
        weights = self._snapshot_weights()
        lowest_val_loss = np.inf
        loss_func = nn.CrossEntropyLoss()
        for _ in self._progress(range(self.epoch)):
            # enable train mode
            ...
            epoch_loss, epoch_acc = 0.0, 0.0
            for X_batch, y_batch in train_loader:
                batch_importance = ...
                y_pred = ...
                batch_loss = ...

                self.optimizer.zero_grad()
                batch_loss.backward()
                self.optimizer.step()

                epoch_loss += ...
                batch_acc = ...
                epoch_acc += ...
            train_loss_list.append(epoch_loss)
            train_acc_list.append(epoch_acc)
            val_loss, val_acc = ...
            val_loss_list.append(val_loss)
            val_acc_list.append(val_acc)

            if early_stop:
                if val_loss < lowest_val_loss:
                    lowest_val_loss = val_loss
                    # Keep a real copy of the best weights seen so far.
                    weights = self._snapshot_weights()

        if draw_curve:
            x_axis = np.arange(self.epoch)
            fig, axes = plt.subplots(1, 2, figsize=(10, 4))
            axes[0].plot(x_axis, train_loss_list, label="Train")
            axes[0].plot(x_axis, val_loss_list, label="Validation")
            axes[0].set_title("Loss")
            axes[0].set_xlabel("Epoch")
            axes[0].legend()
            axes[1].plot(x_axis, train_acc_list, label='Train')
            axes[1].plot(x_axis, val_acc_list, label='Validation')
            axes[1].set_title("Accuracy")
            axes[1].set_xlabel("Epoch")
            axes[1].legend()

        if early_stop:
            self.model.load_state_dict(weights)

        return {
            "train_loss_list": train_loss_list,
            "train_acc_list": train_acc_list,
            "val_loss_list": val_loss_list,
            "val_acc_list": val_acc_list,
        }

    def evaluate(self, data, print_acc=False):
        """Accumulate loss and accuracy over ``data`` without gradients.

        Args:
            data: dataset to evaluate on.
            print_acc: when true, print the accumulated accuracy.

        Returns:
            ``(loss, acc)`` as accumulated over all batches.
        """
        # enable evaluation mode
        ...
        # Evaluation does not depend on sample order, so no shuffling is needed.
        loader = DataLoader(data, batch_size=self.batch_size, shuffle=False)
        loss_func = nn.CrossEntropyLoss()
        acc, loss = 0.0, 0.0
        for X_batch, y_batch in loader:
            with torch.no_grad():
                batch_importance = ...
                y_pred = ...
                batch_loss = ...
                batch_acc = ...
                acc += ...
                loss += ...
        if print_acc:
            print(f"Accuracy: {acc:.3f}")
        return loss, acc
            
            

**Complete the following function to do KFold cross validation**

In [None]:
def KFoldCrossValidation(
    model_class, k, 
    X_train, y_train, X_test, y_test, 
    opt_method='adam', learning_rate=2e-3, batch_size=128, epoch=50, l2=0.0
):
    """Scaffold for k-fold cross-validation of ``model_class``.

    For every split produced by ``kf.split(X_train)`` a fresh
    ``model_class()`` is created, and per-fold training and test accuracy are
    collected and printed. Statements written as ``...`` are placeholders
    that still have to be filled in.

    Args:
        model_class: callable with no arguments that builds a new model.
        k: presumably the number of folds for the ``kf`` splitter.
        X_train, y_train: training features and labels; the splitter is run
            over ``X_train``.
        X_test, y_test: held-out test features and labels.
        opt_method, learning_rate, batch_size, epoch, l2: presumably passed
            on to ``Trainer`` once the placeholder is filled in.

    Returns:
        None; results are only printed.
    """
    # Use MnistDataset to organize data
    test_data = ...
    kf = ...
    train_acc_list, test_acc_list = [], []
    for i, (train_index, val_index) in enumerate(kf.split(X_train)):
        print(f"Fold {i}:")
        
        # Use MnistDataset to organize data
        train_data = ...
        val_data = ...
        
        # A new model per fold, so no weights carry over between folds.
        model = model_class()
        # initialize a Trainer object
        trainer = ...
        # call trainer.train() here
        res = ...
        # record the training accuracy of the epoch that has the lowest validation loss
        # Hint: use np.argmin
        train_acc_best = ...
        # test, use trainer.evaluate function
        test_loss, test_acc = ...
        
        train_acc_list.append(train_acc_best)
        test_acc_list.append(test_acc)
        
        print(f"Training accuracy: {train_acc_best}")
        print(f"Test accuracy: {test_acc}")
    
    print("Final results:")
    # Report mean and std
    print(f"Training accuracy:", ...)
    print(f"Test accuracy:", ...)

## (c)

In [None]:
class Net3(nn.Module):
    """Placeholder network for part (c); layers and forward pass are still to be written."""
    # NOTE(review): the name suggests a hidden layer of size 3 -- confirm
    # against the assignment text.

    def __init__(self):
        super().__init__()
        ...
    
    def forward(self, x):
        return ...

In [None]:
# Placeholder call: replace `...` with the KFoldCrossValidation arguments
# (model_class, k, X_train, y_train, X_test, y_test, optional hyperparameters).
# NOTE(review): presumably model_class=Net3 for this part -- confirm.
KFoldCrossValidation(...)

## (d)

In [None]:
class Net50(nn.Module):
    """Placeholder network for part (d); layers and forward pass are still to be written."""
    # NOTE(review): the name suggests a hidden layer of size 50 -- confirm
    # against the assignment text.

    def __init__(self):
        super().__init__()
        ...
    
    def forward(self, x):
        return ...

In [None]:
# Placeholder call: replace `...` with the KFoldCrossValidation arguments
# (model_class, k, X_train, y_train, X_test, y_test, optional hyperparameters).
# NOTE(review): presumably model_class=Net50 for this part -- confirm.
KFoldCrossValidation(...)

# Question 2

## (a)

In [None]:
class Net50Dropout(nn.Module):
    """Placeholder network for Question 2 (a); layers and forward pass are still to be written."""
    # NOTE(review): the name suggests the size-50 network with dropout added
    # -- confirm against the assignment text.

    def __init__(self):
        super().__init__()
        ...
    
    def forward(self, x):
        return ...

In [None]:
# Placeholder call: replace `...` with the KFoldCrossValidation arguments
# (model_class, k, X_train, y_train, X_test, y_test, optional hyperparameters).
# NOTE(review): presumably model_class=Net50Dropout for this part -- confirm.
KFoldCrossValidation(...)

## (b)

In [None]:
# L2 regularization: enable it by passing a non-zero "l2" argument to
# KFoldCrossValidation (its default is l2=0.0).
KFoldCrossValidation(...)

## (c)

*For debugging*: You should get 331 features.

In [None]:
from sklearn.decomposition import PCA

...

In [None]:
# Use one hidden layer of size 50, no Dropouts
class Net50PCA(nn.Module):
    """Placeholder network for the PCA-reduced inputs.

    Per the notebook's instructions it should have one hidden layer of size
    50 and no dropout. The layers and the forward pass are still to be
    filled in where ``...`` appears.
    """

    def __init__(self):
        # nn.Module has to be initialised before any layer or parameter is
        # assigned as an attribute; otherwise the assignment raises
        # AttributeError.
        super().__init__()
        ...

    def forward(self, x):
        return ...

In [None]:
# Placeholder call: replace `...` with the KFoldCrossValidation arguments
# (model_class, k, X_train, y_train, X_test, y_test, optional hyperparameters).
# NOTE(review): presumably model_class=Net50PCA with the PCA-transformed
# features for this part -- confirm.
KFoldCrossValidation(...)

## (d)

In [None]:
# If you find Dropout is better, finish this Net50PCADropout and do K-Fold CrossValidation

# class Net50PCADropout(nn.Module):
#     def __init__(self):
#         super().__init__()
#         ...
    
#     def forward(self, x):
#         return ...

In [None]:
# If you find L2 regularization is better,
# just call KFoldCrossValidation with Net50PCA and l2 set to a non-zero value

In [None]:
# Placeholder call: replace `...` with the KFoldCrossValidation arguments
# (model_class, k, X_train, y_train, X_test, y_test, optional hyperparameters),
# using whichever regularization variant was chosen for this part.
KFoldCrossValidation(...)