# Packages

In [None]:
import numpy as np

import torch
from torch import nn
from torch.optim import SGD

from torchvision import datasets, transforms
from torch.utils.data import Subset, DataLoader

# From `Example-Notebook`

Nothing new in this section. Just repeating the code.

**Exercise:** Clean up these notebooks by moving this section to its own module, and then importing it in both this notebook and `Example-Notebook-Skeleton`.

In [None]:
class TwoLayerDenseNet(nn.Module):
    """Dense network: linear -> ReLU -> dropout -> linear.

    Args:
        in_features: size of each input vector.
        hidden_size: width of the single hidden layer.
        out_features: number of values produced per input.
    """

    def __init__(self, in_features, hidden_size, out_features):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, out_features)
        # Dropout (p=0.5) is applied to the hidden activations in `forward`.
        self.do = nn.Dropout(p=0.5)

    def forward(self, inputs):
        """Return the output of the second linear layer for `inputs`."""
        hidden = self.fc1(inputs).relu()
        return self.fc2(self.do(hidden))

In [None]:
def make_train_and_val_functions(model, criterion, optimizer):
    """Build the per-batch training and evaluation step functions.

    Args:
        model: module mapping a batch of inputs to per-class logits.
        criterion: callable ``(logits, targets) -> scalar loss tensor``.
        optimizer: optimizer over the model's parameters; only the training
            step uses it.

    Returns:
        ``(train_step, eval_step)``. Each takes a batch ``(inputs, targets)``
        and returns a dict with:
            'loss': the batch loss as a Python float,
            'num_correct': 0-d numpy integer array of correct predictions,
            'batch_accuracy': 0-d numpy float array, fraction correct,
            'batch_size': number of examples in the batch (int).
    """
    def batch_metrics(loss, y_logits, targets):
        # Single implementation shared by both steps so their metrics
        # cannot drift apart.
        y_pred = torch.max(y_logits.detach(), 1).indices
        # .cpu() is a no-op for CPU tensors and makes .numpy() valid when
        # the batch lives on another device.
        correct = (y_pred == targets.detach()).cpu()
        return {'loss': loss.item(),
                'num_correct': correct.long().sum().numpy(),
                'batch_accuracy': correct.float().mean().numpy(),
                'batch_size': correct.numel()}

    def train_step(batch):
        """Run one optimisation step on `batch` and return its metrics."""
        model.train()
        inputs, targets = batch
        y_logits = model(inputs)
        loss = criterion(y_logits, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return batch_metrics(loss, y_logits, targets)

    def eval_step(batch):
        """Score `batch` without updating the model and return its metrics."""
        model.eval()
        inputs, targets = batch
        # No backward pass happens here, so skip building the autograd graph.
        with torch.no_grad():
            y_logits = model(inputs)
            loss = criterion(y_logits, targets)
        return batch_metrics(loss, y_logits, targets)

    return train_step, eval_step


# Example 3: MNIST Classification with Dense Network

## Import data

In [None]:
# Transforms
# ...
# Exercise placeholder: `on_load_tsfm` is passed as `transform=` to both
# `datasets.MNIST` constructors below.
# NOTE(review): the `flatten` helper further down applies `nn.Flatten` to the
# loaded images, so they presumably need to be tensors by then; with `None`
# no conversion is applied on load -- confirm (e.g. `transforms.ToTensor()`).
on_load_tsfm = None

In [None]:
# Folder the MNIST files are read from; both splits share the on-load transform.
DATA_DIR = "./data/"
train_digits = datasets.MNIST(root=DATA_DIR, train=True, transform=on_load_tsfm)
test_digits = datasets.MNIST(root=DATA_DIR, train=False, transform=on_load_tsfm)

## Preprocess data

In [None]:
# Report how many images each original MNIST split holds.
train_size = train_digits.targets.shape[0]
test_size = test_digits.targets.shape[0]
print('number of images')
print(f'train: {train_size}')
print(f' test: {test_size}')

# The validation and eval_train sets are both sized like the test set.
val_size = test_size
eval_train_size = val_size

# Hold out a random validation subset; every other index stays in training.
val_idx = np.random.choice(train_size, val_size, replace=False)
new_train_idx = np.setdiff1d(np.arange(train_size), val_idx)

val_digits = Subset(train_digits, val_idx)
train_digits = Subset(train_digits, new_train_idx)
train_size = len(train_digits)

# eval_train is a random sample drawn from the reduced training set.
eval_train_idx = np.random.choice(train_size, eval_train_size, replace=False)
eval_train_digits = Subset(train_digits, eval_train_idx)

# One loader per split: small batches for training, larger ones elsewhere.
dl = {
    'train': DataLoader(train_digits, batch_size=16, shuffle=True),
    **{
        split: DataLoader(digits, batch_size=128, shuffle=True)
        for split, digits in (
            ('eval_train', eval_train_digits),
            ('val', val_digits),
            ('test', test_digits),
        )
    },
}

## Create model



In [None]:
# Create a model called `model` with 10 hidden units.
# ...

# Print out a summary of the model using `summary`
# ...

## Define objects for training

In [None]:
# Set the criterion to be cross-entropy loss
# ...
criterion = 

# Use stochastic gradient descent with:
# - learning rate of 1e-4, 
# - momentum=.9
# - nesterov=True
# ...
optimizer = 

# Instantiate an exponential learning rate scheduler
# with annealing rate of .9
# ...
scheduler = 

In [None]:
# Make train and eval stepping functions
# ...
train_step, eval_step = 

In [None]:
# The dense net expects one vector per example, whereas the loaded
# images are square, so each image is flattened first.
_flatten = nn.Flatten()


def flatten(batch):
    """Return `(images, labels)` with each image flattened to a vector.

    `batch` is an `(images, labels)` pair; the labels are passed through
    untouched.
    """
    pictures, targets = batch
    return (_flatten(pictures), targets)

**Exercise:** Write the code required for 25 epochs of training. After each epoch has finished, evaluate the model on the `eval_train` dataloader and the `val` dataloader. Print out the epoch's average accuracy and loss for the two loaders. 

*Hint:* Use `flatten`, defined above, to ensure that the (square) images are correctly formatted for the dense network.

In [None]:
# Train model
# ...

## Examine network output

For which digits does it do well? For which does it have a hard time? 

**Exercise:** Make a confusion matrix for the predictions on the test data. Either print it out or visualize it. 

*Hint:* Evaluate the entire test data `Tensor` at once (rather than looping through the `DataLoader` we created). Convert the resulting logits into a numpy array and work with those.

*Hint:* `sklearn` has a `confusion_matrix` function. 

*Hint:* `matplotlib`'s `matshow` is one way of visualizing the confusion matrix

**Exercise:** Using the test logits and labels obtained above, write a function that approximates the [AUROC](https://en.wikipedia.org/wiki/Receiver_operating_characteristic).

# Example 4: MNIST classification with CNN

## Create Network

In [None]:
class SmallCNN(nn.Module):
    """Exercise skeleton for a small two-block CNN classifier.

    The intended pipeline, per the comments below, is two rounds of
    convolve -> batch normalization -> ReLU -> max pool, followed by a
    single fully connected layer. Assignments with an empty right-hand
    side are left for the reader to complete.

    Args:
        input_size: indexable; `input_size[0]` is used as the number of
            input channels of the first convolution, and `input_size[1]`
            and `input_size[2]` as the spatial sizes in `fc_in`
            (presumably height and width -- confirm).
        n_filters: number of output channels of the first convolution
            (and, per the comment, of the second one).
        num_classes: number of outputs of the final linear layer.
    """
    def __init__(self, input_size, n_filters, num_classes):
        super().__init__()
        # n_filters; 3x3 convolution; padding=1
        self.c1 = nn.Conv2d(input_size[0], n_filters, kernel_size=(3,3), padding=1)
        # batch normalization
        self.bn1 = nn.BatchNorm2d(n_filters)
        # 2x2 max pool
        self.mp1 = nn.MaxPool2d((2,2))
        # use n_filters again; 3x3; padding=0
        self.c2 = 
        # batch normalization
        self.bn2 = 
        # 2x2 max-pooling
        self.mp2 = 
        # number of input features to the final layer
        # NOTE(review): with the layers described above, a spatial side s
        # becomes s (3x3, padding=1) -> s//2 (pool) -> s//2 - 2 (3x3,
        # padding=0) -> (s//2 - 2)//2 (pool), assuming default strides and
        # floor-mode pooling -- confirm. (s - 4)/4 equals that only when s
        # is divisible by 4 (e.g. 28 -> 6); other sizes would give a
        # mismatched `fc_in`.
        fc_in = int(n_filters * (input_size[1] - 4) * (input_size[2] - 4)/16)
        # For reshaping the input to the final layer
        self.flatten = nn.Flatten()
        # The final layer
        self.fc = nn.Linear(fc_in, num_classes)
        
    def forward(self, inputs):
        """Map a batch of images to class scores (body left as an exercise)."""
        # Convolve -> Batch Normalization -> ReLU -> MaxPool
        outputs = 
        # Convolve -> Batch Normalization -> ReLU -> MaxPool
        outputs = 
        # Fully connected
        outputs = 
        return outputs

In [None]:
# Create a SmallCNN with 8 filters and the appropriate number of classes
# ...
model = 

# Print out a summary
# ....

In [None]:
criterion = # cross entropy
optimizer = # SGD, same as before
scheduler = # exponential annealing, same as before
train_step, eval_step = # ...

In [None]:
# Re-use above code to write a 25 epoch training loop
# ...

## Examine the results

**Exercise:** Implement this section on [Google Colab](https://colab.research.google.com), where it will run much faster if you use a GPU-enabled notebook.

**Exercise:** Compare the classification results of the CNN to those for the dense net. Which does better?

