# Problem 3

Use this notebook to write your code for problem 3.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## 3D - Convolutional network

As in problem 2, we have provided code that loads and preprocesses the MNIST data for you.

In [4]:
# load MNIST data into PyTorch format
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# number of images per mini-batch, shared by the training and test loaders
batch_size = 32

# MNIST training split, downloaded into the data/ folder if it is not there yet.
# transforms.ToTensor() converts each image to a float tensor and rescales
# pixel values from 0-255 to 0-1.0; the DataLoader then stacks the images of a
# batch into a single 4-D tensor.
mnist_training_data = torchvision.datasets.MNIST(
    root='data/',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# training batches are drawn in shuffled order
training_data_loader = torch.utils.data.DataLoader(
    dataset=mnist_training_data,
    batch_size=batch_size,
    shuffle=True,
)

# MNIST test split, preprocessed the same way and read in a fixed order
mnist_test_data = torchvision.datasets.MNIST(
    root='data/',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_data_loader = torch.utils.data.DataLoader(
    dataset=mnist_test_data,
    batch_size=batch_size,
    shuffle=False,
)

In [3]:
# look at the number of batches per epoch for training and validation
print(f'{len(training_data_loader)} training batches')
# Take the sample count from the dataset itself: batches * batch_size
# over-counts whenever the final batch is only partially filled.
print(f'{len(training_data_loader.dataset)} training samples')
print(f'{len(test_data_loader)} validation batches')

1875 training batches
60000 training samples
313 validation batches


In [4]:
# sample model: two conv/pool/dropout stages feeding a small dense classifier
# (the spatial sizes in the comments assume 28x28 single-channel MNIST images)

model = nn.Sequential(
    # stage 1: 1 -> 8 channels; 28x28 -> 26x26 after the conv, 13x13 after pooling
    nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.5),

    # stage 2: 8 -> 8 channels; 13x13 -> 11x11 after the conv, 5x5 after pooling
    nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.5),

    # classifier head: 8 channels * 5 * 5 positions = 200 features -> 64 -> 10 scores
    nn.Flatten(),
    nn.Linear(in_features=8 * 5 * 5, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
    # no softmax layer here: PyTorch's cross-entropy loss already includes it
)

In [5]:
# print the shape of every learnable tensor, in the order the model stores them
for param in model.parameters():
    print(param.shape)

torch.Size([8, 1, 3, 3])
torch.Size([8])
torch.Size([8, 8, 3, 3])
torch.Size([8])
torch.Size([64, 200])
torch.Size([64])
torch.Size([10, 64])
torch.Size([10])


In [6]:
# our model has some # of parameters:
# Tensor.numel() returns a tensor's element count directly, so there is no need
# to route the shape through a list, np.prod and .item().
count = sum(param.numel() for param in model.parameters())
print(f'total params: {count}')

total params: 14178


In [7]:
# Loss and optimizer for a multi-class classification problem

# cross-entropy computed on the raw class scores the model outputs
criterion = nn.CrossEntropyLoss()
# RMSprop over all model parameters, with its default hyperparameters
optimizer = optim.RMSprop(params=model.parameters())

In [8]:
# Train the model for n_epochs epochs, iterating on the data in batches
n_epochs = 10

# store metrics: one row per epoch
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

for epoch in range(n_epochs):
    # the denominator follows n_epochs rather than a hard-coded 10
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train
    # train mode keeps layers such as dropout active, so the training accuracy
    # below is measured on the regularized network
    model.train()
    for i, data in enumerate(training_data_loader):
        images, labels = data
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: the predicted class is the index of the
        # largest score (argmax avoids .data and does not overwrite IPython's `_`)
        predicted = output.argmax(dim=1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # track training loss (sum of per-batch mean losses)
        training_loss_history[epoch] += loss.item()
        # progress update every 180 batches (~1/10 epoch for batch size 32)
        if i % 180 == 0:
            print('.', end='')
    # average of the per-batch losses over the epoch
    training_loss_history[epoch] /= len(training_data_loader)
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate
    test_total = 0
    test_correct = 0
    # eval mode switches train-only behaviour (e.g. dropout) off
    model.eval()
    # no gradients are needed while only measuring performance
    with torch.no_grad():
        for i, data in enumerate(test_data_loader):
            images, labels = data
            # forward pass
            output = model(images)
            # find accuracy
            predicted = output.argmax(dim=1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item()
        validation_loss_history[epoch] /= len(test_data_loader)
        validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

Epoch 1/10:...........
	loss: 0.7919, acc: 0.7353, val loss: 0.2200, val acc: 0.9359
Epoch 2/10:...........
	loss: 0.4792, acc: 0.8476, val loss: 0.1909, val acc: 0.9469
Epoch 3/10:...........
	loss: 0.4545, acc: 0.8557, val loss: 0.2054, val acc: 0.9410
Epoch 4/10:...........
	loss: 0.4419, acc: 0.8606, val loss: 0.2262, val acc: 0.9375
Epoch 5/10:...........
	loss: 0.4360, acc: 0.8644, val loss: 0.3864, val acc: 0.8808
Epoch 6/10:...........
	loss: 0.4430, acc: 0.8640, val loss: 0.2000, val acc: 0.9429
Epoch 7/10:...........
	loss: 0.4365, acc: 0.8639, val loss: 0.2068, val acc: 0.9460
Epoch 8/10:...........
	loss: 0.4271, acc: 0.8671, val loss: 0.2189, val acc: 0.9429
Epoch 9/10:...........
	loss: 0.4254, acc: 0.8684, val loss: 0.1953, val acc: 0.9453
Epoch 10/10:...........
	loss: 0.4280, acc: 0.8677, val loss: 0.1773, val acc: 0.9438


Above, we output the training loss/accuracy as well as the validation loss and accuracy. Not bad! Let's see if you can do better.

Problem 3G

In [26]:
# Spatial-size bookkeeping for the layers below:
#   Conv2d with an N x M kernel subtracts (N - 1) from the height and (M - 1) from the width
#   MaxPool2d(N) divides the height and width of its input by N (rounding down)
model = nn.Sequential(
    # first conv stage, with lighter dropout than the sample model
    nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.2),

    # second conv stage
    nn.Conv2d(in_channels=8, out_channels=8, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),
    nn.Dropout(p=0.2),

    # dense classifier on the flattened 8 * 5 * 5 = 200 features
    nn.Flatten(),
    nn.Linear(in_features=8 * 5 * 5, out_features=64),
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),
)

# tally the learnable scalars, one parameter tensor at a time
count = 0
for tensor in model.parameters():
    n_params = tensor.numel()
    count += n_params
print(f'total params: {count}')

# cross-entropy loss on the raw class scores
criterion = nn.CrossEntropyLoss()

# Adam with a small L2 penalty (weight decay) on the parameters
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=1e-3,
    weight_decay=1e-4,
)

total params: 14178


In [29]:
# Train the model for n_epochs epochs, iterating on the data in batches
n_epochs = 10

# store metrics: one row per epoch
training_accuracy_history = np.zeros([n_epochs, 1])
training_loss_history = np.zeros([n_epochs, 1])
validation_accuracy_history = np.zeros([n_epochs, 1])
validation_loss_history = np.zeros([n_epochs, 1])

for epoch in range(n_epochs):
    # the denominator follows n_epochs rather than a hard-coded 10
    print(f'Epoch {epoch+1}/{n_epochs}:', end='')
    train_total = 0
    train_correct = 0
    # train
    # train mode keeps layers such as dropout active, so the training accuracy
    # below is measured on the regularized network
    model.train()
    for i, data in enumerate(training_data_loader):
        images, labels = data
        optimizer.zero_grad()
        # forward pass
        output = model(images)
        # calculate categorical cross entropy loss
        loss = criterion(output, labels)
        # backward pass
        loss.backward()
        optimizer.step()

        # track training accuracy: the predicted class is the index of the
        # largest score (argmax avoids .data and does not overwrite IPython's `_`)
        predicted = output.argmax(dim=1)
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # track training loss (sum of per-batch mean losses)
        training_loss_history[epoch] += loss.item()
        # progress update every 180 batches (~1/10 epoch for batch size 32)
        if i % 180 == 0:
            print('.', end='')
    # average of the per-batch losses over the epoch
    training_loss_history[epoch] /= len(training_data_loader)
    training_accuracy_history[epoch] = train_correct / train_total
    print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

    # validate
    test_total = 0
    test_correct = 0
    # eval mode switches train-only behaviour (e.g. dropout) off
    model.eval()
    # no gradients are needed while only measuring performance
    with torch.no_grad():
        for i, data in enumerate(test_data_loader):
            images, labels = data
            # forward pass
            output = model(images)
            # find accuracy
            predicted = output.argmax(dim=1)
            test_total += labels.size(0)
            test_correct += (predicted == labels).sum().item()
            # find loss
            loss = criterion(output, labels)
            validation_loss_history[epoch] += loss.item()
        validation_loss_history[epoch] /= len(test_data_loader)
        validation_accuracy_history[epoch] = test_correct / test_total
    print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

Epoch 1/10:...........
	loss: 0.1696, acc: 0.9474, val loss: 0.0833, val acc: 0.9751
Epoch 2/10:...........
	loss: 0.1338, acc: 0.9574, val loss: 0.0594, val acc: 0.9816
Epoch 3/10:...........
	loss: 0.1149, acc: 0.9639, val loss: 0.0551, val acc: 0.9819
Epoch 4/10:...........
	loss: 0.1039, acc: 0.9665, val loss: 0.0532, val acc: 0.9828
Epoch 5/10:...........
	loss: 0.0958, acc: 0.9695, val loss: 0.0479, val acc: 0.9845
Epoch 6/10:...........
	loss: 0.0879, acc: 0.9716, val loss: 0.0432, val acc: 0.9849
Epoch 7/10:...........
	loss: 0.0836, acc: 0.9730, val loss: 0.0405, val acc: 0.9861
Epoch 8/10:...........
	loss: 0.0809, acc: 0.9747, val loss: 0.0400, val acc: 0.9876
Epoch 9/10:...........
	loss: 0.0774, acc: 0.9750, val loss: 0.0399, val acc: 0.9869
Epoch 10/10:...........
	loss: 0.0751, acc: 0.9759, val loss: 0.0386, val acc: 0.9873


In [34]:
# Dropout sweep: train a freshly initialized copy of the network for one epoch
# at each of 10 evenly spaced dropout probabilities in [0, 1] and print its
# training / validation metrics.
# NOTE: at p = 1.0 dropout zeroes every activation in training mode, so that
# run is not expected to learn anything.
p = np.linspace(0, 1, 10)

# settings that do not depend on the dropout probability, hoisted out of the loop
n_epochs = 1
criterion = nn.CrossEntropyLoss()

# One summary dict per dropout probability (final-epoch metrics). The *_history
# arrays below are re-created for every value of p, so without this list only
# the last run's numbers would survive the sweep.
dropout_sweep_results = []

for val in p:
    # plain Python float instead of a NumPy scalar for nn.Dropout and printing
    val = float(val)

    model = nn.Sequential(
        nn.Conv2d(1, 8, kernel_size=(3,3)),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(p=val),

        nn.Conv2d(8, 8, kernel_size=(3,3)),
        nn.ReLU(),
        nn.MaxPool2d(2),
        nn.Dropout(p=val),

        nn.Flatten(),
        nn.Linear(25*8, 64),
        nn.ReLU(),
        nn.Linear(64, 10)
    )

    # a new optimizer per model, since it is bound to that model's parameters
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

    # store metrics: one row per epoch, reset for every dropout probability
    training_accuracy_history = np.zeros([n_epochs, 1])
    training_loss_history = np.zeros([n_epochs, 1])
    validation_accuracy_history = np.zeros([n_epochs, 1])
    validation_loss_history = np.zeros([n_epochs, 1])

    for epoch in range(n_epochs):
        print(f'p = {val}', end='')
        train_total = 0
        train_correct = 0
        # train (dropout active)
        model.train()
        for i, data in enumerate(training_data_loader):
            images, labels = data
            optimizer.zero_grad()
            # forward pass
            output = model(images)
            # calculate categorical cross entropy loss
            loss = criterion(output, labels)
            # backward pass
            loss.backward()
            optimizer.step()

            # track training accuracy: predicted class = index of the largest score
            predicted = output.argmax(dim=1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()
            # track training loss
            training_loss_history[epoch] += loss.item()
            # progress update every 180 batches (~1/10 epoch for batch size 32)
            if i % 180 == 0:
                print('.', end='')
        training_loss_history[epoch] /= len(training_data_loader)
        training_accuracy_history[epoch] = train_correct / train_total
        print(f'\n\tloss: {training_loss_history[epoch,0]:0.4f}, acc: {training_accuracy_history[epoch,0]:0.4f}',end='')

        # validate (dropout disabled, no gradients needed)
        test_total = 0
        test_correct = 0
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(test_data_loader):
                images, labels = data
                # forward pass
                output = model(images)
                # find accuracy
                predicted = output.argmax(dim=1)
                test_total += labels.size(0)
                test_correct += (predicted == labels).sum().item()
                # find loss
                loss = criterion(output, labels)
                validation_loss_history[epoch] += loss.item()
            validation_loss_history[epoch] /= len(test_data_loader)
            validation_accuracy_history[epoch] = test_correct / test_total
        print(f', val loss: {validation_loss_history[epoch,0]:0.4f}, val acc: {validation_accuracy_history[epoch,0]:0.4f}')

    # keep this run's final-epoch metrics so the runs can be compared afterwards
    dropout_sweep_results.append({
        'p': val,
        'train_loss': float(training_loss_history[-1, 0]),
        'train_acc': float(training_accuracy_history[-1, 0]),
        'val_loss': float(validation_loss_history[-1, 0]),
        'val_acc': float(validation_accuracy_history[-1, 0]),
    })

p = 0.0...........
	loss: 0.2908, acc: 0.9116, val loss: 0.0881, val acc: 0.9715
p = 0.1111111111111111...........
	loss: 0.3356, acc: 0.8960, val loss: 0.0946, val acc: 0.9696
p = 0.2222222222222222...........
	loss: 0.3948, acc: 0.8735, val loss: 0.1261, val acc: 0.9600
p = 0.3333333333333333...........
	loss: 0.4698, acc: 0.8493, val loss: 0.1281, val acc: 0.9640
p = 0.4444444444444444...........
	loss: 0.6128, acc: 0.7993, val loss: 0.1659, val acc: 0.9558
p = 0.5555555555555556...........
	loss: 0.7298, acc: 0.7596, val loss: 0.2048, val acc: 0.9490
p = 0.6666666666666666...........
	loss: 0.9230, acc: 0.6943, val loss: 0.2781, val acc: 0.9418
p = 0.7777777777777777...........
	loss: 1.3496, acc: 0.5382, val loss: 0.6467, val acc: 0.8682
p = 0.8888888888888888...........
	loss: 1.6058, acc: 0.4434, val loss: 0.7636, val acc: 0.8542
p = 1.0...........
	loss: 2.3017, acc: 0.1118, val loss: 2.3011, val acc: 0.1135
