In [1]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import tqdm
import numpy as np
import utils
import dataloaders
import torchvision
from trainer import Trainer
# Fix the torch and numpy RNG seeds up front so repeated runs of the notebook
# draw the same random numbers.
torch.manual_seed(0)
np.random.seed(0)

### Dataset

In [2]:
# Build the MNIST data loaders and print a few statistics about one batch
batch_size = 64

# ToTensor is the only preprocessing step at this point
image_transform = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)

dataloader_train, dataloader_test = dataloaders.load_dataset(
    batch_size, image_transform
)

# Pull a single training batch; the labels are not needed for the stats
example_images, _ = next(iter(dataloader_train))
print(
    f"The tensor containing the images has shape: {example_images.shape} (batch size, number of color channels, height, width)",
    f"The maximum value in the image is {example_images.max()}, minimum: {example_images.min()}",
    sep="\n\t",
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNIST\raw\train-images-idx3-ubyte.gz to data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNIST\raw\train-labels-idx1-ubyte.gz to data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNIST\raw\t10k-images-idx3-ubyte.gz to data/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNIST\raw\t10k-labels-idx1-ubyte.gz to data/MNIST\raw

The tensor containing the images has shape: torch.Size([64, 1, 28, 28]) (batch size, number of color channels, height, width)
	The maximum value in the image is 1.0, minimum: 0.0


In [3]:
def create_model():
    """
        Build the single-layer baseline classifier.
        Edit the layer list below if you would like to change the model.

        Returns:
            An nn.Sequential (Flatten followed by a 784 -> 10 Linear layer)
            after it has been passed through utils.to_cuda.
    """
    layers = [
        # (batch_size, C, height, width) -> (batch_size, C*height*width)
        nn.Flatten(),
        # One output per class. No softmax here, as it is already combined
        # in the loss function
        nn.Linear(28 * 28 * 1, 10),
    ]
    # Transfer model to GPU memory if a GPU is available
    return utils.to_cuda(nn.Sequential(*layers))


model = create_model()

In [None]:
# Sanity check: a single forward pass must give one score per class per image
expected_shape = (batch_size, 10)  # 10 since mnist has 10 different classes
example_images = utils.to_cuda(example_images)
output = model(example_images)
print("Output shape:", output.shape)
assert output.shape == expected_shape, (
    f"Expected shape: {expected_shape}, but got: {output.shape}"
)

### Hyperparameters & Loss function

In [None]:
# Hyperparameters
learning_rate = 0.0192
num_epochs = 5

# Multi-class classification -> cross entropy loss
loss_function = nn.CrossEntropyLoss()

# Plain stochastic gradient descent over all parameters of the model
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

### Train model

In [None]:
# Hand the model, data loaders, loss and optimizer to the Trainer helper
trainer = Trainer(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    batch_size=batch_size,
    loss_function=loss_function,
    optimizer=optimizer,
)
# train() returns the training and test loss histories, which are plotted
# in the next cell
train_loss_dict, test_loss_dict = trainer.train(num_epochs)

In [None]:
# Plot the training and test loss with the utility script
for loss_dict, label in (
    (train_loss_dict, "Train Loss"),
    (test_loss_dict, "Test Loss"),
):
    utils.plot_loss(loss_dict, label=label)

plt.xlabel("Global Training Step")
plt.ylabel("Cross Entropy Loss")
# Limit the y-axis of the plot (The range should not be increased!)
plt.ylim([0, 1])
plt.legend()
plt.savefig("image_solutions/task_4a.png")
plt.show()

# Persist the trained weights, then report the final test-set metrics
torch.save(model.state_dict(), "saved_model.torch")
final_loss, final_acc = utils.compute_loss_and_accuracy(
    dataloader_test, model, loss_function
)
print(f"Final Test loss: {final_loss}. Final Test accuracy: {final_acc}")

## Plotting example

### Task A

In [None]:
# Task A: compare training on raw [0, 1] pixels against pixels normalized to
# [-1, 1]. Each run gets its own freshly initialised model and optimizer and
# is seeded identically, so the input normalization is the only difference
# between the two loss curves.
batch_size = 64
learning_rate = .0192
num_epochs = 5

# ---- Run 1: data in [0, 1] ----
torch.random.manual_seed(0)
np.random.seed(0)

# Define the transform explicitly: this cell rebinds `image_transform` further
# down, so relying on the global would make a re-run of the cell train the
# "original" model on normalized data.
image_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])
dataloader_train, dataloader_test = dataloaders.load_dataset(batch_size, image_transform)

model = create_model()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)
trainer = Trainer(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    batch_size=batch_size,
    loss_function=loss_function,
    optimizer=optimizer
)
train_loss_dict, test_loss_dict = trainer.train(num_epochs)

final_loss, final_acc = utils.compute_loss_and_accuracy(
    dataloader_test, model, loss_function)

# ---- Run 2: data normalized to [-1, 1] ----
# Reseed so the second model starts from the same random state as the first.
torch.random.manual_seed(0)
np.random.seed(0)

# (x - 0.5) / 0.5 maps the [0, 1] output of ToTensor onto [-1, 1]
image_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (0.5,)),
])
dataloader_train, dataloader_test = dataloaders.load_dataset(batch_size, image_transform)

# A new model and a new optimizer are required here; reusing the ones from
# run 1 would merely continue training the already-fitted network.
model = create_model()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)
trainer = Trainer(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    batch_size=batch_size,
    loss_function=loss_function,
    optimizer=optimizer
)

train_loss_dict_normalized, test_loss_dict_normalized = trainer.train(num_epochs)

final_loss_normalized, final_acc_normalized = utils.compute_loss_and_accuracy(
    dataloader_test, model, loss_function)



In [None]:
# Plot the loss curves of the two Task A runs against each other
task_a_curves = (
    (train_loss_dict, "Train Loss - Original model"),
    (test_loss_dict, "Test Loss - Original model"),
    (train_loss_dict_normalized, "Train Loss - Normalized data"),
    (test_loss_dict_normalized, "Test Loss - Normalized data"),
)
for loss_dict, label in task_a_curves:
    utils.plot_loss(loss_dict, label=label)

plt.xlabel("Global Training Step")
plt.ylabel("Cross Entropy Loss")
# Limit the y-axis of the plot (The range should not be increased!)
plt.ylim([0, 1])
plt.legend()
plt.savefig("image_solutions/task_4a.png")
plt.show()

# Store the weights of whichever model `model` currently refers to
torch.save(model.state_dict(), "saved_model.torch")

print(f"Final Orignal Test loss: {final_loss}. Final Original Test accuracy: {final_acc}")
print(f"Final Normalized Test loss: {final_loss_normalized}. Final Normalized Test accuracy: {final_acc_normalized}")

### Task B

In [None]:
# Task B: show the weight row of every output class as a 28x28 grayscale image.
# Index 1 of the Sequential is the Linear layer (index 0 is Flatten).
weights = model[1].weight.detach().cpu()
fig = plt.figure(num=None, figsize=(26, 12), dpi=80, facecolor='w', edgecolor='k')
for num_class, class_weights in enumerate(weights):
    # 2 rows x 5 columns, one panel per class
    ax = fig.add_subplot(2, 5, num_class + 1)
    ax.title.set_text(f'num_class: {num_class}')
    ax.imshow(class_weights.numpy().reshape(28, 28), cmap="gray")

### Task C

In [None]:
# Task C: train the single-layer model from scratch with learning rate 1.0
torch.random.manual_seed(0)
np.random.seed(0)

batch_size = 64
learning_rate = 1.0
num_epochs = 5

# Start from a freshly initialised model. Without this the run would continue
# from the weights left behind by the previous task, and the loss curve would
# not show the effect of the learning rate alone.
model = create_model()

# Redefine optimizer, as we have a new model.
optimizer = torch.optim.SGD(model.parameters(),
                            lr=learning_rate)

# Normalize the data to [-1, 1]: (x - 0.5) / 0.5 applied to the [0, 1] output
# of ToTensor
image_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5,), (0.5,)),
])

dataloader_train, dataloader_test = dataloaders.load_dataset(batch_size, image_transform)

trainer = Trainer(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    batch_size=batch_size,
    loss_function=loss_function,
    optimizer=optimizer
)

train_loss_dict_lr, test_loss_dict_lr = trainer.train(num_epochs)

final_loss_lr, final_acc_lr = utils.compute_loss_and_accuracy(
    dataloader_test, model, loss_function)



In [None]:
# Mean of all recorded test-loss values for the lr = 1.0 run, rounded to 3 decimals
average_loss = round(sum(test_loss_dict_lr.values()) / len(test_loss_dict_lr), 3)

# Plot loss
utils.plot_loss(train_loss_dict_lr, label="Train Loss - Learning rate = 1.0")
utils.plot_loss(test_loss_dict_lr, label="Test Loss - Learning rate = 1.0")
# The y-axis is deliberately wider than in the other plots so the large loss
# values of this run stay visible
plt.ylim([0, 19])
plt.legend()
plt.xlabel("Global Training Step")
plt.ylabel("Cross Entropy Loss")
# Save under a Task C specific name so the Task A figure is not overwritten
plt.savefig("image_solutions/task_4c.png")

plt.show()

torch.save(model.state_dict(), "saved_model.torch")

print(f"Final Test loss: {final_loss_lr}. Final Test accuracy: {final_acc_lr}. Average cross entropy loss on validation set: {average_loss}")

### Task D

In [None]:
def create_relu_model():
    """
        Build the two-layer classifier with a ReLU hidden layer.
        Edit the layer list below if you would like to change the model.

        Returns:
            An nn.Sequential (Flatten, 784 -> 64 Linear, ReLU, 64 -> 10 Linear)
            after it has been passed through utils.to_cuda.
    """
    hidden_units = 64
    layers = [
        # (batch_size, C, height, width) -> (batch_size, C*height*width)
        nn.Flatten(),
        nn.Linear(28 * 28 * 1, hidden_units),
        nn.ReLU(),
        # One output per class. No softmax here, as it is already combined
        # in the loss function
        nn.Linear(hidden_units, 10),
    ]
    # Transfer model to GPU memory if a GPU is available
    return utils.to_cuda(nn.Sequential(*layers))


model = create_relu_model()

In [None]:
# Task D: train the ReLU model on data normalized to [-1, 1]
torch.manual_seed(42)
np.random.seed(42)

batch_size = 64
learning_rate = 0.0192
num_epochs = 5

# The ReLU model is new, so it needs an optimizer bound to its own parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# (x - 0.5) / 0.5 maps the [0, 1] output of ToTensor onto [-1, 1]
image_transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(0.5, 0.5),
    ]
)
dataloader_train, dataloader_test = dataloaders.load_dataset(
    batch_size, image_transform
)

trainer = Trainer(
    model=model,
    dataloader_train=dataloader_train,
    dataloader_test=dataloader_test,
    batch_size=batch_size,
    loss_function=loss_function,
    optimizer=optimizer,
)
train_loss_dict_relu, test_loss_dict_relu = trainer.train(num_epochs)

final_loss_relu, final_acc_relu = utils.compute_loss_and_accuracy(
    dataloader_test, model, loss_function
)


In [None]:
# Plot the original single-layer run against the ReLU run

# Plot loss
utils.plot_loss(train_loss_dict, label="Train Loss - Original model")
utils.plot_loss(test_loss_dict, label="Test Loss - Original model")
utils.plot_loss(train_loss_dict_relu, label="Train Loss - ReLU model")
utils.plot_loss(test_loss_dict_relu, label="Test Loss - ReLU model")
# The y-axis is wider than in the Task A plot so both sets of curves fit
plt.ylim([0, 3])
plt.legend()
plt.xlabel("Global Training Step")
plt.ylabel("Cross Entropy Loss")
# Save under a Task D specific name so the Task A figure is not overwritten
plt.savefig("image_solutions/task_4d.png")

plt.show()

torch.save(model.state_dict(), "saved_model.torch")

print(f"Final Orignal Test loss: {final_loss}. Final Original Test accuracy: {final_acc}")
print(f"Final ReLU Test loss: {final_loss_relu}. Final ReLU Test accuracy: {final_acc_relu}")