In [None]:
# Load Cifar10 as datasets
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

# Build the two CIFAR-10 splits. Both are stored under ./data, fetched on
# demand (download=True), and yield images converted by ToTensor().
training_data = datasets.CIFAR10("data", train=True, transform=ToTensor(), download=True)
test_data = datasets.CIFAR10("data", train=False, transform=ToTensor(), download=True)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Create dataloaders from them

from torch.utils.data import DataLoader

# Shuffle only the training batches. Evaluation metrics are averaged over the
# whole test set, so shuffling it adds nothing and only makes the evaluation
# order non-deterministic.
train_dataloader = DataLoader(training_data, batch_size = 100, shuffle = True)
test_dataloader = DataLoader(test_data, batch_size = 100, shuffle = False)

In [None]:
# Create a basic neural net to compute results

import torch.nn as nn
import torch.nn.functional as F

class CNN_1(nn.Module):
  """Small CNN: two unpadded 3x3 conv + max-pool stages and three linear layers.

  Intended for 3-channel 32x32 inputs; the size of the first linear layer
  is derived from that spatial size. Returns 10 unnormalized class scores
  per sample.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6, 3)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 3)
    # Spatial size for a 32x32 input with unpadded 3x3 convolutions:
    # 32 -> conv1 -> 30 -> pool -> 15 -> conv2 -> 13 -> pool -> 6,
    # so the flattened feature vector has 16 * 6 * 6 entries (not 16 * 5 * 5,
    # which would only be correct for 5x5 kernels).
    self.l1 = nn.Linear(16 * 6 * 6, 120)
    self.l2 = nn.Linear(120, 84)
    # Final fully connected layer, one output per class (10 classes).
    self.l3 = nn.Linear(84, 10)

  def forward(self, x):
    """Map a batch of images (N, 3, 32, 32) to class scores (N, 10)."""
    # Convolution, then the activation, then 2x2 max-pooling.
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    # Flatten everything except the batch dimension.
    x = x.view(x.size(0), -1)
    x = F.relu(self.l1(x))
    x = F.relu(self.l2(x))
    # No activation on the last layer: the scores are returned as-is.
    x = self.l3(x)
    return x


In [None]:
class CNN_2(nn.Module):
  """Two-stage convolutional classifier with a single linear output layer.

  Each stage is Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2);
  the stages produce 16 and 32 channels. The linear layer takes
  32 * 8 * 8 flattened features and emits 10 scores per sample.
  """

  @staticmethod
  def _stage(n_in, n_out):
    """Build one Conv2d -> ReLU -> MaxPool2d(2) stage."""
    return nn.Sequential(
        nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )

  def __init__(self):
    super().__init__()
    self.conv1 = self._stage(3, 16)
    self.conv2 = self._stage(16, 32)
    self.out = nn.Linear(32 * 8 * 8, 10)

  def forward(self, x):
    # Each stage already ends in ReLU + max-pool, whose output is
    # non-negative, so applying ReLU again here would change nothing.
    features = self.conv2(self.conv1(x))
    # Flatten everything except the batch dimension for the linear layer.
    flat = features.view(features.size(0), -1)
    return self.out(flat)

In [None]:
class CNN_3(nn.Module):
  """Three-stage convolutional classifier with a single linear output layer.

  Each stage is Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2);
  the stages produce 16, 32 and 64 channels. The linear layer takes
  64 * 4 * 4 flattened features and emits 10 scores per sample.
  """

  @staticmethod
  def _stage(n_in, n_out):
    """Build one Conv2d -> ReLU -> MaxPool2d(2) stage."""
    return nn.Sequential(
        nn.Conv2d(n_in, n_out, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(2),
    )

  def __init__(self):
    super().__init__()
    self.conv1 = self._stage(3, 16)
    self.conv2 = self._stage(16, 32)
    self.conv3 = self._stage(32, 64)
    self.out = nn.Linear(64 * 4 * 4, 10)

  def forward(self, x):
    # Each stage already ends in ReLU + max-pool, whose output is
    # non-negative, so applying ReLU again here would change nothing.
    features = self.conv3(self.conv2(self.conv1(x)))
    # Flatten everything except the batch dimension for the linear layer.
    flat = features.view(features.size(0), -1)
    return self.out(flat)

RuntimeError                              Traceback (most recent call last)
<ipython-input-14-1aec4c419434> in <module>
     14   train(dataloader = train_dataloader, 
     15         model = model1, loss_func = loss_func,
---> 16         optimizer = optimizer)

7 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py in _conv_forward(self, input, weight, bias)
    452                             _pair(0), self.dilation, self.groups)
    453         return F.conv2d(input, weight, bias, self.stride,
--> 454                         self.padding, self.dilation, self.groups)
    455 
    456     def forward(self, input: Tensor) -> Tensor:

RuntimeError: Given groups=1, weight of size [16, 1, 5, 5], expected input[100, 3, 32, 32] to have 1 channels, but got 3 channels instead

In [None]:
def train(dataloader, model, loss_func, optimizer, verbose=False):
  """Run one pass over `dataloader`, updating `model` after every batch.

  Args:
    dataloader: iterable yielding (inputs, targets) batches. Batches are used
      as-is; no device transfer is performed here.
    model: module to train; it is switched to training mode.
    loss_func: callable taking (predictions, targets) and returning a scalar
      loss tensor.
    optimizer: optimizer that must be bound to `model`'s parameters.
    verbose: when True, print the loss every 100 batches.

  Returns:
    None. The model parameters are updated in place.
  """
  model.train()

  for batch, (X, y) in enumerate(dataloader):
    # Forward pass and prediction error.
    pred = model(X)
    loss = loss_func(pred, y)

    # Clear the previous gradients, compute new ones, then apply them.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Optional progress report every 100 batches.
    if verbose and batch % 100 == 0:
      samples_seen = batch * len(X)
      print("For the batch " + str(samples_seen) + " the loss is " + str(loss.item()))

In [None]:
# Determine the accuracy of the neural net
import torch
def test(dataloader, model, loss_func):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval() 
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      pred = model(X)
      test_loss += loss_func(pred, y).item() # Add the loss amount
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
                   
  test_loss /= num_batches
  correct /= size
  print("The test error: ")
  print("\t Accuracy: ", 100 * correct)
  print("\t Avg Loss: ", test_loss)

In [None]:
# create models
# First architecture under test.
model1 = CNN_1()

# Objective: cross-entropy over the class scores.
loss_func = nn.CrossEntropyLoss()

# Optimizer: Adam over model1's parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model1.parameters(), lr=1e-3)

# Number of full passes over the training set.
epochs = 10

for epoch in range(epochs):
  train(dataloader=train_dataloader, model=model1, loss_func=loss_func, optimizer=optimizer)

For the batch 0 the loss is 2.299276351928711
For the batch 10000 the loss is 1.936888575553894
For the batch 20000 the loss is 1.7993218898773193
For the batch 30000 the loss is 1.822350025177002
For the batch 40000 the loss is 1.5451610088348389
For the batch 0 the loss is 1.571778416633606
For the batch 10000 the loss is 1.5351146459579468
For the batch 20000 the loss is 1.4890764951705933
For the batch 30000 the loss is 1.5546023845672607
For the batch 40000 the loss is 1.3030287027359009
For the batch 0 the loss is 1.592533826828003
For the batch 10000 the loss is 1.344964861869812
For the batch 20000 the loss is 1.5158953666687012
For the batch 30000 the loss is 1.5160419940948486
For the batch 40000 the loss is 1.4254355430603027
For the batch 0 the loss is 1.4004729986190796
For the batch 10000 the loss is 1.5541415214538574
For the batch 20000 the loss is 1.6625869274139404
For the batch 30000 the loss is 1.3081164360046387
For the batch 40000 the loss is 1.4563826322555542
Fo

In [None]:
# Evaluate model 1 (trained with the Adam optimizer) on the test set.
test(dataloader=test_dataloader, model=model1, loss_func=loss_func)

The test error: 
	 Accuracy:  55.730000000000004
	 Avg Loss:  1.2278882354497909


In [None]:
# Train model 2.
# CNN_2 uses two padded 3x3 conv stages (16 and 32 channels) followed by a
# single linear classifier.
model2 = CNN_2()
optimizer2 = torch.optim.Adam(model2.parameters(), lr = .001)
for epoch in range(epochs):
  # Use model2's own optimizer: `optimizer` is bound to model1's parameters,
  # so passing it here would leave model2 untrained.
  train(train_dataloader, model2, loss_func, optimizer2)

RuntimeError: ignored

In [None]:
# Evaluate model 2 on the test set.
test(dataloader=test_dataloader, model=model2, loss_func=loss_func)

In [None]:
lr = [.0001, .001, .01]
momentums = [.7,.8,.9]

# Not read anywhere in the visible notebook; kept so the name stays defined.
outputs = []
for l in lr:
  # Start from a freshly initialized network for every learning rate.
  # Reusing an already-trained model would make each run continue from the
  # previous one, so the results would not be comparable.
  lr_model = CNN_1()
  optimizer = torch.optim.Adam(lr_model.parameters(), lr = l)
  for epoch in range(epochs):
    train(train_dataloader, lr_model, loss_func, optimizer)
  # Label the result so each printed block can be matched to its setting.
  print("Learning rate:", l)
  test(test_dataloader, lr_model, loss_func)

The test error: 
	 Accuracy:  59.550000000000004
	 Avg Loss:  1.1599075984954834
The test error: 
	 Accuracy:  59.61
	 Avg Loss:  1.1560079091787339
The test error: 
	 Accuracy:  54.779999999999994
	 Avg Loss:  1.2966955476999282


Clearly, changing the learning rate noticeably affects the accuracy of the CNN. A learning rate that is either too high or too low gives us poorer accuracy.

In [None]:
for m in momentums:
  # Start from a freshly initialized network for every momentum value.
  # Reusing an already-trained model would make each run continue from the
  # previous one, so the results would not be comparable.
  momentum_model = CNN_1()
  # SGD is used for this sweep because it takes a `momentum` argument
  # (torch.optim.Adam does not).
  optimizer = torch.optim.SGD(momentum_model.parameters(), lr=0.01, momentum=m)
  for epoch in range(epochs):
    train(train_dataloader, momentum_model, loss_func, optimizer)
  # Label the result so each printed block can be matched to its setting.
  print("Momentum:", m)
  test(test_dataloader, momentum_model, loss_func)

The test error: 
	 Accuracy:  57.85
	 Avg Loss:  1.232039583325386
The test error: 
	 Accuracy:  57.97
	 Avg Loss:  1.243187518119812
The test error: 
	 Accuracy:  56.54
	 Avg Loss:  1.3101933073997498


Again, momentum does affect our accuracy noticeably. Too much or too little momentum will give us poorer accuracy.