<a href="https://colab.research.google.com/github/callistachang/pytorch-stuffs/blob/master/cnn_cifar10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## import libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

# run on the GPU when CUDA is available, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cuda


## initialize parameters

In [2]:
# https://towardsdatascience.com/epoch-vs-iterations-vs-batch-size-4dfb9c7ce9c9

# seed torch's global RNG so weight initialization and DataLoader shuffling
# are repeatable across "Restart & Run All"
# NOTE: some CUDA kernels may still be non-deterministic
SEED = 42
torch.manual_seed(SEED)

# number of times the entire dataset is passed through the network
NUM_EPOCHS = 5

# number of samples in a batch (/observed together)
# the higher, the more memory used
BATCH_SIZE = 4

# step size during SGD; aka the amount that weights are updated during training
# each update moves every weight by LEARNING_RATE (0.1%) of its gradient
LEARNING_RATE = 0.001

## import datasets

In [3]:
# ToTensor converts the PIL images to tensors scaled to [0, 1];
# Normalize with mean 0.5 and std 0.5 per channel then maps them to [-1, 1]
transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR10 : 60k 32x32 color images in 10 equally divided classes
# (the training split is built first, then the test split)
train_dataset, test_dataset = (
  torchvision.datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
  for is_train in (True, False)
)

# only the training data is shuffled; the test data keeps its order
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# human-readable names, indexed by the integer class label
CLASSES = (
  'plane', 'car', 'bird', 'cat', 'deer',
  'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


## create neural network class

In [4]:
class Net(nn.Module):
  """Small CNN: two conv + max-pool stages followed by three fully connected layers.

  Expects input of shape (N, 3, 32, 32) and returns a (N, 10) tensor of raw,
  un-normalized class scores (no softmax is applied here).
  """

  def __init__(self):
    super().__init__()
    # in_channels = 3 because RGB
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    # in_features = 16*5*5: every unpadded 5x5 conv shrinks each side by 4 and
    # every pool halves it, so 32 -> 28 -> 14 -> 10 -> 5, with 16 channels from conv2
    self.fc1 = nn.Linear(in_features=16*5*5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    # out_features = 10 because there are 10 possible classes
    self.fc3 = nn.Linear(in_features=84, out_features=10)
    
    # the size of the pool is 2x2
    # when the pool is 'moved', it is moved 2 to the side, hence stride = 2 (no overlapping)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

  def forward(self, x):
    """Run a batch of images through the network and return per-class scores."""
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))

    # Flatten every dimension except the batch one: (N, 16, 5, 5) -> (N, 400).
    # This is used instead of x.view(-1, 16*5*5) because view(-1, ...) infers the
    # number of rows, so a wrongly sized input could be silently reshaped into
    # extra "samples". Keeping the batch dimension fixed makes such an input
    # fail loudly in fc1 instead.
    x = x.flatten(start_dim=1)

    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

## initialize the model, the loss criterion and the optimizer

In [5]:
# build the network and move its parameters onto the selected device
model = Net()
model = model.to(device)

# cross-entropy loss on the network's raw class scores
criterion = nn.CrossEntropyLoss()
# plain stochastic gradient descent over all of the model's parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

## train the model

In [None]:
# put the model in training mode explicitly, so this cell does not depend on
# kernel state (another cell may have called model.eval())
model.train()

for epoch in range(NUM_EPOCHS):
  # enumerates through, batch by batch
  for i, (images, labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)

    # 1) forward pass
    outputs = model(images)

    # 2) calculate loss by comparing predicted vs. actual
    loss = criterion(outputs, labels)

    # 3) backpropagation
    optimizer.zero_grad() # clear gradients left over from the previous step
    loss.backward()       # compute gradients of the loss w.r.t. the parameters
    optimizer.step()      # update the parameters using those gradients

    # log every 2000 steps; note this is the loss of the current batch only,
    # so it is noisy for a small BATCH_SIZE
    if (i+1) % 2000 == 0:
      print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Step [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

print("===")
print("Finished training")

# persist only the learned parameters (state dict), not the whole module
torch.save(model.state_dict(), './cnn.pth')

Epoch [1/5], Step [2000/12500], Loss: 2.3073
Epoch [1/5], Step [4000/12500], Loss: 2.3162
Epoch [1/5], Step [6000/12500], Loss: 2.2797
Epoch [1/5], Step [8000/12500], Loss: 2.2802
Epoch [1/5], Step [10000/12500], Loss: 2.2679
Epoch [1/5], Step [12000/12500], Loss: 2.0668
Epoch [2/5], Step [2000/12500], Loss: 2.0742
Epoch [2/5], Step [4000/12500], Loss: 2.2769
Epoch [2/5], Step [6000/12500], Loss: 2.1161
Epoch [2/5], Step [8000/12500], Loss: 2.1132
Epoch [2/5], Step [10000/12500], Loss: 2.1617
Epoch [2/5], Step [12000/12500], Loss: 1.8172
Epoch [3/5], Step [2000/12500], Loss: 1.8847
Epoch [3/5], Step [4000/12500], Loss: 1.2473
Epoch [3/5], Step [6000/12500], Loss: 2.3333
Epoch [3/5], Step [8000/12500], Loss: 1.3391
Epoch [3/5], Step [10000/12500], Loss: 2.0433
Epoch [3/5], Step [12000/12500], Loss: 1.7008
Epoch [4/5], Step [2000/12500], Loss: 2.0504
Epoch [4/5], Step [4000/12500], Loss: 1.0061
Epoch [4/5], Step [6000/12500], Loss: 2.1933
Epoch [4/5], Step [8000/12500], Loss: 0.9504
Epoc

## test the model

In [21]:
# rebuild the network and restore the trained weights from disk
model = Net().to(device)
# map_location lets a checkpoint saved on the GPU be loaded on a CPU-only machine
model.load_state_dict(torch.load('./cnn.pth', map_location=device))
model.eval()

num_classes = len(CLASSES)
num_samples = 0
num_correct = 0
num_class_samples = [0] * num_classes
num_class_correct = [0] * num_classes

# no gradients are needed for evaluation
with torch.no_grad():
  # iterate through all batches (containing up to BATCH_SIZE samples each) in the test_dataset
  for images, labels in test_loader:
    images = images.to(device)

    # a 1-D tensor with the correct class index of every image in the batch
    # e.g. tensor([3, 8, 8, 8]) for a batch of 4
    labels = labels.to(device)

    # a (batch, 10) tensor with one raw score per class for every image;
    # these are un-normalized scores, not values limited to a fixed range
    outputs = model(images)

    # torch.max over dim=1 returns (values, indices) of the highest score in
    # each row; the index is the predicted class, the value is not needed
    _, preds = torch.max(input=outputs, dim=1)

    num_samples += labels.size(0)
    num_correct += (preds == labels).sum().item()

    # walk over the samples actually present in this batch: the last batch can
    # be shorter than BATCH_SIZE, so indexing with range(BATCH_SIZE) could fail
    for label, pred in zip(labels.tolist(), preds.tolist()):
      if label == pred:
        num_class_correct[label] += 1
      num_class_samples[label] += 1

# report nan instead of crashing when there is nothing to divide by
accuracy = num_correct / num_samples * 100 if num_samples else float('nan')
print(f'Overall accuracy: {num_correct}/{num_samples} ({accuracy:.2f}%)')

for i in range(num_classes):
  if num_class_samples[i]:
    accuracy_class = num_class_correct[i] / num_class_samples[i] * 100
  else:
    accuracy_class = float('nan')
  print(f'Accuracy of predicting {CLASSES[i]}: {num_class_correct[i]}/{num_class_samples[i]} ({accuracy_class:.2f}%)')

Overall accuracy: 4836/10000 (48.36%)
Accuracy of predicting plane: 543/1000 (54.30%)
Accuracy of predicting car: 656/1000 (65.60%)
Accuracy of predicting bird: 260/1000 (26.00%)
Accuracy of predicting cat: 221/1000 (22.10%)
Accuracy of predicting deer: 323/1000 (32.30%)
Accuracy of predicting dog: 458/1000 (45.80%)
Accuracy of predicting frog: 727/1000 (72.70%)
Accuracy of predicting horse: 614/1000 (61.40%)
Accuracy of predicting ship: 602/1000 (60.20%)
Accuracy of predicting truck: 432/1000 (43.20%)
