In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plot
import numpy as np

In [2]:
#Device configuration 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [3]:
#hyper parameters
num_epochs = 5
batch_size = 4
learning_rate = .001


In [4]:
#dataset has PILIImage images of range [0,1]
#Transform to Tensors of normalized range [-1,1]
transform = transforms.Compose(
    [transforms.ToTensor(), 
     transforms.Normalize((.5, .5, .5), (.5, .5, .5))])


In [5]:
train_dataset = torchvision.datasets.CIFAR10(root = './data', train=True, download = True, transform = transform)

test_dataset = torchvision.datasets.CIFAR10(root = './data', train=False, download = True, transform = transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True) 

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size = batch_size, shuffle = True) 


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [6]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [7]:
#implement conv net 
class ConvNet(nn.Module):
  def __init__(self):
    super(ConvNet, self).__init__()
    self.conv1 = nn.Conv2d(3,6, 5 ) #3 First conv layer: input color channel, 6 output color channel, 5 kernel size
    self.pool = nn.MaxPool2d(2,2) #2x2 kernel size, and stride of 2 
    self.conv2 = nn.Conv2d(6, 16, 5) #second conv layer; input channel must be = to the output channel of the previous convolutional layer 
    self.fc1 = nn.Linear(16*5*5, 120) #fully connected layer; try 120 output
    self.fc2 = nn.Linear(120, 84) #fully connected layer; 120 input features; 84 output features 
    self.fc3 = nn.Linear(84, 10) #fully connected layer; 10 outputs because of 10 classes

    #16*5*5 & 10 are fixed... the rest can be experimented with 
    #16*5*5 comes from flatteniign the 3D tensor that is output from the convolutional layers, to a 1D layer for the fully connected linear layers
        #16x5x5 are the dimensions of the output of conv2
  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x))) #first conv and pooling layer 
    x = self.pool(F.relu(self.conv2(x))) #second conv and pooling layer 
    x = x.view(-1, 16*5*5) #flatten
    x = F.relu(self.fc1(x)) #first fully connected layer + activation function
    x = F.relu(self.fc2(x)) #second fully connected layer + activation function
    x = self.fc3(x)  #3rd fully connected layer, no activation function
    return x

    #no softmax function because this is included in our criterion defined below 
  





In [8]:
model = ConvNet().to(device)

In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [10]:
#Training
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    #original shape: [4,3,32,32] = 4,3,1024
    #input_layer: 3 input channels, 6 output channels, 5 kernel size
    images = images.to(device)
    labels = labels.to(device)

    #Forward Pass
    outputs = model(images)
    loss = criterion(outputs, labels)

    #Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if(i+1) % 2000 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

  print('Finished training')



Epoch [1/5], Step [2000/12500], Loss: 2.2885
Epoch [1/5], Step [4000/12500], Loss: 2.3178
Epoch [1/5], Step [6000/12500], Loss: 2.3055
Epoch [1/5], Step [8000/12500], Loss: 2.2907
Epoch [1/5], Step [10000/12500], Loss: 2.2522
Epoch [1/5], Step [12000/12500], Loss: 2.0611
Finished training
Epoch [2/5], Step [2000/12500], Loss: 2.0287
Epoch [2/5], Step [4000/12500], Loss: 2.0607
Epoch [2/5], Step [6000/12500], Loss: 2.0552
Epoch [2/5], Step [8000/12500], Loss: 1.4333
Epoch [2/5], Step [10000/12500], Loss: 1.2274
Epoch [2/5], Step [12000/12500], Loss: 1.8577
Finished training
Epoch [3/5], Step [2000/12500], Loss: 1.8288
Epoch [3/5], Step [4000/12500], Loss: 1.2117
Epoch [3/5], Step [6000/12500], Loss: 1.4803
Epoch [3/5], Step [8000/12500], Loss: 1.4389
Epoch [3/5], Step [10000/12500], Loss: 2.2393
Epoch [3/5], Step [12000/12500], Loss: 0.6184
Finished training
Epoch [4/5], Step [2000/12500], Loss: 1.5972
Epoch [4/5], Step [4000/12500], Loss: 1.8143
Epoch [4/5], Step [6000/12500], Loss: 1.

In [11]:
#Testing
with torch.no_grad():
  n_correct = 0 
  n_samples = 0
  n_class_correct = [0 for i in range(10)]
  n_class_samples = [0 for i in range(10)]
  for images, labels in test_loader:
    images = images.to(device)
    labels = labels.to(device)
    outputs = model(images)

    _, predicted = torch.max(outputs,1)
    n_samples += labels.size(0)
    n_correct += (predicted == labels).sum().item()

    for i in range(batch_size):
      label = labels[i]
      pred = predicted[i]

    if (label == pred):
      n_class_correct[label] += 1
    n_class_samples[label] +=1

  acc = 100.0*n_correct/n_samples
  print(f'Accuracy of the network: {acc} %')

  for i in range(10):
    acc = 100.0 * n_class_correct[i]/n_class_samples[i]
    print(f'Accuracy of {classes[i]}: {acc} %')




Accuracy of the network: 48.48 %
Accuracy of plane: 51.79282868525896 %
Accuracy of car: 50.202429149797574 %
Accuracy of bird: 43.89312977099237 %
Accuracy of cat: 38.429752066115704 %
Accuracy of deer: 15.859030837004406 %
Accuracy of dog: 47.65342960288809 %
Accuracy of frog: 50.19455252918288 %
Accuracy of horse: 54.66101694915254 %
Accuracy of ship: 61.2 %
Accuracy of truck: 64.54183266932272 %


Poor performance !!! (~50%)... Re-try with more epochs