In [1]:
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt

We will use the CIFAR10 dataset. It has the classes: ‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’. The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel color images of 32x32 pixels in size.

In [3]:
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim

In [4]:
# Preprocessing: turn each image into a tensor, then normalise each of the
# three channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
channel_mean = (0.5, 0.5, 0.5)
channel_std = (0.5, 0.5, 0.5)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Training split: downloaded to ./data if missing, reshuffled on every pass.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2)

# Test split: same preprocessing, served in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2)

# Short display names for the ten classes.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


Training an image classifier
----------------------------

We will do the following steps in order:

1. Load and normalize the CIFAR10 training and test datasets using
   ``torchvision``
2. Define a Convolutional Neural Network
3. Define a loss function
4. Train the network on the training data
5. Test the network on the test data

In [5]:
def _num_flat_features(x):
    size = x.size()[1:]  # all dimensions except the batch dimension
    num_features = 1
    for s in size:
        num_features *= s
    return num_features


class Net_C(nn.Module):
    '''
    Single-convolution classifier: conv -> ReLU -> max-pool -> linear.

    Expects batches of 3-channel square images of side ``in_size``.
    '''

    def __init__(self, M, p, N, in_size=32, num_classes=10):
        '''
        M is the number of output channels, p is the convolution kernel size,
        N is the max pooling kernel (ideally, it is a divisor of in_size+1-p,
        i.e. of 33-p for the default 32x32 input).

        in_size is the side length of the square input images and
        num_classes the number of output scores; the defaults reproduce the
        CIFAR-10 configuration (32x32 images, 10 classes).

        Raises ValueError when the kernel sizes leave nothing to feed the
        linear layer (pooled feature map smaller than 1x1).
        '''
        super(Net_C, self).__init__()
        # An unpadded, stride-1 p x p convolution shrinks each side to
        # in_size - p + 1; max pooling with kernel and stride N then keeps
        # floor(conv_side / N) positions per side.
        conv_side = in_size - p + 1
        if N < 1 or conv_side // N < 1:
            raise ValueError(
                'conv kernel %r and pool kernel %r leave no output for a '
                '%rx%r input' % (p, N, in_size, in_size))
        pooled_side = conv_side // N

        # 3 input image channels, M output channels, pxpx3 square convolution; bias=True is default
        self.conv1 = nn.Conv2d(3, M, p)
        self.pool1 = nn.MaxPool2d(N)
        self.fc1 = nn.Linear(M * pooled_side ** 2, num_classes)

    def forward(self, x):
        '''Return the raw class scores (no softmax) for a batch of images.'''
        x = self.pool1(F.relu(self.conv1(x)))
        # Flatten each sample to M * pooled_side**2 values for the linear layer.
        x = x.view(-1, self.num_flat_features(x))
        x = self.fc1(x)
        return x

    def num_flat_features(self, x):
        '''Number of values per sample in x (every dimension except the first).'''
        return _num_flat_features(x)
    

class Net_B(nn.Module):
    '''
    Two-layer fully connected classifier: linear -> ReLU -> linear.

    M is the width of the hidden layer. Inputs are flattened to
    32 * 32 * 3 values per sample and mapped to 10 output scores.
    '''

    def __init__(self, M):
        super().__init__()
        # Hidden layer on the flattened image, then the 10-way output layer.
        self.fc1 = nn.Linear(32 * 32 * 3, M)
        self.fc2 = nn.Linear(M, 10)

    def num_flat_features(self, x):
        '''Number of values per sample in x (every dimension except the first).'''
        return _num_flat_features(x)

    def forward(self, x):
        '''Return the raw class scores (no softmax) for a batch of images.'''
        flat = x.view(-1, self.num_flat_features(x))  # one row of 32 * 32 * 3 values per image
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
    

class Net_A(nn.Module):
    '''
    Linear classifier: one fully connected layer and nothing else.

    Inputs are flattened to 32 * 32 * 3 values per sample and mapped
    directly to 10 output scores.
    '''

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 10)

    def num_flat_features(self, x):
        '''Number of values per sample in x (every dimension except the first).'''
        return _num_flat_features(x)

    def forward(self, x):
        '''Return the raw class scores (no softmax) for a batch of images.'''
        flat = x.view(-1, self.num_flat_features(x))  # one row of 32 * 32 * 3 values per image
        return self.fc1(flat)

In [64]:
def train_net(net, iter_trainloader, criterion, optimizer, epochs):
    '''
    Trains net in place and returns the epoch-wise mean training loss.

    net              -- the module to train; it is switched to training mode
    iter_trainloader -- re-iterable yielding (inputs, labels) batches; it is
                        walked once per epoch
    criterion        -- callable(outputs, labels) returning a scalar loss tensor
    optimizer        -- optimizer already bound to net's parameters
    epochs           -- number of full passes over iter_trainloader

    Returns a list with one float per epoch: the sum of the per-batch losses
    divided by the number of batches (not weighted by batch size). An epoch
    in which the loader yields no batches is recorded as NaN.
    '''
    net.train()
    epochs_train_loss = []
    for _ in range(epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        num_batches = 0
        for inputs, labels in iter_trainloader:
            # gradients accumulate across backward() calls, so clear them first
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # sum statistics
            running_loss += loss.item()
            num_batches += 1

        # record loss; the mean over zero batches is undefined, so report NaN
        if num_batches:
            epochs_train_loss.append(running_loss / num_batches)
        else:
            epochs_train_loss.append(float('nan'))
    return epochs_train_loss

In [82]:
def test_net(net, iter_testloader, criterion):
    with torch.no_grad():
        correct = 0
        total = 0
        test_loss = 0.0
        size = 0
        for data in iter_testloader:
            images, labels = data
            outputs = net(images)
            loss = criterion(outputs, labels)
            _, predicted = torch.max(outputs.data, 1)
            # Sum statistics
            test_loss += loss.item()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            size += 1
    return test_loss / size, 100 * correct / total

In [66]:
def percent_accuracy(net, iter_testloader):
    '''
    Returns the percentage of samples in iter_testloader that net classifies
    correctly.

    net             -- the module to evaluate; it is put in eval mode for the
                       duration of the call and its previous mode is restored
    iter_testloader -- iterable yielding (images, labels) batches

    A sample counts as correct when the index of its highest output score
    equals its label. Returns NaN when the loader yields no samples.
    '''
    correct = 0
    total = 0

    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            # iterate the loader that was passed in, not a notebook global
            for images, labels in iter_testloader:
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # leave the module in the mode the caller had it in
        net.train(was_training)

    if total == 0:
        return float('nan')
    return 100 * correct / total

In [48]:
# Shared loss criterion: the same cross-entropy loss object is passed to
# train_net / test_net for every network in this notebook.
criterion_all = nn.CrossEntropyLoss()

Making sure everything works...
-----------

In [88]:
# Create one instance of each architecture and print its layer summary.
# The constructors must run here: this cell comes before the per-network
# sections below, so on a fresh kernel nothing else has defined these names.
netA = Net_A()
netB = Net_B(100)
netC = Net_C(100, 5, 2)
print(netA)
print(netB)
print(netC)

Net_A(
  (fc1): Linear(in_features=3072, out_features=10, bias=True)
)
Net_B(
  (fc1): Linear(in_features=3072, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)
Net_C(
  (conv1): Conv2d(3, 100, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=19600, out_features=10, bias=True)
)


In [89]:
# Smoke test: push a random batch of shape (4, 3, 32, 32) through each network
# in the list below and print the raw output scores.
# test_input is reused by the forward-pass checks further down.
# NOTE(review): only netB is in the list; add netA / netC to exercise them too.
test_input = torch.randn(4, 3, 32, 32)
for inet in [netB]:
    out = inet(test_input)
    print(out)

tensor([[ 0.6351,  0.1992,  0.3159,  0.0204,  0.0754, -0.0463,  0.3125, -0.0823,
         -0.0979, -0.3068],
        [-0.0114,  0.1704,  0.0322, -0.2838,  0.1890, -0.3320,  0.0448, -0.2956,
         -0.2089, -0.3648],
        [ 0.1855,  0.4247,  0.1155, -0.0184, -0.1142, -0.2607,  0.3533, -0.4119,
         -0.4639,  0.0784],
        [-0.1018,  0.0246,  0.2024,  0.1081,  0.0579, -0.4817,  0.1117, -0.4587,
         -0.0535,  0.0337]], grad_fn=<ThAddmmBackward>)


working with Network A
---------------

In [None]:
# Fresh, untrained instance of the single-linear-layer network.
netA = Net_A()

In [103]:
# Check that a forward pass runs on the random batch; the trailing ';' hides the output.
netA(test_input);

In [67]:
# SGD with momentum over Net A's parameters, plus an empty history list
# that the training cell extends with per-epoch losses.
optimizerA = optim.SGD(netA.parameters(), momentum=0.9, lr=0.05)
resA = list()

In [74]:
%%time
# Train Net A for two more epochs, appending the new per-epoch losses to resA,
# then evaluate once on the test loader. Re-running this cell continues
# training from the current weights and makes resA longer.
resA += train_net(netA, trainloader, criterion_all, optimizerA, 2)
print('train:\t', ', '.join('%.3f' % epoch_loss for epoch_loss in resA))
loss, percent = test_net(netA, testloader, criterion_all)
print('test:\t', '%.3f' % loss )
print('%d%% accuracy on 10000 test images.' % percent)

train:	 79.920, 76.249, 73.668, 71.825, 70.093, 69.397
test:	 76.059
28% accuracy on 10000 test images.
CPU times: user 3min 1s, sys: 8.29 s, total: 3min 10s
Wall time: 1min 29s


Comments:

3 minutes for 2 epochs => 15 minutes for 10 epochs

If 3 hours are allotted, the number of random guesses allowed is 4*3 = 12

working with Network B
---------------

In [108]:
# Fresh, untrained two-layer network with a 100-unit hidden layer.
netB = Net_B(100)

In [109]:
# SGD with momentum over Net B's parameters, plus an empty history list
# that the training cell extends with per-epoch losses.
optimizerB = optim.SGD(netB.parameters(), momentum=0.5, lr=0.01)
resB = list()

In [110]:
# Check that a forward pass runs on the random batch and display the raw scores.
netB(test_input)

tensor([[ 0.2983,  0.0816,  0.0660,  0.5593, -0.2924, -0.1701,  0.2301, -0.1060,
         -0.0908,  0.0986],
        [ 0.0435,  0.0322,  0.3295,  0.5355, -0.0843,  0.3337, -0.2070, -0.2223,
         -0.1454,  0.0421],
        [ 0.5170,  0.2725,  0.0528,  0.2867, -0.4661,  0.0534,  0.1140, -0.4153,
          0.0380,  0.4172],
        [ 0.1747,  0.3758,  0.3047, -0.3355, -0.0321, -0.2213, -0.1787,  0.0350,
         -0.3845, -0.0765]], grad_fn=<ThAddmmBackward>)

In [None]:
%%time
# Train Net B for two more epochs, appending the new per-epoch losses to resB,
# then evaluate once on the test loader. Re-running this cell continues
# training from the current weights and makes resB longer.
resB += train_net(netB, trainloader, criterion_all, optimizerB, 2)
print('train:\t', ', '.join('%.3f' % epoch_loss for epoch_loss in resB))
loss, percent = test_net(netB, testloader, criterion_all)
print('test:\t', '%.3f' % loss )
print('%d%% accuracy on 10000 test images.' % percent)

In [92]:
# Re-evaluate Net B on the test loader without any further training.
# '%d' truncates the accuracy to a whole percent when printing.
loss, percent = test_net(netB, testloader, criterion_all)
print('test:\t', '%.3f' % loss )
print('%d%% accuracy on 10000 test images.' % percent)

test:	 2.321
8% accuracy on 10000 test images.


In [94]:
# Display the per-epoch losses collected from train_net for Net B so far.
resB

[nan, nan]

Comments:

Why did B report NaN losses with lr = 0.05 and p (momentum) = 0.9? That's garbage.
I tried 0.1 as well: still garbage. So the network can't be made to learn too quickly.
With lr = 0.001 and momentum = 0.5 I got finite losses, but they never moved.

Similar time for A and B

working with Network C
-----------------------

In [95]:
# Fresh, untrained convolutional network: 100 output channels, 5x5 convolution, 2x2 max pooling.
netC = Net_C(100, 5, 2)

In [None]:
# Check that a forward pass runs on the random batch and display the raw scores.
netC(test_input)

In [None]:
# SGD with momentum over Net C's parameters, plus an empty history list
# that the training cell extends with per-epoch losses.
optimizerC = optim.SGD(netC.parameters(), momentum=0.9, lr=0.01)
resC = list()

In [None]:
%%time
# Train Net C for two more epochs, appending the new per-epoch losses to resC,
# then evaluate once on the test loader. Re-running this cell continues
# training from the current weights and makes resC longer.
resC += train_net(netC, trainloader, criterion_all, optimizerC, 2)
print('train:\t', ', '.join('%.3f' % epoch_loss for epoch_loss in resC))
loss, percent = test_net(netC, testloader, criterion_all)
print('test:\t', '%.3f' % loss )
print('%d%% accuracy on 10000 test images.' % percent)