In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

## Load data

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./datasets/CIFAR-10', train=True,
                                        download=True, transform=transform)

testset = torchvision.datasets.CIFAR10(root='./datasets/CIFAR-10', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset) / 100,
                                         shuffle=False, num_workers=1)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


## split train and validation set

In [3]:
#https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb
def train_valid_loader(data, batchSize, nWorker, validSize=0.1, shuffle=True, pin_memory=True):
    nTrain = len(data)
    indices = list(range(nTrain))
    split = int(np.floor(validSize * nTrain))
    if shuffle == True:
        #np.random.seed(randSeed)
        np.random.shuffle(indices)
        
    train_i, valid_i = indices[split:], indices[:split]
    
    train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_i)
    valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(valid_i)
    
    trainLoader = torch.utils.data.DataLoader(data, 
                    batch_size=batchSize, sampler=train_sampler, 
                    num_workers=nWorker, pin_memory=pin_memory)
    
    validLoader = torch.utils.data.DataLoader(data, 
                    batch_size=batchSize, sampler=valid_sampler, 
                    num_workers=nWorker, pin_memory=pin_memory)
    return  trainLoader, validLoader, train_sampler

In [4]:
trainloader, validloader, nSample = train_valid_loader(trainset, 128, 1, 
                                           validSize=0.1, shuffle=True, pin_memory=True)
print (len(trainloader), len(validloader))

(352, 40)


In [5]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 92, 5)
        #self.conv1_bn = nn.BatchNorm2d(92)
        self.conv2 = nn.Conv2d(92, 184, 5)
        # an affine operation: y = Wx + b
        #self.conv2_bn = nn.BatchNorm2d(184)
        self.fc1 = nn.Linear(184 * 5 * 5, 120)
        #self.fc1_bn = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, 84)
        #self.fc2_bn = nn.BatchNorm1d(84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        #x = F.max_pool2d(F.relu(self.conv1_bn(self.conv1(x))), (2, 2))   
        x = F.max_pool2d(F.relu((self.conv1(x))), (2, 2))        
        # If the size is a square you can only specify a single number
        #x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu((self.conv2(x))), 2)
        x = x.view(-1, self.num_flat_features(x))
        #x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.relu((self.fc1(x)))
        #x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.relu((self.fc2(x)))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
net = net.cuda()
print(net)

Net (
  (conv1): Conv2d(3, 92, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(92, 184, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (4600 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


## Add batch

In [6]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 92, 3, padding = 1)
        self.conv1_bn = nn.BatchNorm2d(92)
        
        self.conv2 = nn.Conv2d(92, 184, 3, padding = 1)
        # an affine operation: y = Wx + b
        self.conv2_bn = nn.BatchNorm2d(184)
        
        self.fc1 = nn.Linear(184 * 8 * 8, 120)
        self.fc1_bn = nn.BatchNorm1d(120)
        
        self.fc2 = nn.Linear(120, 84)
        self.fc2_bn = nn.BatchNorm1d(84)
        
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1_bn(self.conv1(x))), (2, 2))   
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), 2)
        
        x = x.view(-1, self.num_flat_features(x))
        
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
net = net.cuda()
print(net)

Net (
  (conv1): Conv2d(3, 92, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_bn): BatchNorm2d(92, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(92, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_bn): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True)
  (fc1): Linear (11776 -> 120)
  (fc1_bn): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True)
  (fc2): Linear (120 -> 84)
  (fc2_bn): BatchNorm1d(84, eps=1e-05, momentum=0.1, affine=True)
  (fc3): Linear (84 -> 10)
)


In [7]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


In [8]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 300 == 299:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 300))
            running_loss = 0.0
            
    correct = 0
    total = 0
    for data in validloader:
        images, labels = data
        outputs = net(Variable(images.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.cuda()).sum()

    print('Accuracy of the network on the valid images: %5.2f %%' % (
        100.0 * correct / total))

print('Finished Training')

[1,   300] loss: 1.851
Accuracy of the network on the valid images: 52.26 %
[2,   300] loss: 1.329
Accuracy of the network on the valid images: 61.90 %
[3,   300] loss: 1.047
Accuracy of the network on the valid images: 66.52 %
[4,   300] loss: 0.873
Accuracy of the network on the valid images: 68.74 %
[5,   300] loss: 0.748
Accuracy of the network on the valid images: 69.64 %
[6,   300] loss: 0.639
Accuracy of the network on the valid images: 70.28 %
[7,   300] loss: 0.538
Accuracy of the network on the valid images: 70.40 %
[8,   300] loss: 0.448
Accuracy of the network on the valid images: 70.36 %
[9,   300] loss: 0.368
Accuracy of the network on the valid images: 69.94 %
[10,   300] loss: 0.293
Accuracy of the network on the valid images: 69.60 %
[11,   300] loss: 0.232
Accuracy of the network on the valid images: 70.30 %
[12,   300] loss: 0.175
Accuracy of the network on the valid images: 69.50 %
[13,   300] loss: 0.133
Accuracy of the network on the valid images: 69.60 %
[14,   3

In [9]:
correct = 0
total = 0
for data in testloader:
    images, labels = data
    outputs = net(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.cuda()).sum()

print('Accuracy of the network on the 10000 test images: %5.2f %%' % (
    100.0 * correct / total))

Accuracy of the network on the 10000 test images: 73.77 %


In [66]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
for data in testloader:
    images, labels = data
    outputs = net(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    c = (predicted == labels.cuda()).squeeze()
    for i in range(4):
        label = labels[i]
        class_correct[label] += c[i]
        class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 72 %
Accuracy of   car : 81 %
Accuracy of  bird : 55 %
Accuracy of   cat : 41 %
Accuracy of  deer : 65 %
Accuracy of   dog : 56 %
Accuracy of  frog : 75 %
Accuracy of horse : 90 %
Accuracy of  ship : 81 %
Accuracy of truck : 89 %


## Add Xavier

In [9]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 92, 5)
        #torch.nn.init.xavier_normal(self.conv1.weight)
        self.conv2 = nn.Conv2d(92, 184, 5)
        #torch.nn.init.xavier_normal(self.conv2.weight)
        self.fc1 = nn.Linear(184 * 5 * 5, 120)
        #torch.nn.init.xavier_normal(self.fc1.weight)
        self.fc2 = nn.Linear(120, 84)
        #torch.nn.init.xavier_normal(self.fc2.weight)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        #x = F.max_pool2d(F.relu(self.conv1_bn(self.conv1(x))), (2, 2))   
        x = F.max_pool2d(F.relu((self.conv1(x))), (2, 2))        
        # If the size is a square you can only specify a single number
        #x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu((self.conv2(x))), 2)
        x = x.view(-1, self.num_flat_features(x))
        #x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.relu((self.fc1(x)))
        #x = F.relu(self.fc2_bn(self.fc2(x)))
        x = F.relu((self.fc2(x)))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

def weights_init(m):
    classname=m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.xavier_normal(m.weight.data)
        #torch.nn.init.xavier_normal(m.bias.data)

net = Net()
net = net.apply(weights_init)
net = net.cuda()
print(net)

Net (
  (conv1): Conv2d(3, 92, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(92, 184, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (4600 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [18]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)


In [10]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 300 == 299:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 300))
            running_loss = 0.0
            
    correct = 0
    total = 0
    for data in validloader:
        images, labels = data
        outputs = net(Variable(images.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.cuda()).sum()

    print('Accuracy of the network on the valid images: %5.2f %%' % (
        100.0 * correct / total))

print('Finished Training')

[1,   300] loss: 0.026
Accuracy of the network on the valid images: 74.96 %
[2,   300] loss: 0.028
Accuracy of the network on the valid images: 74.42 %
[3,   300] loss: 0.027
Accuracy of the network on the valid images: 74.68 %
[4,   300] loss: 0.024
Accuracy of the network on the valid images: 74.28 %
[5,   300] loss: 0.026
Accuracy of the network on the valid images: 75.10 %
[6,   300] loss: 0.021
Accuracy of the network on the valid images: 73.86 %
[7,   300] loss: 0.031
Accuracy of the network on the valid images: 74.36 %
[8,   300] loss: 0.021
Accuracy of the network on the valid images: 74.46 %
[9,   300] loss: 0.019
Accuracy of the network on the valid images: 73.72 %
[10,   300] loss: 0.015
Accuracy of the network on the valid images: 74.44 %
Finished Training


In [11]:
correct = 0
total = 0
for data in testloader:
    images, labels = data
    outputs = net(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.cuda()).sum()

print('Accuracy of the network on the 10000 test images: %5.2f %%' % (
    100.0 * correct / total))

Accuracy of the network on the 10000 test images: 74.15 %


## add Adam

In [None]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.001)#, weight_decay = 0.001)

## Net2

In [12]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 92, 3, padding = 1)
        self.conv1_bn = nn.BatchNorm2d(92)
        
        self.conv2 = nn.Conv2d(92, 184, 3, padding = 1)
        # an affine operation: y = Wx + b
        self.conv2_bn = nn.BatchNorm2d(184)
        
        self.conv3  = nn.Conv2d(184, 368, 3, padding = 1)
        self.conv3_bn = nn.BatchNorm2d(368)
        
        self.fc1 = nn.Linear(368 * 4 * 4, 120)
        self.fc1_bn = nn.BatchNorm1d(120)
        
        self.fc2 = nn.Linear(120, 84)
        self.fc2_bn = nn.BatchNorm1d(84)
        
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1_bn(self.conv1(x))), (2, 2))   
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), 2)
        
        x = F.max_pool2d(F.relu(self.conv3_bn(self.conv3(x))), 2)
        
        x = x.view(-1, self.num_flat_features(x))
        
        x = F.relu(self.fc1_bn(self.fc1(x)))
        x = F.relu(self.fc2_bn(self.fc2(x)))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
net = net.cuda()
print(net)

Net (
  (conv1): Conv2d(3, 92, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_bn): BatchNorm2d(92, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(92, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_bn): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True)
  (conv3): Conv2d(184, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_bn): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True)
  (fc1): Linear (5888 -> 120)
  (fc1_bn): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True)
  (fc2): Linear (120 -> 84)
  (fc2_bn): BatchNorm1d(84, eps=1e-05, momentum=0.1, affine=True)
  (fc3): Linear (84 -> 10)
)


In [21]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [16]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 300 == 299:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 300))
            running_loss = 0.0
            
    correct = 0
    total = 0
    for data in validloader:
        images, labels = data
        outputs = net(Variable(images.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.cuda()).sum()

    print('Accuracy of the network on the valid images: %5.2f %%' % (
        100.0 * correct / total))

print('Finished Training')

[1,   300] loss: 0.002
Accuracy of the network on the valid images: 78.26 %
[2,   300] loss: 0.002
Accuracy of the network on the valid images: 78.96 %
[3,   300] loss: 0.001
Accuracy of the network on the valid images: 78.36 %
[4,   300] loss: 0.001
Accuracy of the network on the valid images: 78.56 %
[5,   300] loss: 0.001
Accuracy of the network on the valid images: 78.78 %
[6,   300] loss: 0.001
Accuracy of the network on the valid images: 78.58 %
[7,   300] loss: 0.000
Accuracy of the network on the valid images: 79.30 %
[8,   300] loss: 0.000
Accuracy of the network on the valid images: 79.18 %
[9,   300] loss: 0.000
Accuracy of the network on the valid images: 78.96 %
[10,   300] loss: 0.000
Accuracy of the network on the valid images: 78.58 %
Finished Training


In [17]:
correct = 0
total = 0
for data in testloader:
    images, labels = data
    outputs = net(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.cuda()).sum()

print('Accuracy of the network on the 10000 test images: %5.2f %%' % (
    100.0 * correct / total))

Accuracy of the network on the 10000 test images: 78.86 %


## Net 3

In [35]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(3, 92, 3, padding = 1)
        self.conv1_bn = nn.BatchNorm2d(92)
        
        self.conv2 = nn.Conv2d(92, 184, 3, padding = 1)
        # an affine operation: y = Wx + b
        self.conv2_bn = nn.BatchNorm2d(184)
        
        self.conv3  = nn.Conv2d(184, 368, 3, padding = 1)
        self.conv3_bn = nn.BatchNorm2d(368)
        
        self.conv4  = nn.Conv2d(368, 736 , 3, padding = 1)
        self.conv4_bn = nn.BatchNorm2d(736)
        
        self.fc1 = nn.Linear(736 * 8 * 8, 1472)
        #self.fc1_bn = nn.BatchNorm1d(1472)
        
        self.fc2 = nn.Linear(1472, 120)
        #self.fc2_bn = nn.BatchNorm1d(120)
        
        self.fc3 = nn.Linear(120, 84)
        #self.fc3_bn = nn.BatchNorm1d(84)
        
        self.fc4 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.relu(self.conv1_bn(self.conv1(x)))   
        x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), (2, 2))   
        # If the size is a square you can only specify a single number
        x = F.relu(self.conv3_bn(self.conv3(x)))
        
        x = F.max_pool2d(F.relu(self.conv4_bn(self.conv4(x))), 2)
        
        #x = F.max_pool2d(F.relu(self.conv4_bn(self.conv4(x))), 2)
        
        x = x.view(-1, self.num_flat_features(x))
        
#         x = F.relu(self.fc1_bn(self.fc1(x)))
#         x = F.relu(self.fc2_bn(self.fc2(x)))
#         x = F.relu(self.fc3_bn(self.fc3(x)))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        
        x = self.fc4(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
net = net.cuda()
print(net)

Net (
  (conv1): Conv2d(3, 92, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_bn): BatchNorm2d(92, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(92, 184, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_bn): BatchNorm2d(184, eps=1e-05, momentum=0.1, affine=True)
  (conv3): Conv2d(184, 368, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_bn): BatchNorm2d(368, eps=1e-05, momentum=0.1, affine=True)
  (conv4): Conv2d(368, 736, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4_bn): BatchNorm2d(736, eps=1e-05, momentum=0.1, affine=True)
  (fc1): Linear (47104 -> 1472)
  (fc2): Linear (1472 -> 120)
  (fc3): Linear (120 -> 84)
  (fc4): Linear (84 -> 10)
)


In [36]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [41]:
for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # wrap them in Variable
        inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.data[0]
        if i % 300 == 299:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 300))
            running_loss = 0.0
            
    correct = 0
    total = 0
    for data in validloader:
        images, labels = data
        outputs = net(Variable(images.cuda()))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.cuda()).sum()

    print('Accuracy of the network on the valid images: %5.2f %%' % (
        100.0 * correct / total))

print('Finished Training')

[1,   300] loss: 0.001
Accuracy of the network on the valid images: 80.56 %
[2,   300] loss: 0.001
Accuracy of the network on the valid images: 80.14 %
[3,   300] loss: 0.001
Accuracy of the network on the valid images: 80.16 %
[4,   300] loss: 0.001
Accuracy of the network on the valid images: 80.34 %
[5,   300] loss: 0.001
Accuracy of the network on the valid images: 80.26 %
Finished Training


In [42]:
correct = 0
total = 0
for data in testloader:
    images, labels = data
    outputs = net(Variable(images.cuda()))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.cuda()).sum()

print('Accuracy of the network on the 10000 test images: %5.2f %%' % (
    100.0 * correct / total))

Accuracy of the network on the 10000 test images: 79.65 %
