# normalization
Let's see what normalization does to the images.
We set `shuffle=False` so that the same images are loaded each time and the difference is easy to see.

In [2]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch
import torchvision
import torchvision.transforms as transforms

# Load the CIFAR-10 training split; ToTensor is the only transform, so no
# normalization is applied yet.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transforms.ToTensor())
# shuffle=False: the first batch always contains the same 16 images.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=False)

# Fetch the first batch with the built-in next(); the iterator's .next()
# method was a Python 2 leftover and is not available in recent PyTorch.
it = iter(trainloader)
images, _ = next(it)

# Write the 16 images to disk as a 4x4 grid.
torchvision.utils.save_image(images, 'grid.png', nrow=4)

Files already downloaded and verified


In [3]:
# Shape of the first raw image; the recorded output (32, 32, 3) shows the
# colour channels are stored last.
trainset.data[0].shape

(32, 32, 3)

Compute the per-channel mean and std:

In [4]:
# The raw data is stored as bytes, so the statistics are divided by 255.0.
# Reducing over axes 0, 1 and 2 leaves one value per colour channel.
reduce_axes = (0, 1, 2)
channel_mean = trainset.data.mean(axis=reduce_axes) / 255.0
channel_std = trainset.data.std(axis=reduce_axes) / 255.0
print(channel_mean)
print(channel_std)

[0.49139968 0.48215841 0.44653091]
[0.24703223 0.24348513 0.26158784]


In [5]:
# Per-image pipeline: random 16x16 patch -> upsample back to 32x32 ->
# tensor -> per-channel standardisation with the statistics computed above.
transform = transforms.Compose([
    transforms.RandomCrop(16),
    # transforms.Scale was deprecated and later removed; Resize is its replacement.
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.2434, 0.2615)),
])

# The files were already fetched by the earlier cell, hence download=False.
trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=False,
                                        transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=False)

# Use the built-in next(); the iterator's .next() method is not available in
# recent PyTorch releases.
it = iter(trainloader)
images, _ = next(it)

# Save the normalized batch as a 4x4 grid for comparison with 'grid.png'.
torchvision.utils.save_image(images, 'grid_norm.png', nrow=4)



In [6]:
# Number of batches the loader yields per epoch (batch size is 16).
len(trainloader)

3125

# convolution

In [7]:
import torch.nn as nn

im = images
# 3 input maps, 16 output maps
# 5x5 kernels, 2x2 strides, without padding
conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=2)
output = conv(im)
print(conv.weight.size())

# detach() replaces the legacy `.data` attribute: it yields a tensor that is
# cut off from the autograd graph, which is all that saving an image needs.
torchvision.utils.save_image(im[0].detach(), 'before_conv.png')
# unsqueeze(1) turns the 16 feature maps of the first image into 16
# single-channel images so they are laid out as a 4x4 grid.
torchvision.utils.save_image(output[0].detach().unsqueeze(1), 'conv_output.png', nrow=4)

torch.Size([16, 3, 5, 5])


In [8]:
# (batch, feature maps, height, width). With 32x32 inputs, a 5x5 kernel,
# stride 2 and no padding, each side is (32 - 5) // 2 + 1 = 14.
output.shape

torch.Size([16, 16, 14, 14])

In [None]:
import matplotlib.pyplot as plt

# Display feature map 7 of the first image. detach() drops the autograd
# graph and cpu() makes sure the data lives in host memory for matplotlib.
fig, ax = plt.subplots()
ax.imshow(output[0, 7].detach().cpu(), cmap='gray')
ax.set_title('Conv output: image 0, feature map 7');

# Full example - cifar10

Load and normalize the data, and define the hyperparameters:

In [8]:
import torch 
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms

# Hyper Parameters
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# Image Preprocessing: convert to a tensor, then standardise every colour
# channel with the per-channel mean / std values.
norm_mean = (0.4914, 0.4822, 0.4465)
norm_std = (0.247, 0.2434, 0.2615)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# CIFAR-10 Dataset: both splits share the same root folder and transform.
data_root = './data/'
train_dataset = dsets.CIFAR10(root=data_root, train=True,
                              transform=transform, download=True)
test_dataset = dsets.CIFAR10(root=data_root, train=False,
                             transform=transform, download=True)

# Data Loader (Input Pipeline): the training set is reshuffled, the test
# set keeps its order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


define a model:

In [9]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier for 3x32x32 images.

    Each stage is Conv(5x5, padding 2) -> BatchNorm -> ReLU -> MaxPool(2), so
    the spatial size is halved twice (32 -> 16 -> 8) while the channel count
    grows 3 -> 16 -> 32. The flattened 8*8*32 features pass through dropout
    and a single linear layer. The network returns log-probabilities, so it
    is meant to be trained with nn.NLLLoss.

    Args:
        num_classes: number of output classes. Defaults to 10, the value that
            was previously hard-coded.
        dropout_p: dropout probability applied before the linear layer.
            Defaults to 0.5, the value that was previously hard-coded.
    """

    def __init__(self, num_classes=10, dropout_p=0.5):
        super().__init__()
        # Attribute names and creation order are kept so that printed
        # summaries and saved state_dict keys stay compatible.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=5, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc = nn.Linear(8 * 8 * 32, num_classes)
        self.dropout = nn.Dropout(p=dropout_p)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, num_classes)."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Collapse (channels, height, width) into one feature vector per sample.
        out = out.flatten(1)
        out = self.dropout(out)
        out = self.fc(out)

        return self.logsoftmax(out)

In [10]:
# Instantiate the network defined above (no device move happens here).
cnn = CNN()

In [11]:
# Print the module hierarchy: both conv stages, the linear layer, dropout
# and the log-softmax.
print(cnn)

CNN(
  (layer1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=2048, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (logsoftmax): LogSoftmax(dim=1)
)


In [None]:
# Move the model to the GPU only when one is present; an unconditional
# .cuda() raises on CPU-only machines, and the other cells of this notebook
# guard the move in the same way.
example_cnn = CNN()
if torch.cuda.is_available():
    example_cnn = example_cnn.cuda()

 build model, define loss and optimizer

In [None]:
cnn = CNN()

# convert all the weights tensors to cuda()
if torch.cuda.is_available():
    cnn = cnn.cuda()

# Loss and Optimizer. NLLLoss matches the log-probabilities the network
# returns; the optimizer is created after the device move.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)

num_params = sum(p.numel() for p in cnn.parameters())
print('number of parameters: ', num_params)

number of parameters:  34634


In [None]:
def evaluate_model(_model, data_loader, split_name='test'):
    """Print and return the classification accuracy of `_model` on `data_loader`.

    The model is switched to evaluation mode for the duration of the call (and
    switched back afterwards if it was in training mode), and the forward
    passes run under torch.no_grad() so no autograd graph is built.

    Args:
        _model: callable mapping a batch of images to per-class scores of
            shape (batch, num_classes).
        data_loader: iterable yielding (images, labels) batches.
        split_name: label used in the printed message. With the default
            'test' the message has the same wording as before.

    Returns:
        float: accuracy in percent over all samples yielded by `data_loader`.
    """
    use_cuda = torch.cuda.is_available()

    # Plain callables have no `training` flag; treat them as already in eval mode.
    was_training = getattr(_model, 'training', False)
    if was_training:
        _model.eval()

    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for images, labels in data_loader:
                if use_cuda:
                    images = images.cuda()
                    labels = labels.cuda()

                outputs = _model(images)  # Forward

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                # .item() keeps the running count a plain Python int.
                correct += (predicted == labels).sum().item()
    finally:
        if was_training:
            _model.train()

    accuracy = 100.0 * correct / total
    # The image count comes from the data actually seen, not a hard-coded 10000.
    print('%s Accuracy of the model on the %d %s images: %d %%'
          % (split_name.capitalize(), total, split_name, accuracy))
    return accuracy

training the model

In [None]:
use_cuda = torch.cuda.is_available()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        if use_cuda:
            images, labels = images.cuda(), labels.cuda()

        # Forward + Backward + Optimize
        outputs = cnn(images)              # forward pass
        loss = criterion(outputs, labels)  # loss on the log-probabilities
        optimizer.zero_grad()              # clear gradients from the previous step
        loss.backward()                    # backprop
        optimizer.step()                   # update weights

        # Optional progress logging, left disabled:
        # if (i+1) % 100 == 0:
        #     print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
        #            %(epoch+1, num_epochs, i+1,
        #              len(train_dataset)//batch_size, loss.data))

    # After every epoch: measure test accuracy in evaluation mode, then
    # switch back to training mode for the next epoch.
    cnn.eval()
    evaluate_model(cnn, test_loader)
    cnn.train()

Test Accuracy of the model on the 10000 test images: 57 %
Test Accuracy of the model on the 10000 test images: 61 %
Test Accuracy of the model on the 10000 test images: 64 %
Test Accuracy of the model on the 10000 test images: 66 %
Test Accuracy of the model on the 10000 test images: 68 %


In [None]:
  # Accuracy on the training data, for comparison with the test accuracy.
  # NOTE: evaluate_model's printout is worded for the test set, even though
  # train_loader is what gets passed here.
  cnn.train(False)  # evaluation mode
  evaluate_model(cnn, train_loader)
  cnn.train(True)  # back to training mode

In [None]:
# Print the module hierarchy of the trained model.
print(cnn)

evaluating the model

In [None]:
# Final evaluation on the test set. The loop that used to live here was a
# copy of evaluate_model's body, so the helper is called instead.
cnn.eval()  # cnn.train(False)

# no_grad: evaluation needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    evaluate_model(cnn, test_loader)

In [None]:
# Save the Trained Model
# Only the state_dict is written to 'cnn.pkl', not the CNN class itself.
# NOTE(review): presumably restored via CNN().load_state_dict(torch.load('cnn.pkl')) — confirm.
torch.save(cnn.state_dict(), 'cnn.pkl')