**Example of Training CNN with cifar10 Dataset**


---
The data and results will be stored in the following directories

  1. drive/My Drive/public/data/ has data
  2. drive/My Drive/public/results/ will have results



Mount your Google Drive. Verify the mount with the `!ls` command.

In [9]:
# Colab-only helper used to attach Google Drive to the notebook's filesystem.
from google.colab import drive
# Mount Drive at /content/drive; the dataset and results paths used by the
# later cells ("drive/My Drive/public/...") are relative paths into this mount.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
!ls

drive  sample_data


Import the PyTorch libraries and confirm the installation by printing the version.

In [11]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F 

import torchvision.datasets as dset
import torchvision.transforms as transforms
import numpy as np

from torch.utils.data import DataLoader

import warnings
warnings.filterwarnings('ignore')
import os

In [12]:
print(torch.__version__)

1.5.1+cu101


Define your network model. We have defined a CNN model in advance. 

In [13]:
class CIFAR10_CNN_model(nn.Module):
    """Convolutional classifier for 3-channel 32x32 images with 10 classes.

    The feature extractor (``self.layer``) is three stages of
    conv -> ReLU -> conv -> ReLU -> 2x2 max-pool.  Its flattened output feeds
    a two-layer fully-connected head (``self.fc_layer``).  ``forward`` returns
    log-probabilities, so the matching loss is ``F.nll_loss``.
    """

    def __init__(self):
        super(CIFAR10_CNN_model, self).__init__()
        # Shape comments below are per sample (channels x height x width) for
        # a 3 x 32 x 32 input; 3x3 convs with padding=1 keep height/width.
        self.layer = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # -> 32 x 16 x 16

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # -> 128 x 8 x 8

            nn.Conv2d(128, 256, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # -> 256 x 4 x 4
        )

        # Measure the flattened feature size instead of hard-coding it, so the
        # first Linear layer always matches the conv stack defined above.
        conv_size = self.get_conv_size((3, 32, 32))

        self.fc_layer = nn.Sequential(
            nn.Linear(conv_size, 200),
            nn.ReLU(),
            nn.Linear(200, 10)
        )

    def get_conv_size(self, shape):
        """Return the number of features the conv stack emits per sample.

        Args:
            shape: per-sample input shape as (channels, height, width).

        Returns:
            int: element count of ``self.layer``'s output for one sample.
        """
        # Only the output shape is needed, so skip autograd bookkeeping.
        with torch.no_grad():
            o = self.layer(torch.zeros(1, *shape))
        return int(np.prod(o.size()))

    def forward(self, x):
        """Compute class log-probabilities for a batch of images.

        Args:
            x: tensor whose first dimension is the batch; the remaining
                dimensions must match what the conv stack was sized for
                (3 x 32 x 32).

        Returns:
            Tensor of shape (batch, 10) holding log-softmax scores.
        """
        # feed data through conv layers
        out = self.layer(x)

        # flatten everything except the batch dimension for the FC head
        out = out.view(x.size(0), -1)

        # feed data through feed-forward layer
        out = self.fc_layer(out)
        return F.log_softmax(out, dim=1)


In [24]:
# check model: build a fresh (CPU) instance and print its layer structure
model = CIFAR10_CNN_model()
print(model)

print('=' * 90)

# check forward(): push one all-zero 3x32x32 image through the network and
# show the shape of the result (one row of 10 class scores is expected)
mytensor = torch.zeros((1,3,32,32))
out = model(mytensor)
print('output shape: ', out.shape)

CIFAR10_CNN_model(
  (layer): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU()
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU()
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=4096, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=10, bias=True)
  )
)

Load the CIFAR-10 dataset

In [28]:
batch_size = 32

# Both splits are stored under (and, if missing, downloaded to) this folder.
data_root = "drive/My Drive/public/data/"

# ToTensor converts each image to a float tensor (3 x 32 x 32 per sample).
cifar_train = dset.CIFAR10(data_root, train=True,
                           transform=transforms.ToTensor(),
                           target_transform=None, download=True)
cifar_test = dset.CIFAR10(data_root, train=False,
                          transform=transforms.ToTensor(),
                          target_transform=None, download=True)

# Training: shuffle every epoch and drop the ragged final batch so all
# training batches have exactly batch_size samples.
train_loader = torch.utils.data.DataLoader(cifar_train, batch_size=batch_size,
                                           shuffle=True, num_workers=2,
                                           drop_last=True)
# Evaluation: keep the final partial batch. With drop_last=True the last
# 10000 % 32 = 16 test images would silently be excluded from the accuracy.
test_loader = torch.utils.data.DataLoader(cifar_test, batch_size=batch_size,
                                          shuffle=False, num_workers=2,
                                          drop_last=False)

Files already downloaded and verified
Files already downloaded and verified


In [31]:
# Report the per-sample tensor shape and the number of samples in each split.
print('train dataset: ', cifar_train[0][0].size(), len(cifar_train))
print('test dataset: ', cifar_test[0][0].size(), len(cifar_test))

print('=' * 90)

# Peek at the first mini-batch only, to confirm the batched tensor shapes.
for data, target in train_loader:
    print('data shape: ', data.shape)
    print('target shape: ', target.shape)
    break

train dataset:  torch.Size([3, 32, 32]) 50000
test dataset:  torch.Size([3, 32, 32]) 10000
data shape:  torch.Size([32, 3, 32, 32])
target shape:  torch.Size([32])


In [32]:
def train(model, device, train_loader, optimizer, epoch, log_interval):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network whose output is fed to ``F.nll_loss`` (i.e. it is
            expected to emit log-probabilities).
        device: device every batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; it must support
            ``len()`` and expose a ``dataset`` attribute for progress output.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used only in the progress message.
        log_interval: a progress line is printed every this many batches.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        # Move the batch to the device the model runs on.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear old gradients, then forward / backward / parameter update.
        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if step % log_interval != 0:
            continue

        # Progress report: samples seen so far, dataset size, % of batches.
        seen = step * len(inputs)
        dataset_size = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, dataset_size, percent, loss.item()))


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print its accuracy.

    The model is put in evaluation mode for the duration of the call and its
    previous training/eval mode is restored afterwards.  Accuracy is computed
    over the samples actually yielded by the loader, so the printed ratio
    stays correct even when the loader drops a partial final batch.

    Args:
        model: ``nn.Module`` producing one score per class for each sample.
        device: device every batch is moved to before the forward pass.
        test_loader: iterable of ``(data, target)`` batches.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = model(data)

                # The predicted class is the index of the highest score.
                _, output_index = torch.max(output, 1)
                total += target.size(0)
                # .item() keeps the counter a plain int, so it prints as
                # "7012" rather than "tensor(7012., device='cuda:0')".
                correct += (output_index == target).sum().item()
    finally:
        model.train(was_training)

    # Guard against a loader that yielded no batches.
    accuracy = 100. * correct / total if total else 0.
    print('\nTest: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, total, accuracy))


# The name starts with "test", so pytest would otherwise try to collect this
# helper as a test case when the code is imported from a test module.
test.__test__ = False

Define hyper-parameters for training. Then define model and optimizer


In [35]:
# Hyper-parameters for the training run.
seed = 1               # value passed to torch.manual_seed below
learning_rate = 0.001  # learning rate handed to the Adam optimizer
num_epoch = 5          # number of passes over the training set
log_interval=100       # train() prints a progress line every 100 batches

# Seed PyTorch's random number generator before the model is created.
torch.manual_seed(seed)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Fresh model instance (replaces the one built in the sanity-check cell).
model = CIFAR10_CNN_model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

Train the model by running training loop

In [None]:
# Alternate one training pass with one evaluation pass per epoch
# (epochs are numbered from 1 for the progress messages).
for epoch in range(1, num_epoch + 1):
    train(model, device, train_loader, optimizer, epoch, log_interval)
    test(model, device, test_loader)

# makedirs creates any missing parent folders and, with exist_ok=True, is a
# no-op when the directory already exists (no separate existence check).
os.makedirs('drive/My Drive/public/results', exist_ok=True)
# NOTE(review): this pickles the whole module object, so loading the file
# later requires the CIFAR10_CNN_model class to be importable. Saving
# model.state_dict() is the more portable option, but it would change the
# file format that existing consumers of this checkpoint expect.
torch.save(model, 'drive/My Drive/public/results/cifar10_pretrained.pth')

the end!
----