<a href="https://colab.research.google.com/github/anbansal/PyTorch-Linkedin-Training/blob/master/PyTorch_Transfer_Learning_CIFAR10_LinkedIn_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
torch.backends.cudnn.deterministic=True
torch.backends.cudnn.benchmark=False
torch.manual_seed(0)
import numpy as np
np.random.seed(0)
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader
import torch.optim as optim

In [0]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [0]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
transforms = transforms.Compose([
                                 transforms.Resize((224,224)),
                                 transforms.ToTensor(),
                                 transforms.Normalize(mean=mean,std=std)
                                 ])
trainset=datasets.CIFAR10('~/.pytorch/CIFAR/',train=True,transform=transforms,download=True)
testset=datasets.CIFAR10('~/.pytorch/CIFAR/',train=False,transform=transforms,download=True)

trainloader=DataLoader(trainset,batch_size=64,shuffle=True)
testloader=DataLoader(testset,batch_size=64,shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [0]:
def denormalize(tensor):
  tensor = tensor*std+ mean
  return tensor

def show_img(img):
  img = img.numpy().transpose((1,2,0))
  img = denormalize(img)
  img = np.clip(img,0,1)
  plt.imshow(img)
  
def get_CIFAR10_class(id):
  CIFAR10_classes = ['plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
  return CIFAR10_classes[id]

In [0]:
model = models.vgg16(pretrained=True)

In [0]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [0]:
for params in model.parameters():
  params.requires_grad = False

In [0]:
model.classifier[-1] = nn.Sequential(
                                      nn.Linear(in_features=4096, out_features=10),
                                      nn.LogSoftmax(dim=1)
                                    )

In [0]:
criterion = nn.NLLLoss()

In [0]:
model.to(device)
optimizer = optim.Adam(model.parameters())
num_epochs = 1
batch_loss = 0
cum_loss = 0
for e in range(num_epochs):
  for batch, (images,labels) in enumerate(trainloader,1):
    images = images.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    logps=model(images)
    loss=criterion(logps,labels)
    loss.backward()
    optimizer.step()
    batch_loss += loss.item()
    print(f'Epoch {e}/{num_epochs}: Batch {batch}/{len(trainloader)}: Batch Loss: {loss.item()}')
print(f'Training Loss: {batch_loss/len(trainloader)}')

Epoch 0/1: Batch 1/782: Batch Loss: 2.330793857574463
Epoch 0/1: Batch 2/782: Batch Loss: 2.1873044967651367
Epoch 0/1: Batch 3/782: Batch Loss: 2.0241212844848633
Epoch 0/1: Batch 4/782: Batch Loss: 2.1179025173187256
Epoch 0/1: Batch 5/782: Batch Loss: 1.783939003944397
Epoch 0/1: Batch 6/782: Batch Loss: 1.7197850942611694
Epoch 0/1: Batch 7/782: Batch Loss: 1.4781049489974976
Epoch 0/1: Batch 8/782: Batch Loss: 1.509700894355774
Epoch 0/1: Batch 9/782: Batch Loss: 1.373780369758606
Epoch 0/1: Batch 10/782: Batch Loss: 1.2858160734176636
Epoch 0/1: Batch 11/782: Batch Loss: 1.196153163909912
Epoch 0/1: Batch 12/782: Batch Loss: 1.2627209424972534
Epoch 0/1: Batch 13/782: Batch Loss: 1.0433933734893799
Epoch 0/1: Batch 14/782: Batch Loss: 1.1577554941177368
Epoch 0/1: Batch 15/782: Batch Loss: 1.1435178518295288
Epoch 0/1: Batch 16/782: Batch Loss: 1.1732757091522217
Epoch 0/1: Batch 17/782: Batch Loss: 0.9265352487564087
Epoch 0/1: Batch 18/782: Batch Loss: 1.016086220741272
Epoch 0

In [0]:
model.eval()
num_correct = 0
total = 0
with torch.no_grad():
  for batch, (images, labels) in enumerate(testloader,1):
    images = images.to(device)
    labels = labels.to(device)
    logps=model(images)
    output = torch.exp(logps)
    pred = torch.argmax(output,1)
    total += labels.size(0)
    num_correct += (pred == labels).sum().item()
    print(f'Batch {batch}/{len(testloader)}: Accuracy: {(pred == labels).sum().item()*100/labels.size(0)} %') 
  print(f'Total Accuracy after {total} images: {num_correct*100/total} %') 

Batch 1/157: Accuracy: 89.0625 %
Batch 2/157: Accuracy: 84.375 %
Batch 3/157: Accuracy: 81.25 %
Batch 4/157: Accuracy: 76.5625 %
Batch 5/157: Accuracy: 78.125 %
Batch 6/157: Accuracy: 87.5 %
Batch 7/157: Accuracy: 79.6875 %
Batch 8/157: Accuracy: 87.5 %
Batch 9/157: Accuracy: 84.375 %
Batch 10/157: Accuracy: 81.25 %
Batch 11/157: Accuracy: 81.25 %
Batch 12/157: Accuracy: 84.375 %
Batch 13/157: Accuracy: 89.0625 %
Batch 14/157: Accuracy: 79.6875 %
Batch 15/157: Accuracy: 81.25 %
Batch 16/157: Accuracy: 85.9375 %
Batch 17/157: Accuracy: 85.9375 %
Batch 18/157: Accuracy: 79.6875 %
Batch 19/157: Accuracy: 85.9375 %
Batch 20/157: Accuracy: 71.875 %
Batch 21/157: Accuracy: 85.9375 %
Batch 22/157: Accuracy: 79.6875 %
Batch 23/157: Accuracy: 79.6875 %
Batch 24/157: Accuracy: 81.25 %
Batch 25/157: Accuracy: 82.8125 %
Batch 26/157: Accuracy: 84.375 %
Batch 27/157: Accuracy: 84.375 %
Batch 28/157: Accuracy: 85.9375 %
Batch 29/157: Accuracy: 87.5 %
Batch 30/157: Accuracy: 85.9375 %
Batch 31/157: A

In [0]:
model.train()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [0]:
for params in model.classifier.parameters():
  print(params.requires_grad)

False
False
False
False
True
True


In [0]:
print(model.classifier[6].parameters())

<generator object Module.parameters at 0x7fc13286dca8>


In [0]:
model.train()
for i in range(17,31):
  model.features[i].requires_grad = True
for i in range(0,7):
  model.classifier[i].requires_grad = True
model.classifier[6] = nn.Sequential(
                                    nn.Linear(in_features=4096, out_features=512),
                                    nn.ReLU(),
                                    nn.Dropout(p=0.5),
                                    nn.Linear(in_features=512, out_features=10),
                                    nn.LogSoftmax(dim=1))
lr = 3e-4
optimizer = optim.Adam([
                        {'params': model.features[17].parameters(),'lr':lr/9},
                        {'params': model.features[19].parameters(),'lr':lr/9},
                        {'params': model.features[21].parameters(),'lr':lr/9},
                        {'params': model.features[24].parameters(),'lr':lr/3},
                        {'params': model.features[26].parameters(),'lr':lr/3},
                        {'params': model.features[28].parameters(),'lr':lr/3},
                        {'params': model.classifier[0].parameters(),'lr':lr},
                        {'params': model.classifier[3].parameters(),'lr':lr},
                        {'params': model.classifier[6].parameters(),'lr':lr}
                        ],lr=lr)
model.to(device)
num_epochs = 1
batch_loss = 0
cum_loss = 0
for e in range(num_epochs):
  for batch, (images,labels) in enumerate(trainloader,1):
    images = images.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    logps=model(images)
    loss=criterion(logps,labels)
    loss.backward()
    optimizer.step()
    batch_loss += loss.item()
    print(f'Epoch {e}/{num_epochs}: Batch {batch}/{len(trainloader)}: Batch Loss: {loss.item()}')
print(f'Training Loss: {batch_loss/len(trainloader)}')


model.eval()
num_correct = 0
total = 0
with torch.no_grad():
  for batch, (images, labels) in enumerate(testloader,1):
    images = images.to(device)
    labels = labels.to(device)
    logps=model(images)
    output = torch.exp(logps)
    pred = torch.argmax(output,1)
    total += labels.size(0)
    num_correct += (pred == labels).sum().item()
    print(f'Batch {batch}/{len(testloader)}: Accuracy: {(pred == labels).sum().item()*100/labels.size(0)} %') 
  print(f'Total Accuracy after {total} images: {num_correct*100/total} %') 

Epoch 0/1: Batch 1/782: Batch Loss: 2.311833143234253
Epoch 0/1: Batch 2/782: Batch Loss: 2.301222562789917
Epoch 0/1: Batch 3/782: Batch Loss: 2.2607133388519287
Epoch 0/1: Batch 4/782: Batch Loss: 2.1801724433898926
Epoch 0/1: Batch 5/782: Batch Loss: 2.0809450149536133
Epoch 0/1: Batch 6/782: Batch Loss: 1.9618258476257324
Epoch 0/1: Batch 7/782: Batch Loss: 1.9490665197372437
Epoch 0/1: Batch 8/782: Batch Loss: 2.0000247955322266
Epoch 0/1: Batch 9/782: Batch Loss: 1.7094556093215942
Epoch 0/1: Batch 10/782: Batch Loss: 1.7017565965652466
Epoch 0/1: Batch 11/782: Batch Loss: 1.6280021667480469
Epoch 0/1: Batch 12/782: Batch Loss: 1.5916240215301514
Epoch 0/1: Batch 13/782: Batch Loss: 1.605338454246521
Epoch 0/1: Batch 14/782: Batch Loss: 1.5252914428710938
Epoch 0/1: Batch 15/782: Batch Loss: 1.4200986623764038
Epoch 0/1: Batch 16/782: Batch Loss: 1.4461557865142822
Epoch 0/1: Batch 17/782: Batch Loss: 1.2669155597686768
Epoch 0/1: Batch 18/782: Batch Loss: 1.1598155498504639
Epoc

In [0]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [0]:
model.train()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [0]:
model.to('cpu')
for i in range(0,6):
  model.classifier[i].requires_grad = True

In [0]:
for params in model.features.parameters():
  params.requires_grad = False

In [0]:
for i in range(0,31):
  model.features[i].requires_grad = True
for params in model.features.parameters():
  print(params.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [0]:
for name,param in model.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

	 classifier.6.0.weight
	 classifier.6.0.bias
	 classifier.6.3.weight
	 classifier.6.3.bias


In [0]:
total_params = sum(p.numel() for p in model.parameters())
print(f'{total_params:,} total parameters.')
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f'{total_trainable_params:,} training parameters.')

136,363,338 total parameters.
2,102,794 training parameters.


In [0]:
res50_model = models.resnet50(pretrained=True)
res50_conv = nn.Sequential(*list(res50_model.children())[:-2])

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /root/.cache/torch/checkpoints/resnet50-19c8e357.pth


HBox(children=(IntProgress(value=0, max=102502400), HTML(value='')))




In [0]:
nn.Sequential(*list(res50_model.children())[:-2])

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [0]:
for name,param in res50_conv.named_parameters():
        if param.requires_grad == True:
            print("\t",name)

	 0.weight
	 1.weight
	 1.bias
	 4.0.conv1.weight
	 4.0.bn1.weight
	 4.0.bn1.bias
	 4.0.conv2.weight
	 4.0.bn2.weight
	 4.0.bn2.bias
	 4.0.conv3.weight
	 4.0.bn3.weight
	 4.0.bn3.bias
	 4.0.downsample.0.weight
	 4.0.downsample.1.weight
	 4.0.downsample.1.bias
	 4.1.conv1.weight
	 4.1.bn1.weight
	 4.1.bn1.bias
	 4.1.conv2.weight
	 4.1.bn2.weight
	 4.1.bn2.bias
	 4.1.conv3.weight
	 4.1.bn3.weight
	 4.1.bn3.bias
	 4.2.conv1.weight
	 4.2.bn1.weight
	 4.2.bn1.bias
	 4.2.conv2.weight
	 4.2.bn2.weight
	 4.2.bn2.bias
	 4.2.conv3.weight
	 4.2.bn3.weight
	 4.2.bn3.bias
	 5.0.conv1.weight
	 5.0.bn1.weight
	 5.0.bn1.bias
	 5.0.conv2.weight
	 5.0.bn2.weight
	 5.0.bn2.bias
	 5.0.conv3.weight
	 5.0.bn3.weight
	 5.0.bn3.bias
	 5.0.downsample.0.weight
	 5.0.downsample.1.weight
	 5.0.downsample.1.bias
	 5.1.conv1.weight
	 5.1.bn1.weight
	 5.1.bn1.bias
	 5.1.conv2.weight
	 5.1.bn2.weight
	 5.1.bn2.bias
	 5.1.conv3.weight
	 5.1.bn3.weight
	 5.1.bn3.bias
	 5.2.conv1.weight
	 5.2.bn1.weight
	 5.2.bn1.bias
	

In [0]:
for param in res50_conv.parameters():
    param.requires_grad = False

In [0]:
model.train()
model.to(device)
num_epochs = 1
batch_loss = 0
cum_loss = 0
for e in range(num_epochs):
  for batch, (images,labels) in enumerate(trainloader,1):
    images = images.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    logps=model(images)
    loss=criterion(logps,labels)
    loss.backward()
    optimizer.step()
    batch_loss += loss.item()
    print(f'Epoch {e}/{num_epochs}: Batch {batch}/{len(trainloader)}: Batch Loss: {loss.item()}')
print(f'Training Loss: {batch_loss/len(trainloader)}')


model.eval()
num_correct = 0
total = 0
with torch.no_grad():
  for batch, (images, labels) in enumerate(testloader,1):
    images = images.to(device)
    labels = labels.to(device)
    logps=model(images)
    output = torch.exp(logps)
    pred = torch.argmax(output,1)
    total += labels.size(0)
    num_correct += (pred == labels).sum().item()
    print(f'Batch {batch}/{len(testloader)}: Accuracy: {(pred == labels).sum().item()*100/labels.size(0)} %') 
  print(f'Total Accuracy after {total} images: {num_correct*100/total} %')

Epoch 0/1: Batch 1/782: Batch Loss: 0.7414603233337402
Epoch 0/1: Batch 2/782: Batch Loss: 0.3884875178337097
Epoch 0/1: Batch 3/782: Batch Loss: 0.8433461785316467
Epoch 0/1: Batch 4/782: Batch Loss: 0.5378896594047546
Epoch 0/1: Batch 5/782: Batch Loss: 0.42450791597366333
Epoch 0/1: Batch 6/782: Batch Loss: 0.6997003555297852
Epoch 0/1: Batch 7/782: Batch Loss: 0.5680793523788452
Epoch 0/1: Batch 8/782: Batch Loss: 0.4067124128341675
Epoch 0/1: Batch 9/782: Batch Loss: 0.4366336762905121
Epoch 0/1: Batch 10/782: Batch Loss: 0.5363320112228394
Epoch 0/1: Batch 11/782: Batch Loss: 0.4877561330795288
Epoch 0/1: Batch 12/782: Batch Loss: 0.7921039462089539
Epoch 0/1: Batch 13/782: Batch Loss: 0.31972748041152954
Epoch 0/1: Batch 14/782: Batch Loss: 0.5147030353546143
Epoch 0/1: Batch 15/782: Batch Loss: 0.7436081171035767
Epoch 0/1: Batch 16/782: Batch Loss: 0.6659941673278809
Epoch 0/1: Batch 17/782: Batch Loss: 0.6033780574798584
Epoch 0/1: Batch 18/782: Batch Loss: 0.725317656993866


KeyboardInterrupt: ignored