# Transfer Learning: ResNet18

In [None]:
%matplotlib inline
import torch
import copy
import time
import torchvision
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision import transforms,datasets, models


## Load Data:

In [None]:
# Upsample every CIFAR-10 image so its shorter side is 224 px, then convert it
# to a float tensor.  transforms.Resize is the supported replacement for
# transforms.Scale, which was deprecated and has been removed from recent
# torchvision releases (same semantics for an int size).
# NOTE(review): the pretrained ResNet weights were presumably trained on
# mean/std-normalized inputs; adding transforms.Normalize may help accuracy but
# is left out here to keep the pipeline's behavior unchanged - confirm.
apply_transform = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])
BatchSize = 128

# Training split: downloaded to ./CIFAR10 on first use, reshuffled every epoch.
trainset = datasets.CIFAR10(root='./CIFAR10', train=True, download=True, transform=apply_transform)
trainLoader = torch.utils.data.DataLoader(trainset, batch_size=BatchSize,
                                          shuffle=True, num_workers=4) # Creating dataloader

# Test split: same preprocessing, fixed order so evaluation is repeatable.
testset = datasets.CIFAR10(root='./CIFAR10', train=False, download=True, transform=apply_transform)
testLoader = torch.utils.data.DataLoader(testset, batch_size=BatchSize,
                                         shuffle=False, num_workers=4) # Creating dataloader

In [None]:
# Report how many samples each split contains (taken from the datasets that
# back the two loaders).
print('No. of samples in train set: ', len(trainLoader.dataset), sep='')
print('No. of samples in test set: ', len(testLoader.dataset), sep='')

## Define network architecture

In [None]:
# Build torchvision's ResNet-18 initialised with its pretrained weights; this
# is the starting point that the later cells fine-tune.
net = models.resnet18(pretrained=True)
# Dump the layer-by-layer structure so the module names used below
# (conv1, layer1, fc) can be checked against the architecture.
print(net)

In [None]:
# Counting number of trainable parameters
def count_parameters(model):
    """Return the total number of elements in the trainable parameters of `model`.

    Iterates over ``model.parameters()`` and adds up ``numel()`` for every
    parameter whose ``requires_grad`` flag is set; frozen parameters are
    skipped.  Returns 0 when there are no trainable parameters.

    NOTE(review): an earlier comment here said shared parameters would be
    counted multiple times; that depends on whether ``model.parameters()``
    yields duplicates (torch.nn.Module is believed to de-duplicate) - confirm.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


# Show how many parameters of the network are currently trainable.
print("No. of learnable Network Parameters= ", count_parameters(net), sep="")

In [None]:
# Modifying the last fully-connected layer for 10 classes.
# The new layer's input width is read from the existing head instead of being
# hard-coded, so this cell keeps working if the backbone is swapped for one
# whose final feature size differs (it is 512 for ResNet-18).
#
# To train only the new head, freeze the backbone first by uncommenting the
# two lines below (the replacement layer is created afterwards and therefore
# stays trainable):
#for param in net.parameters():
#    param.requires_grad = False

net.fc = nn.Linear(net.fc.in_features, 10)

In [None]:
# Snapshot the untrained filters so they can be compared with the trained
# ones later.  clone() gives an independent copy that the optimizer's
# in-place updates will not touch.
init_weightConv1 = net.conv1.weight.data.clone() # 1st conv layer
init_weightConv2 = net.layer1[0].conv1.weight.data.clone() # 2nd conv layer

In [None]:
# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU.
use_gpu = torch.cuda.is_available()
device = torch.device("cuda" if use_gpu else "cpu")
if use_gpu:
    print('GPU is available!')

# Move the model's parameters and buffers onto the selected device.
net = net.to(device)

## Define loss function and optimizer

In [None]:
# NLLLoss expects log-probabilities as input; the training loop therefore
# passes the network output through F.log_softmax before calling it.
criterion = nn.NLLLoss() # Negative Log-likelihood
# Equivalent alternative that takes raw network outputs directly:
#criterion = nn.CrossEntropyLoss() # This criterion combines nn.LogSoftmax() + nn.NLLLoss()
# All parameters of the network are handed to the optimizer (full fine-tuning),
# with a learning rate of 1e-4.
optimizer = optim.Adam(net.parameters(), lr=1e-4) # Adam


## Train the network

In [None]:
iterations = 10
trainLoss = []
trainAuxLoss = []    # not filled by this cell; kept in case other cells reference it
trainTotalLoss = []  # not filled by this cell; kept in case other cells reference it
trainAcc = []

testLoss = []
testAcc = []


def _run_epoch(loader, train):
    """Run one full pass over `loader` and return (mean loss, accuracy).

    Args:
        loader: iterable yielding (inputs, labels) batches.
        train: when True the network is put in training mode and its
            parameters are updated after every batch; when False the network
            is put in evaluation mode and gradient tracking is disabled.

    Returns:
        A tuple ``(avg_loss, accuracy)`` where both values are averaged over
        the number of samples actually seen (0.0 for an empty loader).
    """
    net.train(train)  # Affects batch-norm and dropout layers (if any)
    loss_sum = 0.0
    correct = 0
    seen = 0
    # Gradients are only needed for parameter updates; switching them off
    # during evaluation avoids building the autograd graph.
    with torch.set_grad_enabled(train):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if train:
                # Initialize gradients to zero
                optimizer.zero_grad()

            outputs = net(inputs)
            # Compute loss/error
            loss = criterion(F.log_softmax(outputs, dim=1), labels)

            if train:
                # Backpropagate loss and compute gradients
                loss.backward()
                # Update the network parameters
                optimizer.step()

            _, predicted = torch.max(outputs.detach(), 1)
            batch_size = labels.size(0)
            # The criterion returns the batch mean, so weight it by the batch
            # size to obtain a true per-sample average at the end (the last
            # batch may be smaller than the others).
            loss_sum += loss.item() * batch_size
            correct += (predicted == labels).sum().item()
            seen += batch_size

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


start = time.time()
for epoch in range(iterations):
    epochStart = time.time()

    # One optimisation pass over the training set
    avgTrainLoss, avgTrainAcc = _run_epoch(trainLoader, train=True)
    trainLoss.append(avgTrainLoss)
    trainAcc.append(avgTrainAcc)

    # Evaluating performance on test set for each epoch.  The loss is
    # accumulated from zero inside _run_epoch, so the test loss no longer
    # contains the training loss of the same epoch.
    avgTestLoss, avgTestAcc = _run_epoch(testLoader, train=False)
    testLoss.append(avgTestLoss)
    testAcc.append(avgTestAcc)

    epochEnd = time.time()-epochStart
    print('Iteration: {:.0f} /{:.0f}  ;  Training Loss: {:.6f} ; Testing Acc: {:.3f} ; Time consumed: {:.0f}m {:.0f}s '\
          .format(epoch + 1,iterations,avgTrainLoss,avgTestAcc*100,epochEnd//60,epochEnd%60))

end = time.time()-start
print('Training completed in {:.0f}m {:.0f}s'.format(end//60,end%60))

# Draw each curve once after training instead of re-plotting the growing
# history every epoch, which stacked duplicate lines on the same axes.
epochAxis = range(len(trainLoss))

# Plotting loss vs Epochs
fig1 = plt.figure(1)
plt.plot(epochAxis,trainLoss,'r-',label='train')
plt.plot(epochAxis,testLoss,'g-',label='test')
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Plotting accuracy vs Epochs
fig2 = plt.figure(2)
plt.plot(epochAxis,trainAcc,'r-',label='train')
plt.plot(epochAxis,testAcc,'g-',label='test')
plt.legend(loc='upper left')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')


In [None]:
# Snapshot the trained filters on the CPU for visualization.  .cpu() hands
# back the tensor itself when it already lives on the CPU, so one code path
# serves both the GPU and the CPU case; clone() then makes the snapshot
# independent of the live network weights.
trained_weightConv1 = net.conv1.weight.data.cpu().clone()
trained_weightConv2 = net.layer1[0].conv1.weight.data.cpu().clone()

## Visualization of weights

In [None]:
# Helper to display an image tensor in its own figure
def imshow(img, strlabel):
    """Show `img` in a new matplotlib figure titled `strlabel`.

    `img` must provide ``.numpy()`` and be laid out channels-first; the
    absolute value of its entries is drawn with the channel axis moved last.

    Side effect: sets the global default figure size
    (``plt.rcParams["figure.figsize"]``) to 10 x 10, which also applies to
    figures created afterwards.
    """
    magnitudes = np.abs(img.numpy())
    plt.rcParams["figure.figsize"] = [10, 10]
    plt.figure()
    plt.title(strlabel)
    # Reorder axes (0, 1, 2) -> (1, 2, 0) so the first axis becomes the last
    plt.imshow(magnitudes.transpose(1, 2, 0))

In [None]:
# Visualizing weights of 1st convolutional layer: before training, after
# training, and their element-wise difference, each tiled 8 filters per row
# and rescaled for display by make_grid(normalize=True).
conv1_panels = (
    (init_weightConv1, 'Initial weights: conv1'),
    (trained_weightConv1, 'Trained weights: conv1'),
    (init_weightConv1 - trained_weightConv1, 'Difference of weights: conv1'),
)
for kernels, title in conv1_panels:
    imshow(torchvision.utils.make_grid(kernels, nrow=8, normalize=True), title)

In [None]:
# Visualizing weights of 2nd convolutional layer.  Only index 0 along the
# first weight axis is shown; unsqueeze(1) inserts a singleton axis at
# position 1 so make_grid receives a 4-D batch of images to tile.
init_kernels = init_weightConv2[0].unsqueeze(1)
trained_kernels = trained_weightConv2[0].unsqueeze(1)
conv2_panels = (
    (init_kernels, 'Initial weights: conv2'),
    (trained_kernels, 'Trained weights: conv2'),
    (init_kernels - trained_kernels, 'Difference of weights: conv2'),
)
for kernels, title in conv2_panels:
    imshow(torchvision.utils.make_grid(kernels, nrow=8, normalize=True), title)