# 7. Transfer Learning in PyTorch

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

import matplotlib.pyplot as plt

import torch
import numpy as np
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models

In [2]:
data_dir = './Cat_Dog_data'

train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

test_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

train_data = datasets.ImageFolder(f'{data_dir}/train', transform=train_transforms)
test_data = datasets.ImageFolder(f'{data_dir}/test', transform=test_transforms)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32)

In [3]:
model = models.resnet50(pretrained=True)
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [4]:
for parameter in model.parameters():
    parameter.requires_grad = False  # We won't back propagate through them.
    
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([
            ('fully_connected_1', nn.Linear(2048, 1024)),
            ('relu_1', nn.ReLU()),
            ('fully_connected_2', nn.Linear(1024, 500)),
            ('relu_2', nn.ReLU()),
            ('fully_connected_3', nn.Linear(500, 2)),
            ('output', nn.LogSoftmax(dim=1))
        ]))

model.fc = classifier

In [5]:
import time

for device in ['cpu', 'cuda']:
    criterion = nn.NLLLoss()
    
    optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
    
    model.to(device)
    
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        start = time.time()
        
        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if i == 3:
            break
    print(f'{device}; Time per batch: {(time.time() - start) / 3} seconds.')

cpu; Time per batch: 6.911980946858724 seconds.
cuda; Time per batch: 0.008353869120279947 seconds.


In [6]:
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

In [7]:
epochs = 20
print_every = 40
steps = 0

model.to('cuda')

for epoch in range(1, epochs + 1):
    running_loss = 0
    
    for index, (inputs, labels) in enumerate(train_loader):
        steps += 1
        
        inputs = inputs.to('cuda')
        labels = labels.to('cuda')
        
        optimizer.zero_grad()
        
        # Forward and back
        outputs = model.forward(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if steps % print_every == 0:
            print(f'Epoch: {epoch}/{epochs}... Loss: {running_loss / print_every}')
            running_loss = 0

Epoch: 1/20... Loss: 0.2475408310070634
Epoch: 1/20... Loss: 0.16302221771329642
Epoch: 1/20... Loss: 0.17957930359989405
Epoch: 1/20... Loss: 0.16213754210621117
Epoch: 1/20... Loss: 0.15875743925571442
Epoch: 1/20... Loss: 0.1318453012034297
Epoch: 1/20... Loss: 0.14570468608289958
Epoch: 1/20... Loss: 0.17813535761088134
Epoch: 2/20... Loss: 0.028837460093200207
Epoch: 2/20... Loss: 0.1539519908837974
Epoch: 2/20... Loss: 0.15600432977080345
Epoch: 2/20... Loss: 0.16420881655067204
Epoch: 2/20... Loss: 0.13101341156288981
Epoch: 2/20... Loss: 0.136171667650342
Epoch: 2/20... Loss: 0.12392423488199711
Epoch: 2/20... Loss: 0.1248711010441184
Epoch: 2/20... Loss: 0.1497477786615491
Epoch: 3/20... Loss: 0.06365002915263177
Epoch: 3/20... Loss: 0.13448715871199965
Epoch: 3/20... Loss: 0.11339101386256516
Epoch: 3/20... Loss: 0.1452797709964216
Epoch: 3/20... Loss: 0.15900230463594198
Epoch: 3/20... Loss: 0.1599993571639061
Epoch: 3/20... Loss: 0.12858758862130343
Epoch: 3/20... Loss: 0.1

KeyboardInterrupt: 

In [8]:
correct = 0
total = 0

with torch.no_grad():
    for data in test_loader:
        images, labels = data
        images = images.to('cuda')
        labels = labels.to('cuda')
        
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
print(f'Accuracy of the network on 10000 test images: {100 * correct / total}')

Accuracy of the network on 10000 test images: 55.8


In [None]:
def do_deep_learning(model, train_loader, epochs, print_every, criterion, optimizer, device='cpu'):
    epochs = epochs
    print_every = print_every
    steps = 0

    # change to cuda
    model.to('cuda')

    for epoch in range(1, epochs + 1):
        running_loss = 0
        for index, (inputs, labels) in enumerate(train_loader):
            steps += 1

            inputs, labels = inputs.to('cuda'), labels.to('cuda')

            optimizer.zero_grad()

            # Forward and backward passes
            outputs = model.forward(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                print(f'Epoch: {epoch}/{epochs}')
                print(f'Loss: {running_loss/print_every}')

                running_loss = 0
                
def check_accuracy_on_test(test_loader):
    correct = 0
    total = 0

    with torch.no_grad():
        for data in test_loader:
            images, labels = data

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on 10000 test images: {100 * correct / total}')