# Pretrained model selection

## Data preparation and convenience functions

In [1]:
import torch
import torch.nn as nn
import numpy as np
from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd

import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

torch.manual_seed(7843)

<torch._C.Generator at 0x7fa549d3a3d0>

In [2]:
# transform for pretrained models
transform = transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_dataset = torchvision.datasets.CIFAR10(
    root='.',
    train=True,
    transform=transform,
    download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root='.',
    train=False,
    transform=transform,
    download=True)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
K = len(set(train_dataset.targets))
print("Number of classes:", K)

Number of classes: 10


In [4]:
batch_size = 32
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
)

In [5]:
def batch_gd(model, criterion, optimizer, train_loader, test_loader, epochs):
    train_losses = np.zeros(epochs)
    test_losses = np.zeros(epochs)

    for it in range(epochs):
        t0 = datetime.now()
        train_loss = []
        for inputs, targets in train_loader:
            # move data to gpu
            inputs, targets = inputs.to(device), targets.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward pass
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # backward and optimize
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        # get train loss and test loss
        train_loss = np.mean(train_loss)

        test_loss = []
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss.append(loss.item())
        test_loss = np.mean(test_loss)

        # save losses
        train_losses[it] = train_loss
        test_losses[it] = test_loss

        dt = datetime.now() - t0
        print(f'Epoch {it+1}/{epochs}, Train Loss: {train_loss:.4f}, \
          Test Loss: {test_loss:.4f}, Duration: {dt}')
  
    return train_losses, test_losses

------
## Pretrained models

In [6]:
resnet18 = models.resnet18(pretrained=True)
alexnet = models.alexnet(pretrained=True)
squeezenet = models.squeezenet1_0(pretrained=True)
vgg16 = models.vgg16(pretrained=True)
densenet = models.densenet161(pretrained=True)
inception = models.inception_v3(pretrained=True)
googlenet = models.googlenet(pretrained=True)
shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
mobilenet = models.mobilenet_v2(pretrained=True)
resnext50_32x4d = models.resnext50_32x4d(pretrained=True)
wide_resnet50_2 = models.wide_resnet50_2(pretrained=True)
mnasnet = models.mnasnet1_0(pretrained=True)
vgg13_bn = models.vgg13_bn(pretrained=True)
vgg19 = models.vgg19(pretrained=True)
vgg19_bn = models.vgg19_bn(pretrained=True)

In [7]:
model_names = [
    'densenet',
    'inception',
    'googlenet',
    'resnet18',
    'shufflenet',
    'resnext50_32x4d',
    'wide_resnet50_2'
] + [
    'mobilenet',
    'mnasnet',
    'alexnet'
] + [
    'vgg13_bn',
    'vgg16',
    'vgg19',
    'vgg19_bn'
] + [
    'squeezenet'
]

model_features = [
    densenet.classifier.in_features,
    inception.fc.in_features,
    googlenet.fc.in_features,
    resnet18.fc.in_features,
    shufflenet.fc.in_features,
    resnext50_32x4d.fc.in_features,
    wide_resnet50_2.fc.in_features
] + [
    mobilenet.classifier[1].in_features,
    mnasnet.classifier[1].in_features,
    alexnet.classifier[1].in_features
] + [
    vgg13_bn.classifier[0].in_features,
    vgg16.classifier[0].in_features,
    vgg19.classifier[0].in_features,
    vgg19_bn.classifier[0].in_features
] + [
    squeezenet.classifier[1].in_channels
]

In [8]:
pd.DataFrame(model_features,index=model_names,columns=['n_features']).sort_values('n_features',ascending=False)

Unnamed: 0,n_features
vgg13_bn,25088
vgg16,25088
vgg19,25088
vgg19_bn,25088
alexnet,9216
densenet,2208
inception,2048
resnext50_32x4d,2048
wide_resnet50_2,2048
mobilenet,1280


In [9]:
# freeze parameters for

# vgg16
for param in vgg16.parameters():
    param.requires_grad = False

# vgg13 batch norm
for param in vgg13_bn.parameters():
    param.requires_grad = False

# alexnet
for param in alexnet.parameters():
    param.requires_grad = False

# densenet
for param in densenet.parameters():
    param.requires_grad = False
    
# vgg19
for param in vgg19.parameters():
    param.requires_grad = False

# vgg19 batch norm
for param in vgg19_bn.parameters():
    param.requires_grad = False

In [10]:
n_features = vgg16.classifier[0].in_features
vgg16.classifier = nn.Linear(n_features, K)

n_features = vgg13_bn.classifier[0].in_features
vgg13_bn.classifier = nn.Linear(n_features, K)

n_features = alexnet.classifier[1].in_features
alexnet.classifier = nn.Linear(n_features, K)

n_features = densenet.classifier.in_features
densenet.classifier = nn.Linear(n_features, K)

n_features = vgg19.classifier[0].in_features
vgg19.classifier = nn.Linear(n_features, K)

n_features = vgg19_bn.classifier[0].in_features
vgg19_bn.classifier = nn.Linear(n_features, K)

In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [12]:
for name, model in zip(['vgg16', 'vgg13_bn', 'alexnet', 'densenet', 'vgg19', 'vgg19_bn'],
                       [vgg16, vgg13_bn, alexnet, densenet, vgg19, vgg19_bn]):
    print(f'Model: {name}')
    model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    
    train_losses, test_losses = batch_gd(model,
                                         criterion,
                                         optimizer,
                                         train_loader,
                                         test_loader,
                                         epochs=1,
                                    )
    
    n_correct = 0.
    n_total = 0.

    for inputs, targets in train_loader:
        # Move to GPU
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)

        # Get prediction
        # torch.max returns both max and argmax
        _, predictions = torch.max(outputs, 1)

        # update counts
        n_correct += (predictions == targets).sum().item()
        n_total += targets.shape[0]

    train_acc = n_correct / n_total


    n_correct = 0.
    n_total = 0.
    for inputs, targets in test_loader:
        # Move to GPU
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass
        outputs = model(inputs)

        # Get prediction
        # torch.max returns both max and argmax
        _, predictions = torch.max(outputs, 1)

        # update counts
        n_correct += (predictions == targets).sum().item()
        n_total += targets.shape[0]

    test_acc = n_correct / n_total
    
    print(f"Train acc: {train_acc:.4f}, Test acc: {test_acc:.4f}",'\n')

Model: vgg16
Epoch 1/1, Train Loss: 0.8365,           Test Loss: 0.8848, Duration: 0:03:06.289310
Train acc: 0.9255, Test acc: 0.8322 

Model: vgg13_bn
Epoch 1/1, Train Loss: 0.7551,           Test Loss: 0.7030, Duration: 0:03:05.021559
Train acc: 0.9136, Test acc: 0.8040 

Model: alexnet
Epoch 1/1, Train Loss: 0.8704,           Test Loss: 0.8481, Duration: 0:00:50.374082
Train acc: 0.8534, Test acc: 0.7856 

Model: densenet
Epoch 1/1, Train Loss: 0.6919,           Test Loss: 0.5960, Duration: 0:04:36.754148
Train acc: 0.8081, Test acc: 0.7981 

Model: vgg19
Epoch 1/1, Train Loss: 0.8486,           Test Loss: 0.8824, Duration: 0:03:31.653130
Train acc: 0.9141, Test acc: 0.8282 

Model: vgg19_bn
Epoch 1/1, Train Loss: 0.8333,           Test Loss: 0.8566, Duration: 0:03:59.312059
Train acc: 0.8767, Test acc: 0.7734 

