# Tutoriel 4 - Transfert d'apprentissage

In [1]:
import math
import torch
import numpy as np
from torch import optim, nn
from torch.autograd import Variable
from torchvision import transforms
import torchvision.models as models
from torchvision.datasets.cifar import CIFAR100
from torch.utils.data import DataLoader, random_split
from torch.nn.init import kaiming_normal_, constant_

# New imports!
from pytoune.framework import Model, ModelCheckpoint, Callback, CSVLogger, EarlyStopping, ReduceLROnPlateau
from pytoune import torch_to_numpy
from pytoune.layers import Flatten
from tensorboardX import SummaryWriter
from torchvision.utils import make_grid

torch.manual_seed(42)
np.random.seed(42)

In [2]:
# Training hyperparameters
cuda_device = 0
device = torch.device("cuda:%d" % cuda_device if torch.cuda.is_available() else "cpu")
batch_size = 32
learning_rate = 0.01
n_epoch = 5
num_classes = 100

In [3]:
def load_cifar100(download=False, path='./data', transform=None):
    """Loads the cifar10 dataset.

    :param download: Download the dataset
    :param path: Folder to put the dataset
    :return: The train and test dataset
    """
    train_dataset = CIFAR100(path, train=True, download=download, transform=transform)
    test_dataset = CIFAR100(path, train=False, download=download, transform=transform)
    return train_dataset, test_dataset


def load_cifar100_with_validation_set(download=False, path='./data', train_split=0.8):
    """Loads the CIFAR10 dataset.

    :param download: Download the dataset
    :param path: Folder to put the dataset
    :return: The train, valid and test dataset ready to be ingest in a neural network
    """
    train, test = load_cifar100(download, path)
    lengths = [round(train_split*len(train)), round((1.0-train_split)*len(train))]
    train, valid = random_split(train, lengths)
    return train, valid, test


In [20]:
norm_coefs = {}
norm_coefs['imagenet'] = [(0.485, 0.456, 0.406), (0.229, 0.224, 0.225)]

test_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(*norm_coefs['imagenet'])
])

train_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ColorJitter(hue=.05, saturation=.05),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(*norm_coefs['imagenet'])
])

train, valid, test = load_cifar100_with_validation_set(download=True)

train.dataset.transform = train_transforms
valid.dataset.transform = test_transforms
test.transform = test_transforms

Files already downloaded and verified
Files already downloaded and verified


In [21]:
len(train), len(valid), len(test)

(40000, 10000, 10000)

In [22]:
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid, batch_size=batch_size)
test_loader = DataLoader(test, batch_size=batch_size)

In [23]:
def train(name, pytorch_module, params=None):
    if not params:
      params = pytorch_module.parameters()
    
    optimizer = optim.SGD(params, lr=learning_rate, momentum=0.9, nesterov=True)
    loss_function = nn.CrossEntropyLoss()
    
    early_stopping = EarlyStopping(patience=10)
    lr_scheduler = ReduceLROnPlateau(patience=5)
    callbacks = [early_stopping, lr_scheduler]

    # Pytoune Model
    model = Model(pytorch_module, optimizer, loss_function, metrics=['accuracy'])

    # Send model on GPU
    model.to(device)

    # Train
    model.fit_generator(train_loader, valid_loader, epochs=n_epoch)
    return model

## Nous allons entraîner seulement la dernière couche pour classifier 100 types d'oiseaux.

Nous devons donc redéfinir la dernière couche du réseau resnet34 pour le bon nombre de classes dans notre jeu de données (100 au lieu de 1000)

In [24]:
net = models.resnet34(pretrained=True)
net.fc = nn.Linear(net.fc.in_features, num_classes)
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [25]:
list(net.named_parameters())

[('conv1.weight', Parameter containing:
  tensor([[[[ 5.4109e-03, -6.9092e-03,  7.8839e-03,  ...,  4.9072e-02,
              3.0660e-02,  2.5398e-02],
            [ 4.1081e-02,  3.1296e-02,  3.2265e-02,  ...,  3.3145e-02,
              2.9754e-02,  4.1735e-02],
            [ 4.9519e-03, -3.1705e-02, -6.1310e-02,  ..., -9.7493e-02,
             -1.1601e-01, -1.2191e-01],
            ...,
            [-1.2287e-02, -2.4841e-02, -9.3052e-03,  ...,  1.7113e-02,
              2.4631e-03,  1.6726e-02],
            [ 3.9117e-03,  4.4537e-03,  3.6315e-02,  ...,  1.0371e-01,
              7.3973e-02,  5.9085e-02],
            [ 1.6784e-02,  8.8902e-03,  3.1312e-02,  ...,  9.6964e-02,
              8.3749e-02,  9.6970e-02]],
  
           [[-7.7192e-03, -8.7711e-03,  1.4143e-02,  ...,  3.3901e-02,
              2.5483e-02,  2.4275e-02],
            [ 5.3961e-02,  4.4677e-02,  3.4326e-02,  ...,  1.3392e-02,
              1.9135e-02,  3.7995e-02],
            [ 1.0251e-03, -5.4513e-02, -1.0225e-01,

Pour entraîner seulement la dernière couche, en PyTorch, nous pouvons seulement envoyer les paramètres de cette couche à l'optimiseur.

Les autres paramètres resteront inchangés.

Nous en profitons pour bien initialiser ces nouveaux paramètres.

In [26]:
def get_lr_for_last_layer_only(net):
    # Filter params
    classification_layer_params = [(n, p) for n, p in net.named_parameters() if 'fc' in n]
    
    # Initialize those
    for n, p in classification_layer_params:
        if 'weight' in n:
            kaiming_normal_(p)
        if 'bias' in n:
            constant_(p, 0)
    
    # Return the list of different params/learning rates
    classification_layer_params = [p for _, p in classification_layer_params]
    return [
        {'params': classification_layer_params, 'lr': 1e-2, 'momentum':0.9, 'nesterov': True},
    ]


In [27]:
params = get_lr_for_last_layer_only(net)

In [28]:
model = train('deep_net', net, params)

Epoch 1/5 227.35s Step 1250/1250: loss: 2.194660, acc: 44.357500, val_loss: 1.705759, val_acc: 54.480000
Epoch 2/5 224.01s Step 1250/1250: loss: 1.614046, acc: 56.240000, val_loss: 1.688765, val_acc: 55.390000
Epoch 3/5 226.78s Step 1250/1250: loss: 1.500730, acc: 58.952500, val_loss: 1.687270, val_acc: 55.550000
Epoch 4/5 225.50s Step 1250/1250: loss: 1.436929, acc: 60.297500, val_loss: 1.641492, val_acc: 57.650000
Epoch 5/5 230.32s Step 1250/1250: loss: 1.388779, acc: 61.680000, val_loss: 1.619588, val_acc: 58.750000


## Ici nous allons entraîner la dernière couche et peaufiner l'ensemble du réseau.

Même principe que l'étape précédente, mais nous allons spécifier différents taux d'apprentissage.

In [None]:
def get_lr_for_last_layer_and_fine_tune_conv(net):
    # Filter params
    classification_layer_params = [(n, p) for n, p in net.named_parameters() if 'fc' in n]
    convolutional_layer_params = [p for n, p in net.named_parameters() if 'fc' not in n]
    
    # Initialize those
    for n, p in classification_layer_params:
        if 'weight' in n:
            kaiming_normal_(p)
        if 'bias' in n:
            constant_(p, 0)
    
    # Return the list of different params/learning rates
    classification_layer_params = [p for _, p in classification_layer_params]
    return [
        {'params': classification_layer_params, 'lr': 1e-2, 'momentum':0.9, 'nesterov': True},
        {'params': convolutional_layer_params, 'lr': 1e-4, 'momentum':0.9, 'nesterov': True},
    ]

In [None]:
params = get_lr_for_last_layer_and_fine_tune_conv(net)

In [None]:
net = models.resnet34(pretrained=True)
net.fc = nn.Linear(net.fc.in_features, num_classes)
model = train('deep_net', net, params)

## Nous pouvons aussi effectuer un apprentissage complet du réseau et voir si les résultats s'améliorent.

In [None]:
net = models.resnet34(pretrained=True)
net.fc = nn.Linear(net.fc.in_features, num_classes)
model = train('deep_net', net)