In [1]:
import torch
from torch import nn
from torchvision import models, datasets, transforms
import time
from tqdm.auto import tqdm

In [2]:
# Required reading: a thread on the different states of a neural network in PyTorch
# https://stackoverflow.com/questions/51748138/pytorch-how-to-set-requires-grad-false
def set_requires_grad(model, value=False):
    """Assign the ``requires_grad`` flag of every parameter of ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. an ``nn.Module``).
        value: Flag written to each parameter. ``False`` (the default) stops
            gradients from being computed for them; ``True`` re-enables it.
    """
    for weight in model.parameters():
        weight.requires_grad = value

In [3]:
# Mini-batch size shared by every DataLoader built in this notebook.
batch_size = 64
# Side length (in pixels) that images are resized and center-cropped to.
input_size = 224
# Number of outputs of each replacement classification head.
num_classes = 10

In [4]:
# ResNet-18 initialised with pretrained weights.
model = models.resnet18(pretrained=True)
# Input width of the final fully connected layer; reused when new heads are built.
num_ftrs = model.fc.in_features
# Freeze every parameter of the loaded network.
set_requires_grad(model, False)

In [5]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cuda', index=0)

In [6]:
# Shared preprocessing pipeline: resize, center-crop to an
# `input_size` x `input_size` square, convert to a tensor, then normalize each
# of the three channels with the given per-channel mean and std.
normalize = transforms.Compose([
    transforms.Resize(size=input_size),
    transforms.CenterCrop(size=input_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

### Предобученная на ImageNet модель: на датасете Imagenette дообучается только последний слой

In [7]:
# Imagenette train/val splits read from disk with ImageFolder; both go through
# the shared `normalize` preprocessing.
imagenet_trainset = datasets.ImageFolder(
    root="./imagenette/imagenette2-320/train", transform=normalize)
imagenet_testset = datasets.ImageFolder(
    root="./imagenette/imagenette2-320/val", transform=normalize)

# Only the training split is shuffled; the validation split keeps its order.
imagenet_trainloader = torch.utils.data.DataLoader(
    imagenet_trainset, batch_size=batch_size, shuffle=True)
imagenet_testloader = torch.utils.data.DataLoader(
    imagenet_testset, batch_size=batch_size, shuffle=False)

# Keyed by the phase names that train_model iterates over.
imagenet_loaders = dict(train=imagenet_trainloader, val=imagenet_testloader)

In [8]:
# Optional: inspect the shape of the input that reaches the adaptive average-pooling layer (avgpool)
# def print_hook(m, i):
#   print("Inside avgpool", i[0].shape)

# handle = model.avgpool.register_forward_pre_hook(print_hook)
# model(torch.ones(1,3,512,512))
# handle.remove()

In [9]:
# Replace the classifier with a fresh head that has `num_classes` outputs. The
# layer is also kept under its own name so the same object can be attached
# again later.
imagenette_fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)
model.fc = imagenette_fc
model = model.to(device)

# Optimizer restricted to the parameters of the new head.
pretrain_optimizer = torch.optim.SGD(
    model.fc.parameters(), lr=0.001, momentum=0.9)
# Optimizer over every parameter of the model.
train_optimizer = torch.optim.SGD(
    model.parameters(), lr=0.001, momentum=0.9)

In [10]:
# Cross-entropy loss averaged over the batch. 'mean' is the default reduction;
# it is spelled out because train_model multiplies each batch loss by the
# batch size when accumulating the epoch loss.
criterion = nn.CrossEntropyLoss(reduction='mean')

In [11]:
def train_model(model, dataloaders, criterion, optimizer,
                phases, num_epochs=3):
    """Run ``num_epochs`` passes over the given phases and record accuracy.

    In every epoch each entry of ``phases`` is processed in order: the model
    is switched to training mode for ``'train'`` and to evaluation mode for
    any other phase, and the matching loader is consumed batch by batch.
    Gradient tracking, ``backward()`` and ``optimizer.step()`` happen only in
    the ``'train'`` phase. Batches are moved to ``device``, a name this
    function reads from the enclosing (notebook) namespace.

    Args:
        model: Network called on every input batch.
        dataloaders: Mapping from phase name to an iterable of
            ``(inputs, labels)`` batches that supports ``len()`` and has a
            ``dataset`` attribute.
        criterion: Callable ``criterion(outputs, labels)`` returning the batch
            loss. The printed per-sample loss multiplies it by the batch size,
            i.e. it assumes a batch-averaged loss.
        optimizer: Optimizer whose gradients are zeroed before every batch and
            which is stepped on training batches.
        phases: Phase names to run, in order, within each epoch.
        num_epochs: Number of epochs to run.

    Returns:
        Tuple ``(model, acc_history)``; ``acc_history`` maps each phase to a
        list holding one accuracy value per epoch (the tensor produced by
        ``.double()`` on the summed count of correct predictions, divided by
        the dataset size). Per-epoch losses are collected and printed but are
        not returned.
    """
    started_at = time.time()

    acc_history = {name: [] for name in phases}
    loss_history = {name: [] for name in phases}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Every requested phase is run once per epoch.
        for phase in phases:
            is_training = phase == 'train'
            if is_training:
                model.train()  # training mode
            else:
                model.eval()   # evaluation mode

            loader = dataloaders[phase]
            loss_sum = 0.0
            n_correct = 0

            for inputs, labels in tqdm(loader, total=len(loader)):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Clear gradients left over from the previous batch.
                optimizer.zero_grad()

                # Autograd history is recorded only while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # Predicted class = index of the largest score along dim 1.
                    _, preds = outputs.max(dim=1)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Undo the batch averaging so the epoch loss is per sample.
                loss_sum += loss.item() * inputs.shape[0]
                n_correct += torch.sum(preds == labels)

            n_samples = len(loader.dataset)
            epoch_loss = loss_sum / n_samples
            epoch_acc = n_correct.double() / n_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss,
                                                       epoch_acc))
            loss_history[phase].append(epoch_loss)
            acc_history[phase].append(epoch_acc)

        print()

    minutes, seconds = divmod(time.time() - started_at, 60)
    print('Training complete in {:.0f}m {:.0f}s'.format(minutes, seconds))

    return model, acc_history

In [12]:
# Stage 1: three epochs on Imagenette using the optimizer that only holds the
# new head's parameters; the 'val' phase runs after each 'train' phase.
model, acc_history = train_model(
    model, imagenet_loaders, criterion, pretrain_optimizer,
    phases=['train', 'val'], num_epochs=3)

Epoch 0/2
----------


  0%|          | 0/148 [00:00<?, ?it/s]

train Loss: 0.6732 Acc: 0.8515


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.2036 Acc: 0.9738

Epoch 1/2
----------


  0%|          | 0/148 [00:00<?, ?it/s]

train Loss: 0.1820 Acc: 0.9714


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.1372 Acc: 0.9783

Epoch 2/2
----------


  0%|          | 0/148 [00:00<?, ?it/s]

train Loss: 0.1386 Acc: 0.9761


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.1117 Acc: 0.9794

Training complete in 2m 47s


In [13]:
# Print the validation accuracy recorded after each epoch (epochs shown 1-based).
val_acc_per_epoch = acc_history['val']
for i, acc in enumerate(val_acc_per_epoch):
    print(f"Epoch: {i+1}. Accuracy: {acc.cpu().numpy(): .5f}")

Epoch: 1. Accuracy:  0.97376
Epoch: 2. Accuracy:  0.97834
Epoch: 3. Accuracy:  0.97936


### Дообучение всей модели на CIFAR-10

In [14]:
# CIFAR-10 train and test splits stored under ./data (download=True), passed
# through the same `normalize` preprocessing as the Imagenette images.
trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=normalize)
testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=normalize)

# Only the training split is shuffled.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False)

# Keyed by the phase names that train_model iterates over; the test split
# serves as the 'val' phase.
loaders = dict(train=trainloader, val=testloader)
# Human-readable names for the ten classes.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [15]:
# Stage 2: attach a fresh head for CIFAR-10 and fine-tune the whole network.
model.to('cpu')
cifar_fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)
model.fc = cifar_fc
model.to(device)
# Make every parameter trainable again.
set_requires_grad(model, value=True)

# Optimizer over all parameters of the model, not just the head.
train_optimizer = torch.optim.SGD(
    model.parameters(), lr=0.001, momentum=0.9)

# NOTE(review): the result is deliberately left as the cell's bare final
# expression, because the cell that prints the CIFAR accuracies reads it back
# through IPython's `_`.
train_model(
    model, loaders, criterion, train_optimizer,
    phases=['train', 'val'], num_epochs=3)

Epoch 0/2
----------


  0%|          | 0/782 [00:00<?, ?it/s]

train Loss: 0.4900 Acc: 0.8442


  0%|          | 0/157 [00:00<?, ?it/s]

val Loss: 0.2269 Acc: 0.9229

Epoch 1/2
----------


  0%|          | 0/782 [00:00<?, ?it/s]

train Loss: 0.1740 Acc: 0.9438


  0%|          | 0/157 [00:00<?, ?it/s]

val Loss: 0.1789 Acc: 0.9404

Epoch 2/2
----------


  0%|          | 0/782 [00:00<?, ?it/s]

train Loss: 0.1026 Acc: 0.9685


  0%|          | 0/157 [00:00<?, ?it/s]

val Loss: 0.1654 Acc: 0.9439

Training complete in 13m 49s


(ResNet(
   (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
   (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
   (relu): ReLU(inplace=True)
   (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
   (layer1): Sequential(
     (0): BasicBlock(
       (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (relu): ReLU(inplace=True)
       (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
     )
     (1): BasicBlock(
       (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
       (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       (relu): ReLU

In [18]:
# Unpack the (model, acc_history) tuple returned by the CIFAR fine-tuning run
# and print the validation accuracy recorded after each epoch.
# `model_c` is spelled with an ASCII "c", consistent with `model_c_in` used
# further down.
# NOTE(review): `_` is IPython's "most recent output" variable, so this cell
# only works while the train_model(...) call of the fine-tuning cell is the
# latest displayed result; binding that return value to names in the training
# cell would make the notebook robust to re-runs.
model_c, acc_history = _
for i, acc in enumerate(acc_history['val']):
    print(f"Epoch: {i+1}. Accuracy: {acc.cpu().numpy(): .5f}")

Epoch: 1. Accuracy:  0.92290
Epoch: 2. Accuracy:  0.94040
Epoch: 3. Accuracy:  0.94390


### Вернуть последний слой, обученный на Imagenette, и проверить качество на Imagenette.

In [19]:
# Put the head that was trained on Imagenette back onto the network that has
# just been fine-tuned on CIFAR-10.
model = model.to('cpu')
model.fc = imagenette_fc
model = model.to(device)

# Rebuild both optimizers for the current parameters.
train_optimizer = torch.optim.SGD(
    model.parameters(), lr=0.001, momentum=0.9)
pretrain_optimizer = torch.optim.SGD(
    model.fc.parameters(), lr=0.001, momentum=0.9)

# Evaluation only: with phases=['val'] train_model never calls backward() or
# optimizer.step(), so this single pass just measures loss and accuracy.
model_c_in, acc_history = train_model(
    model, imagenet_loaders, criterion, pretrain_optimizer,
    phases=['val'], num_epochs=1)

Epoch 0/0
----------


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.5801 Acc: 0.8201

Training complete in 0m 17s


### Качество снизилось, но не катастрофически (было 0.97936, стало 0.8201)

### Дообучить только последний слой на Imagenette и проверить, удалось ли добиться исходного качества

In [23]:
# Freeze the whole network, then re-enable gradients for the head only.
set_requires_grad(model, False)
set_requires_grad(model.fc, True)

# NOTE(review): train_model switches the model to train mode during the
# 'train' phase, so the BatchNorm layers presumably keep updating their running
# statistics even though their weights are frozen - confirm this is intended.
model_c_in, acc_history = train_model(
    model, imagenet_loaders, criterion, pretrain_optimizer,
    phases=['train', 'val'], num_epochs=3)

Epoch 0/2
----------


  0%|          | 0/148 [00:00<?, ?it/s]

train Loss: 0.1736 Acc: 0.9642


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.1506 Acc: 0.9638

Epoch 1/2
----------


  0%|          | 0/148 [00:00<?, ?it/s]

train Loss: 0.1518 Acc: 0.9664


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.1342 Acc: 0.9676

Epoch 2/2
----------


  0%|          | 0/148 [00:00<?, ?it/s]

train Loss: 0.1363 Acc: 0.9691


  0%|          | 0/62 [00:00<?, ?it/s]

val Loss: 0.1256 Acc: 0.9687

Training complete in 2m 47s


### Практически удалось достигнуть предыдущего качества (было 0.9794, стало 0.9687)