In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import torch
import numpy as np
from sklearn.metrics import roc_auc_score

<IPython.core.display.Javascript object>

# Практикум 3, DL for images



## 1. Пример работы со свертками 


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

<IPython.core.display.Javascript object>

In [4]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])

training_data = datasets.CIFAR10(
    root='data',
    train=True, 
    download=True,
    transform=transform
)

test_data = datasets.CIFAR10(
    root='data',
    train=False, 
    download=True,
    transform=transform
)

trainloader = torch.utils.data.DataLoader(training_data, batch_size=128, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


<IPython.core.display.Javascript object>

In [5]:
from torch import nn
import torch.nn.functional as F

class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


<IPython.core.display.Javascript object>

In [10]:
import torch.optim as optim

model = CNN().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

<IPython.core.display.Javascript object>

In [11]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (i + 1) % 100 == 0:  # print every 100 mini-batches
            print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")

[1,   100] loss: 2.305
[1,   200] loss: 2.303
[1,   300] loss: 2.303
[2,   100] loss: 2.301
[2,   200] loss: 2.299
[2,   300] loss: 2.298
[3,   100] loss: 2.293
[3,   200] loss: 2.288
[3,   300] loss: 2.280
[4,   100] loss: 2.237
[4,   200] loss: 2.179
[4,   300] loss: 2.112
[5,   100] loss: 2.038
[5,   200] loss: 2.012
[5,   300] loss: 1.977
[6,   100] loss: 1.930
[6,   200] loss: 1.916
[6,   300] loss: 1.882
[7,   100] loss: 1.827
[7,   200] loss: 1.816
[7,   300] loss: 1.770
[8,   100] loss: 1.718
[8,   200] loss: 1.697
[8,   300] loss: 1.670
[9,   100] loss: 1.633
[9,   200] loss: 1.621
[9,   300] loss: 1.597
[10,   100] loss: 1.564
[10,   200] loss: 1.560
[10,   300] loss: 1.539
Finished Training


<IPython.core.display.Javascript object>

In [12]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
model.eval()
model.to("cpu")
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(
    "Accuracy of the network on the 10000 test images: %d %%" % (100 * correct / total)
)

Accuracy of the network on the 10000 test images: 45 %


<IPython.core.display.Javascript object>

In [13]:
# prepare to count predictions for each class
classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname, accuracy))

Accuracy for class plane is: 44.7 %
Accuracy for class car   is: 57.1 %
Accuracy for class bird  is: 16.2 %
Accuracy for class cat   is: 29.3 %
Accuracy for class deer  is: 39.8 %
Accuracy for class dog   is: 44.1 %
Accuracy for class frog  is: 65.7 %
Accuracy for class horse is: 44.1 %
Accuracy for class ship  is: 62.0 %
Accuracy for class truck is: 48.0 %


<IPython.core.display.Javascript object>

# Задание 1

Написать и обучить нейронную сеть на датасете CIFAR (5 баллов)
1. Замените сверточные слои размера 5х5 на два идущих подряд слоя размером 3х3
2. Обучите модель на GPU, 20 эпох


Дополнительно:
1. (5 баллов) Переписать код с использованием pytorch lightning (см. документацию фреймворка https://pytorch-lightning.readthedocs.io/en/latest/starter/introduction.html)

In [14]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms

transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

training_data = datasets.CIFAR10(
    root="data", train=True, download=True, transform=transform
)

test_data = datasets.CIFAR10(
    root="data", train=False, download=True, transform=transform
)

trainloader = torch.utils.data.DataLoader(training_data, batch_size=128, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


<IPython.core.display.Javascript object>

In [15]:
from torch import nn
import torch.nn.functional as F

class CNN_3_3(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 3)
        self.fc1 = nn.Linear(16*6*6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


<IPython.core.display.Javascript object>

In [16]:
import torch.optim as optim

model = CNN_3_3().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if (i + 1) % 100 == 0:  # print every 100 mini-batches
            print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

print("Finished Training")

[1,   100] loss: 2.303
[1,   200] loss: 2.303
[1,   300] loss: 2.302
[2,   100] loss: 2.300
[2,   200] loss: 2.299
[2,   300] loss: 2.297
[3,   100] loss: 2.293
[3,   200] loss: 2.288
[3,   300] loss: 2.283
[4,   100] loss: 2.260
[4,   200] loss: 2.234
[4,   300] loss: 2.196
[5,   100] loss: 2.105
[5,   200] loss: 2.069
[5,   300] loss: 2.027
[6,   100] loss: 1.969
[6,   200] loss: 1.927
[6,   300] loss: 1.919
[7,   100] loss: 1.882
[7,   200] loss: 1.860
[7,   300] loss: 1.851
[8,   100] loss: 1.814
[8,   200] loss: 1.800
[8,   300] loss: 1.777
[9,   100] loss: 1.740
[9,   200] loss: 1.718
[9,   300] loss: 1.708
[10,   100] loss: 1.669
[10,   200] loss: 1.656
[10,   300] loss: 1.645
[11,   100] loss: 1.611
[11,   200] loss: 1.607
[11,   300] loss: 1.584
[12,   100] loss: 1.552
[12,   200] loss: 1.541
[12,   300] loss: 1.538
[13,   100] loss: 1.508
[13,   200] loss: 1.510
[13,   300] loss: 1.484
[14,   100] loss: 1.462
[14,   200] loss: 1.457
[14,   300] loss: 1.453
[15,   100] loss: 1

<IPython.core.display.Javascript object>

In [17]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
model.eval()
model.to("cpu")
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(
    "Accuracy of the network on the 10000 test images: %d %%" % (100 * correct / total)
)

Accuracy of the network on the 10000 test images: 52 %


<IPython.core.display.Javascript object>

In [18]:
# prepare to count predictions for each class
classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname, accuracy))

Accuracy for class plane is: 41.8 %
Accuracy for class car   is: 64.0 %
Accuracy for class bird  is: 44.2 %
Accuracy for class cat   is: 41.4 %
Accuracy for class deer  is: 21.0 %
Accuracy for class dog   is: 51.2 %
Accuracy for class frog  is: 53.9 %
Accuracy for class horse is: 67.3 %
Accuracy for class ship  is: 74.4 %
Accuracy for class truck is: 61.2 %


<IPython.core.display.Javascript object>

## use pytroch-lighting

In [19]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms
import pytorch_lightning as pl

<IPython.core.display.Javascript object>

In [4]:
class CIFAR10DataModule(pl.LightningDataModule):
    def __init__(self, batch_size=128, transform=None):
        super().__init__()
        self.batch_size = batch_size
        self.transform = (
            transform
            if transform
            else transforms.Compose(
                [
                    transforms.ToTensor(),
                    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
                ]
            )
        )

    def setup(self, stage=None, transform=None):

        self.training_data = datasets.CIFAR10(
            root="data", train=True, download=True, transform=self.transform
        )

        self.test_data = datasets.CIFAR10(
            root="data", train=False, download=True, transform=self.transform
        )

    def train_dataloader(self):
        return torch.utils.data.DataLoader(
            self.training_data,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
        )

    def test_dataloader(self):
        return torch.utils.data.DataLoader(
            self.test_data,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
        )

In [21]:
import torch.nn.functional as F
import torch.optim as optim


class CNN_3_3_LightningModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.cnn = CNN_3_3()

    def forward(self, x):
        return self.cnn(x)

    def training_step(self, batch):
        inputs, targets = batch
        outputs = self(inputs)
        return F.cross_entropy(outputs, targets)

    def test_step(self, batch, batch_id):
        inputs, targets = batch
        outputs = self(inputs)
        predictions = torch.argmax(outputs, 1)
        return [predictions, targets]

    def test_epoch_end(self, outputs):
        classes = (
            "plane",
            "car",
            "bird",
            "cat",
            "deer",
            "dog",
            "frog",
            "horse",
            "ship",
            "truck",
        )

        correct_pred = {classname: 0 for classname in classes}
        total_pred = {classname: 0 for classname in classes}
        for predictions, labels in outputs:
            for label, prediction in zip(
                labels.cpu().numpy(), predictions.cpu().numpy()
            ):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1
        self.log(
            "Accuracy of the network on the 10000 test images %: ",
            100 * sum(correct_pred.values()) / sum(total_pred.values()),
        )
        for classname, correct_count in correct_pred.items():
            accuracy = 100 * float(correct_count) / total_pred[classname]
            self.log(f"Accuracy for class {classname:5s} is %", accuracy)

    def configure_optimizers(self):
        return optim.SGD(self.cnn.parameters(), lr=0.001, momentum=0.9)

<IPython.core.display.Javascript object>

In [22]:
data_module = CIFAR10DataModule()

<IPython.core.display.Javascript object>

In [23]:
model = CNN_3_3_LightningModule()
trainer = pl.Trainer(accelerator="gpu", devices=1, max_epochs=20)
trainer.fit(model, datamodule=data_module)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type    | Params
---------------------------------
0 | cnn  | CNN_3_3 | 81.3 K
---------------------------------
81.3 K    Trainable params
0         Non-trainable params
81.3 K    Total params
0.325     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]







<IPython.core.display.Javascript object>

In [24]:
trainer.test(model, datamodule=data_module)

Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]



────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
                    Test metric                                         DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
           Accuracy for class bird  is %                             39.20000076293945
           Accuracy for class car   is %                             63.29999923706055
           Accuracy for class cat   is %                                    28.0
           Accuracy for class deer  is %                             39.29999923706055
           Accuracy for class dog   is %                             44.29999923706055
           Accuracy for class frog  is %                              75.0999984741211
           Accuracy for class horse is %                                    54.5
           Accuracy for class plane is %                             54.5999

[{'Accuracy of the network on the 10000 test images %: ': 53.15999984741211,
  'Accuracy for class plane is %': 54.599998474121094,
  'Accuracy for class car   is %': 63.29999923706055,
  'Accuracy for class bird  is %': 39.20000076293945,
  'Accuracy for class cat   is %': 28.0,
  'Accuracy for class deer  is %': 39.29999923706055,
  'Accuracy for class dog   is %': 44.29999923706055,
  'Accuracy for class frog  is %': 75.0999984741211,
  'Accuracy for class horse is %': 54.5,
  'Accuracy for class ship  is %': 69.9000015258789,
  'Accuracy for class truck is %': 63.400001525878906}]

<IPython.core.display.Javascript object>

## 2. Используем VGG

In [25]:
import torchvision.models as models

vgg11 = models.vgg11(pretrained=True)

<IPython.core.display.Javascript object>

In [26]:
from torchvision import transforms

transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)

training_data = datasets.CIFAR10(
    root="data",  # root is the path where the train/test data is stored
    train=True,  # train specifies training or test dataset
    download=True,  # download=True downloads the data from the internet if it’s not available at root
    transform=transform,  # transform and target_transform specify the feature and label transformations
)

test_data = datasets.CIFAR10(
    root="data", train=False, download=True, transform=transform
)

trainloader = torch.utils.data.DataLoader(
    training_data, batch_size=128, shuffle=True, num_workers=2
)

testloader = torch.utils.data.DataLoader(
    test_data, batch_size=128, shuffle=False, num_workers=2
)

classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)

Files already downloaded and verified
Files already downloaded and verified


<IPython.core.display.Javascript object>

# Задание 2 (5 баллов)
1. Обучитите на GPU претренированную модель VGG (минимум 5 эпох)
2. Протестируйте ее на всех данных и на каждом классе отдельно

Дополнительное задание (5 баллов):     

* Взять какой-нибудь специфичный датасет с картинками
* Взять претренированную VGG (или другу модель) и сделать transfer learning на выбранный датасет

In [18]:
import torch.nn.functional as F
import torch.optim as optim
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms
import pytorch_lightning as pl
import torchvision.models as models
import torch.nn as nn

class VGG_LightningModule(pl.LightningModule):
    def __init__(self, n_classes=10):
        super().__init__()
        self.vgg11 = models.vgg11(pretrained=True)
        self.fc = nn.Linear(1000, n_classes)

    def forward(self, x):
        x =  self.vgg11(x)
        return self.fc(x)

    def training_step(self, batch):
        inputs, targets = batch
        outputs = self(inputs)
        return F.cross_entropy(outputs, targets)

    def test_step(self, batch, batch_id):
        inputs, targets = batch
        outputs = self(inputs)
        predictions = torch.argmax(outputs, 1)
        return [predictions, targets]

    def test_epoch_end(self, outputs):
        classes = (
            "plane",
            "car",
            "bird",
            "cat",
            "deer",
            "dog",
            "frog",
            "horse",
            "ship",
            "truck",
        )

        correct_pred = {classname: 0 for classname in classes}
        total_pred = {classname: 0 for classname in classes}
        for predictions, labels in outputs:
            for label, prediction in zip(
                labels.cpu().numpy(), predictions.cpu().numpy()
            ):
                if label == prediction:
                    correct_pred[classes[label]] += 1
                total_pred[classes[label]] += 1
        self.log(
            "Accuracy of the network on the 10000 test images %: ",
            100 * sum(correct_pred.values()) / sum(total_pred.values()),
        )
        for classname, correct_count in correct_pred.items():
            accuracy = 100 * float(correct_count) / total_pred[classname]
            self.log(f"Accuracy for class {classname:5s} is %", accuracy)

    def configure_optimizers(self):
        return optim.SGD(self.vgg11.parameters(), lr=0.001, momentum=0.9)

In [19]:
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ]
)
data_module = CIFAR10DataModule(transform=transform)

In [20]:
model = VGG_LightningModule()
trainer = pl.Trainer(accelerator="gpu", devices=1, max_epochs=10)
trainer.fit(model, datamodule=data_module)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type   | Params
---------------------------------
0 | vgg11 | VGG    | 132 M 
1 | fc    | Linear | 10.0 K
---------------------------------
132 M     Trainable params
0         Non-trainable params
132 M     Total params
531.493   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]





In [21]:
trainer.test(model, datamodule=data_module)

Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]



────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
                    Test metric                                         DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
           Accuracy for class bird  is %                              74.9000015258789
           Accuracy for class car   is %                                    91.5
           Accuracy for class cat   is %                             68.30000305175781
           Accuracy for class deer  is %                                    84.5
           Accuracy for class dog   is %                             77.30000305175781
           Accuracy for class frog  is %                              87.4000015258789
           Accuracy for class horse is %                             89.30000305175781
           Accuracy for class plane is %                              91.400



[{'Accuracy of the network on the 10000 test images %: ': 84.8499984741211,
  'Accuracy for class plane is %': 91.4000015258789,
  'Accuracy for class car   is %': 91.5,
  'Accuracy for class bird  is %': 74.9000015258789,
  'Accuracy for class cat   is %': 68.30000305175781,
  'Accuracy for class deer  is %': 84.5,
  'Accuracy for class dog   is %': 77.30000305175781,
  'Accuracy for class frog  is %': 87.4000015258789,
  'Accuracy for class horse is %': 89.30000305175781,
  'Accuracy for class ship  is %': 91.0,
  'Accuracy for class truck is %': 92.9000015258789}]

Более глубокая модель показывает лучшее качество(что ожидаемо). Можно выбить еще лучше, если подобрать оптимизатор и параметры, использовать валидационный датасет, аугментации.