# Feed-forward сети

Итак, давайте потренируемся тренировать нейронные сети прямого распространения (так как делали на паре)
При этом попробуем создать свою функцию активации на одном из слоев

In [108]:
# Сделаем необходимые импорты
import torch
import numpy as np
import torch.nn.functional as F
from torch import nn
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from torch import optim
from IPython.display import clear_output
import time
from tqdm import tqdm
import random
from PIL import Image

In [6]:
# Загрузим датасет CIFAR-100, сразу же создадим dataloader для него
# Если вам не хватает вычислительных ресурсов, то можно вернуться к CIFAR-10
train_dataset = torchvision.datasets.CIFAR10(root='data/',
                                             train=True,  
                                             transform=transforms.ToTensor(),
                                             download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64, 
                                           shuffle=True)

Files already downloaded and verified


In [43]:
test_dataset = torchvision.datasets.CIFAR10(root='./data', 
                                            train=False,
                                            transform=transforms.ToTensor(),
                                            download=True,)
test_loader = torch.utils.data.DataLoader(test_dataset, 
                                          batch_size=16,
                                          shuffle=False)

Files already downloaded and verified


In [18]:
classes = train_dataset.classes
classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [9]:
train_dataset.data.shape

(50000, 32, 32, 3)

In [23]:
test_dataset.data.shape

(10000, 32, 32, 3)

In [10]:
# Создайте собственную архитектуру! Можете использовать все, что угодно, но в ограничении на использование линейные слои (пока без сверток)
# Давайте добавим ограниченный Leaky_relu, то есть output = max(0.1x, 0.5x)
# Ваша задача добавить его в архитектуру сети как функцию активации
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 4 * hidden_dim)
        self.fc2 = nn.Linear(4 * hidden_dim, 2 * hidden_dim)
        self.fc3 = nn.Linear(2 * hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, output_dim)
    
    def __lrelu(self, x, low=0.1, high=0.5):
        return torch.max(x * low, x * high)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.leaky_relu(x, 0.05)
        x = self.fc2(x)
        x = F.leaky_relu(x, 0.05)
        x = self.fc3(x)
        x = self.__lrelu(x)
        x = self.fc4(x)
        #x = F.softmax(x)
        return x
    
    def predict(self, x):
        x = self.forward(x)
        x = F.softmax(x)
        return x

net = Net(32 * 32 * 3, 100, 10)

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [13]:
# Запустить обучение (по аналогии с тем, что делали на паре)
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Training is finished!')

  0%|          | 0/10 [00:00<?, ?it/s]

[1,     1] loss: 0.001
[1,   301] loss: 0.344
[1,   601] loss: 0.336


 10%|█         | 1/10 [00:10<01:34, 10.45s/it]

[2,     1] loss: 0.001
[2,   301] loss: 0.309
[2,   601] loss: 0.303


 20%|██        | 2/10 [00:20<01:21, 10.25s/it]

[3,     1] loss: 0.001
[3,   301] loss: 0.289
[3,   601] loss: 0.285


 30%|███       | 3/10 [00:31<01:15, 10.78s/it]

[4,     1] loss: 0.001
[4,   301] loss: 0.278
[4,   601] loss: 0.275


 40%|████      | 4/10 [00:44<01:07, 11.30s/it]

[5,     1] loss: 0.001
[5,   301] loss: 0.269
[5,   601] loss: 0.265


 50%|█████     | 5/10 [00:55<00:57, 11.46s/it]

[6,     1] loss: 0.001
[6,   301] loss: 0.260
[6,   601] loss: 0.259


 60%|██████    | 6/10 [01:07<00:46, 11.64s/it]

[7,     1] loss: 0.001
[7,   301] loss: 0.253
[7,   601] loss: 0.252


 70%|███████   | 7/10 [01:20<00:35, 11.84s/it]

[8,     1] loss: 0.001
[8,   301] loss: 0.248
[8,   601] loss: 0.246


 80%|████████  | 8/10 [01:32<00:24, 12.03s/it]

[9,     1] loss: 0.001
[9,   301] loss: 0.241
[9,   601] loss: 0.242


 90%|█████████ | 9/10 [01:46<00:12, 12.57s/it]

[10,     1] loss: 0.001
[10,   301] loss: 0.237
[10,   601] loss: 0.236


100%|██████████| 10/10 [01:57<00:00, 11.76s/it]

Training is finished!





Calculate accuracy

In [44]:
data_iter = iter(test_loader)

In [45]:
images, labels = next(data_iter)
labels

tensor([3, 8, 8, 0, 6, 6, 1, 6, 3, 1, 0, 9, 5, 7, 9, 8])

In [46]:
net.eval()
outputs = net(images)
outputs

tensor([[-7.2728e-01,  2.9940e-02,  9.1230e-01,  2.0052e+00, -2.3041e-01,
          1.5666e+00,  1.4700e+00, -2.8651e+00,  5.8745e-01, -1.9406e+00],
        [ 1.8165e+00,  3.3776e+00, -1.4165e+00, -1.4577e+00, -1.6867e+00,
         -2.2428e+00, -3.2711e+00, -1.7588e+00,  3.7623e+00,  3.8530e+00],
        [ 2.3779e+00,  2.8355e+00, -7.9422e-01, -1.4283e+00, -1.2667e+00,
         -1.9438e+00, -4.3288e+00, -1.5453e+00,  4.0085e+00,  2.4073e+00],
        [ 2.1591e+00,  1.0463e+00,  4.3292e-01, -9.2424e-01, -5.0109e-01,
         -1.0192e+00, -3.3220e+00, -7.2781e-01,  2.5441e+00,  4.8883e-01],
        [-7.4158e-01, -2.0179e+00,  1.8666e+00,  6.3143e-01,  2.4917e+00,
          7.3997e-01,  2.2789e+00, -2.1409e-02, -8.8384e-01, -2.2130e+00],
        [-1.3101e+00, -1.6838e-01,  6.9766e-01,  1.8987e+00,  6.6608e-01,
          1.4950e+00,  2.6361e+00, -2.6033e-01, -2.8196e+00, -3.8427e-01],
        [-1.3896e+00,  1.1168e+00, -1.8360e-01,  2.5398e+00, -1.3319e+00,
          2.3896e+00,  7.6129e-0

In [47]:
_, predicted = torch.max(outputs, 1)
predicted

tensor([3, 9, 8, 8, 4, 6, 3, 6, 2, 1, 8, 9, 1, 7, 9, 8])

In [53]:
data_iter = iter(test_loader)
true_classes = list()

for it in data_iter:
    true_classes.extend(it[1])

true_classes = np.array(true_classes)
len(true_classes), true_classes[:10]

(10000, array([3, 8, 8, 0, 6, 6, 1, 6, 3, 1]))

In [54]:
net.eval()
data_iter = iter(test_loader)
pred_classes = list()

for it in data_iter:
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    pred_classes.extend(predicted)

# pred_classes = np.array([classes[predicted[i]] for i in range(len(labels))])
pred_classes = np.array(pred_classes)
len(pred_classes), pred_classes[:10]

(10000, array([3, 9, 8, 8, 4, 6, 3, 6, 2, 1]))

In [55]:
print(f'Accuracy = {(true_classes == pred_classes).sum() / len(true_classes)}')

Accuracy = 0.0975


Метрика на тестовой выборке довольна низка. Попробуем улучшить.

### Изменение слоёв и их количества

In [87]:
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 8 * hidden_dim)
        self.do1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(8 * hidden_dim, 2 * hidden_dim)
        self.do2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(2 * hidden_dim, hidden_dim)
        self.do3 = nn.Dropout(0.33)
        self.fc4 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.do4 = nn.Dropout(0.25)
        self.fc5 = nn.Linear(hidden_dim // 2, output_dim)
    
    def __lrelu(self, x, low=0.1, high=0.5):
        return torch.max(x * low, x * high)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.leaky_relu(x, 0.05)
        x = self.do1(x)
        x = F.leaky_relu(x, 0.05)
        x = self.fc2(x)
        x = F.leaky_relu(x, 0.05)
        x = self.do2(x)
        x = F.leaky_relu(x, 0.05)
        x = self.fc3(x)
        x = self.__lrelu(x)
        x = self.do3(x)
        x = F.leaky_relu(x, 0.05)
        x = self.fc4(x)
        x = self.__lrelu(x)
        x = self.do4(x)
        x = F.leaky_relu(x, 0.05)
        x = self.fc5(x)
        #x = F.softmax(x)
        return x
    
    def predict(self, x):
        x = self.forward(x)
        x = F.softmax(x)
        return x

net = Net(32 * 32 * 3, 128, 10)

И сразу поменяем оптимизатор (чтобы два раза не учить)

### Оптимизатор

In [88]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.ASGD(net.parameters(), lr=0.001)

In [89]:
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Training is finished!')

  0%|          | 0/10 [00:00<?, ?it/s]

[1,     1] loss: 0.001
[1,   301] loss: 0.346
[1,   601] loss: 0.346


 10%|█         | 1/10 [00:18<02:45, 18.38s/it]

[2,     1] loss: 0.001
[2,   301] loss: 0.346
[2,   601] loss: 0.346


 20%|██        | 2/10 [00:36<02:27, 18.44s/it]

[3,     1] loss: 0.001
[3,   301] loss: 0.346
[3,   601] loss: 0.346


 30%|███       | 3/10 [00:55<02:10, 18.71s/it]

[4,     1] loss: 0.001
[4,   301] loss: 0.345
[4,   601] loss: 0.346


 40%|████      | 4/10 [01:14<01:52, 18.78s/it]

[5,     1] loss: 0.001
[5,   301] loss: 0.345
[5,   601] loss: 0.346


 50%|█████     | 5/10 [01:33<01:34, 18.82s/it]

[6,     1] loss: 0.001
[6,   301] loss: 0.345
[6,   601] loss: 0.345


 60%|██████    | 6/10 [01:52<01:14, 18.75s/it]

[7,     1] loss: 0.001
[7,   301] loss: 0.345
[7,   601] loss: 0.345


 70%|███████   | 7/10 [02:10<00:56, 18.69s/it]

[8,     1] loss: 0.001
[8,   301] loss: 0.345
[8,   601] loss: 0.345


 80%|████████  | 8/10 [02:29<00:37, 18.75s/it]

[9,     1] loss: 0.001
[9,   301] loss: 0.345
[9,   601] loss: 0.345


 90%|█████████ | 9/10 [02:48<00:18, 18.77s/it]

[10,     1] loss: 0.001
[10,   301] loss: 0.345
[10,   601] loss: 0.345


100%|██████████| 10/10 [03:07<00:00, 18.73s/it]

Training is finished!





In [90]:
data_iter = iter(test_loader)
true_classes = list()

for it in data_iter:
    true_classes.extend(it[1])

true_classes = np.array(true_classes)
len(true_classes), true_classes[:10]

(10000, array([3, 8, 8, 0, 6, 6, 1, 6, 3, 1]))

In [91]:
net.eval()
data_iter = iter(test_loader)
pred_classes = list()

for it in data_iter:
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    pred_classes.extend(predicted)

# pred_classes = np.array([classes[predicted[i]] for i in range(len(labels))])
pred_classes = np.array(pred_classes)
len(pred_classes), pred_classes[:10]

(10000, array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))

In [92]:
print(f'Accuracy = {(true_classes == pred_classes).sum() / len(true_classes)}')

Accuracy = 0.1


Есть небольшое улучшение.

### Аугментация

Добавим немного модицикаций исходного изображения. Возможно улучшатся результаты.

In [110]:
class RandomResizedCrop:
    def __call__(self, x):
        im_w, im_h = x.size
        scale = random.uniform(0.5, 1.0)
        target_w, target_h = int(im_w * scale), int(im_h * scale)
        x1 = random.randint(0, im_w - target_w)
        y1 = random.randint(0, im_h - target_h)
        x2, y2 = x1 + target_w, y1 + target_h
        return x.crop((x1, y1, x2, y2)).resize((im_w, im_h))

class RandomHorizontalFlip:
    def __call__(self, x):
        if random.random() < 0.5:
            return x.transpose(Image.FLIP_LEFT_RIGHT)
        return x

class RandomRotation:
    def __init__(self, degrees=15):
        self.degrees = degrees

    def __call__(self, x):
        angle = random.uniform(-self.degrees, self.degrees)
        return x.rotate(angle)

In [111]:
train_transform = transforms.Compose([
    RandomResizedCrop(),
    RandomHorizontalFlip(),
    RandomRotation(10),
    transforms.ToTensor()
])

In [112]:
test_transform = transforms.Compose([
    transforms.ToTensor()
])

In [113]:
train_dataset = torchvision.datasets.CIFAR10(root='data/',
                                             train=True,  
                                             transform=train_transform,
                                             download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64, 
                                           shuffle=True)


test_dataset = torchvision.datasets.CIFAR10(root='./data', 
                                            train=False,
                                            transform=test_transform,
                                            download=True,)
test_loader = torch.utils.data.DataLoader(test_dataset, 
                                          batch_size=16,
                                          shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [114]:
train_dataset.data.shape

(50000, 32, 32, 3)

In [115]:
test_dataset.data.shape

(10000, 32, 32, 3)

In [116]:
net = Net(32 * 32 * 3, 128, 10)

In [117]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.ASGD(net.parameters(), lr=0.005)

In [118]:
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Training is finished!')

  0%|          | 0/10 [00:00<?, ?it/s]

[1,     1] loss: 0.001
[1,   301] loss: 0.346
[1,   601] loss: 0.346


 10%|█         | 1/10 [00:22<03:24, 22.72s/it]

[2,     1] loss: 0.001
[2,   301] loss: 0.346
[2,   601] loss: 0.345


 20%|██        | 2/10 [00:46<03:06, 23.26s/it]

[3,     1] loss: 0.001
[3,   301] loss: 0.345
[3,   601] loss: 0.345


 30%|███       | 3/10 [01:09<02:43, 23.41s/it]

[4,     1] loss: 0.001
[4,   301] loss: 0.345
[4,   601] loss: 0.345


 40%|████      | 4/10 [01:33<02:20, 23.48s/it]

[5,     1] loss: 0.001
[5,   301] loss: 0.345
[5,   601] loss: 0.345


 50%|█████     | 5/10 [01:56<01:57, 23.44s/it]

[6,     1] loss: 0.001
[6,   301] loss: 0.345
[6,   601] loss: 0.345


 60%|██████    | 6/10 [02:20<01:34, 23.52s/it]

[7,     1] loss: 0.001
[7,   301] loss: 0.345
[7,   601] loss: 0.345


 70%|███████   | 7/10 [02:44<01:10, 23.65s/it]

[8,     1] loss: 0.001
[8,   301] loss: 0.345
[8,   601] loss: 0.345


 80%|████████  | 8/10 [03:08<00:47, 23.83s/it]

[9,     1] loss: 0.001
[9,   301] loss: 0.345
[9,   601] loss: 0.345


 90%|█████████ | 9/10 [03:32<00:23, 23.85s/it]

[10,     1] loss: 0.001
[10,   301] loss: 0.345
[10,   601] loss: 0.345


100%|██████████| 10/10 [03:56<00:00, 23.64s/it]

Training is finished!





In [119]:
data_iter = iter(test_loader)
true_classes = list()

for it in data_iter:
    true_classes.extend(it[1])

true_classes = np.array(true_classes)
len(true_classes), true_classes[:10]

(10000, array([3, 8, 8, 0, 6, 6, 1, 6, 3, 1]))

In [120]:
net.eval()
data_iter = iter(test_loader)
pred_classes = list()

for it in data_iter:
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    pred_classes.extend(predicted)

# pred_classes = np.array([classes[predicted[i]] for i in range(len(labels))])
pred_classes = np.array(pred_classes)
len(pred_classes), pred_classes[:10]

(10000, array([0, 0, 0, 0, 0, 9, 5, 9, 0, 0]))

In [121]:
print(f'Accuracy = {(true_classes == pred_classes).sum() / len(true_classes)}')

Accuracy = 0.0976


Аугментация для полносвязанных слоев не заметно, чтобы эффективно работала. Вернулись почти к первоначальным результатам.