In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Урок 2. Feed-forward neural network

Итак, давайте потренируемся тренировать нейронные сети прямого распространения (так, как делали на паре). При этом попробуем создать свою функцию активации на одном из слоёв

In [2]:
# Сделаем необходимые импорты

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

In [3]:
# Загрузим датасет CIFAR-100, сразу же создадим dataloader для него
# Если вам не хватает вычислительных ресурсов, то можно вернуться к CIFAR-10

train_dataset = torchvision.datasets.CIFAR100(root='data/',
                                              train=True,  
                                              transform=transforms.ToTensor(), 
                                              download=True)

image, label = train_dataset[0]
print(image.size())
print(label)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to data/cifar-100-python.tar.gz


  0%|          | 0/169001437 [00:00<?, ?it/s]

Extracting data/cifar-100-python.tar.gz to data/
torch.Size([3, 32, 32])
19


In [4]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=64, 
                                           shuffle=True)

In [5]:
test_dataset = torchvision.datasets.CIFAR100(root='./data',
                                             train=False,
                                             transform=transforms.ToTensor(),
                                             download=True)

test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=4,
                                          shuffle=False)

Files already downloaded and verified


In [6]:
# Создайте собственную архитектуру! Можете использовать всё, что угодно, но в ограничении на использование линейные слои (пока без свёрток)
# Давайте добавим ограниченный Leaky_relu, то есть output = max(0.1x, 0.5x)
# Ваша задача добавить его в архитектуру сети как функцию активации

class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 8 * hidden_dim)
        self.fc2 = nn.Linear(8 * hidden_dim, 4 * hidden_dim)
        self.fc3 = nn.Linear(4 * hidden_dim, 2 * hidden_dim)
        self.fc4 = nn.Linear(2 * hidden_dim, hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc2(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc3(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc4(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc5(x)
        return x

    def predict(self, x):
        x = x.view(x.shape[0], -1)
        x = self.fc1(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc2(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc3(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc4(x)
        x = F.leaky_relu(x, 0.1)
        x = self.fc5(x)
        x = F.softmax(x)
        return x

net = Net(3072, 100, 100)

In [7]:
# Запустить обучение (по аналогии с тем, что делали на паре)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [8]:
for epoch in tqdm(range(10)):  
    running_loss = 0.0
    
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0], data[1]

        # обнуляем градиент
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        
        if i % 300 == 0:    # печатаем каждые 300 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Training is finished!')

  0%|          | 0/10 [00:00<?, ?it/s]

[1,     1] loss: 0.002
[1,   301] loss: 0.691
[1,   601] loss: 0.691


 10%|█         | 1/10 [00:21<03:11, 21.32s/it]

[2,     1] loss: 0.002
[2,   301] loss: 0.691
[2,   601] loss: 0.690


 20%|██        | 2/10 [00:42<02:47, 20.96s/it]

[3,     1] loss: 0.002
[3,   301] loss: 0.690
[3,   601] loss: 0.690


 30%|███       | 3/10 [01:02<02:25, 20.76s/it]

[4,     1] loss: 0.002
[4,   301] loss: 0.689
[4,   601] loss: 0.688


 40%|████      | 4/10 [01:23<02:05, 20.85s/it]

[5,     1] loss: 0.002
[5,   301] loss: 0.682
[5,   601] loss: 0.673


 50%|█████     | 5/10 [01:44<01:43, 20.78s/it]

[6,     1] loss: 0.002
[6,   301] loss: 0.650
[6,   601] loss: 0.642


 60%|██████    | 6/10 [02:04<01:22, 20.53s/it]

[7,     1] loss: 0.002
[7,   301] loss: 0.627
[7,   601] loss: 0.621


 70%|███████   | 7/10 [02:24<01:01, 20.42s/it]

[8,     1] loss: 0.002
[8,   301] loss: 0.615
[8,   601] loss: 0.611


 80%|████████  | 8/10 [02:44<00:40, 20.43s/it]

[9,     1] loss: 0.002
[9,   301] loss: 0.608
[9,   601] loss: 0.605


 90%|█████████ | 9/10 [03:05<00:20, 20.38s/it]

[10,     1] loss: 0.002
[10,   301] loss: 0.603
[10,   601] loss: 0.600


100%|██████████| 10/10 [03:25<00:00, 20.56s/it]

Training is finished!



