# DL 1, Pytorch

Привет! На этом занятии мы познакомимся с PyTorch, с работой на GPU и с вычислением градиентов. В конце попробуем написать нейросеть для распознавания цифр.

**Для этого ноутбука надо включить GPU runtime**.

In [None]:
import torch

In [None]:
# NOTE(review): torch.sqrt needs an input tensor, so this bare call raises TypeError.
# Presumably it is kept only to inspect the signature/docstring interactively — confirm.
torch.sqrt()

In [None]:
# IPython wildcard lookup: lists the names in the torch namespace that end with "Tensor".
torch.*Tensor?

## Simple Pytorch

Изучим простые методы PyTorch. Они сильно напоминают NumPy, и часто имя метода из NumPy можно использовать и в PyTorch.

In [None]:
# Allocate a 2x3x4 float tensor WITHOUT initializing it: its values are whatever
# happened to be in memory. torch.empty is the documented factory for this and is
# more explicit than the legacy torch.Tensor(sizes) constructor.
t = torch.empty(2, 3, 4)

In [None]:
# Display t; the contents are arbitrary because the tensor has not been filled yet.
t

In [None]:
# Size of t along each dimension.
t.shape

In [None]:
# In-place fill (note the trailing underscore): integers drawn uniformly from [0, 10).
t.random_(0, 10)
t

In [None]:
# Replace t with a zero tensor of the same shape, dtype and device.
t = t.new_zeros(t.shape)
t

In [None]:
# Look at the same 24 elements of t as a 3x8 matrix. view() shares t's storage and
# verifies that the element count matches, which the low-level in-place resize_()
# on a legacy-constructed tensor does not do.
r = t.view(3, 8)
r

In [None]:
# Two 1x4 row vectors with entries sampled uniformly from [0, 1).
a = torch.rand(1, 4)
b = torch.rand(1, 4)
# Element-wise sum.
torch.add(a, b)

In [None]:
# Element-wise (Hadamard) product, not a matrix product.
torch.mul(a, b)

In [None]:
# Matrix product of a with b transposed.
torch.matmul(a, b.T)

In [None]:
# (3x4) @ (4x5) -> 3x5 matrix product.
a = torch.rand(3, 4)
b = torch.rand(4, 5)
torch.matmul(a, b)

In [None]:
# Reductions to a single value: the norm of a and the sum of all entries of b.
torch.norm(a), torch.sum(b)

In [None]:

# Divide every element by the scalar 10.
torch.div(a, 10)

In [None]:
# Swap dimensions 0 and 1.
torch.transpose(a, 0, 1)

In [None]:
# Integers 0..9; with integer arguments arange yields an integer tensor.
t = torch.arange(0, 10)
t.dtype

In [None]:
# Convert to 32-bit floats; the conversion returns a new tensor, so rebind t.
t = t.float()
t, t.dtype

## Autograd & GPU

Теперь поработаем с GPU и получением градиентов.

In [None]:
# Fresh random operands, created on the CPU.
a = torch.rand(3, 4)
b = torch.rand(4, 5)
a

In [None]:
# .to() returns a copy on the target device instead of moving in place, so rebind both names.
a, b = a.to("cuda"), b.to("cuda")

In [None]:
# Shell escape: run nvidia-smi to print the GPU status report.
!nvidia-smi

In [None]:
# Matrix product; both operands must live on the same device.
torch.matmul(a, b)

In [None]:
# b was already moved to the GPU a few cells above, so this is effectively a no-op.
b = b.cuda()

In [None]:
# Same product again, with both operands on the GPU.
a.matmul(b)

In [None]:
# Ask autograd to record operations on a (in place) so gradients can flow back to it.
a.requires_grad_()

# Scalar result: the sum of all entries of the matrix product.
s = torch.matmul(a, b).sum()
s

In [None]:
# Backpropagate from the scalar s; gradients are accumulated into .grad of the
# tensors that require them.
s.backward()
# backward() does not change the value of s itself.
s

In [None]:
# Display both operands; only a was marked with requires_grad above.
a, b

In [None]:
# a.grad holds ds/da. b never had requires_grad enabled in the cells above,
# so b.grad is None.
a.grad, b.grad

In [None]:
# This time both operands are created as leaves that track gradients.
a = torch.rand(3, 4, requires_grad=True)
b = torch.rand(4, 5, requires_grad=True)
s = torch.matmul(a, b).sum()
# Fills a.grad and b.grad.
s.backward()

In [None]:
# Display both tensors; each was created with requires_grad=True.
# NOTE(review): the gradients computed by backward() live in a.grad / b.grad —
# presumably those were meant to be shown here as well; confirm.
a, b

## Neural Network


Давайте определим простую ML-задачу и попробуем решить её с помощью нейросети.

In [None]:
# Synthetic linear-regression data: y = X @ w_true + b_true + small noise.
X = torch.rand(1000, 10)
w_true = torch.rand(10, 1) * 10
b_true = torch.tensor(3.1415926)
# The noise must be a (1000, 1) column like X @ w_true. A flat (1000,) vector
# would broadcast against the column and silently turn y into a (1000, 1000) matrix.
eps = torch.rand(1000, 1) * 1e-3
y = X @ w_true + b_true + eps

In [None]:
# Randomly initialized trainable parameters; requires_grad_ makes autograd track them.
w = torch.rand(10, 1).requires_grad_()
b = torch.rand(1).requires_grad_()

In [None]:
# Forward pass of the linear model, then the mean squared error against the targets.
y_hat = torch.matmul(X, w) + b
L = (y_hat - y).pow(2).mean()
L

In [None]:
# Backpropagate the loss: fills w.grad and b.grad.
L.backward()

In [None]:
lr = 1e-2

# One manual gradient-descent step. no_grad keeps the in-place parameter update
# itself out of the autograd graph.
with torch.no_grad():
    w.sub_(w.grad * lr)
    b.sub_(b.grad * lr)

In [None]:
for idx in range(5000):
    # Gradients accumulate across backward() calls, so drop the previous ones first.
    w.grad, b.grad = None, None
    y_hat = torch.matmul(X, w) + b
    L = (y_hat - y).pow(2).mean()
    L.backward()
    # Plain gradient descent; the update must not be recorded by autograd.
    with torch.no_grad():
        w.sub_(w.grad * lr)
        b.sub_(b.grad * lr)
    # Report progress every 100th iteration.
    if not idx % 100:
        print(f"Current Loss: {L}")

In [None]:
# Distance between the true and the learned parameters (smaller is better).
(w_true - w).norm(), (b_true - b).norm()

In [None]:
# True and learned weights as nested Python lists, for side-by-side comparison.
w_true.tolist(), w.tolist()

In [None]:
# True and learned bias.
b_true, b

Воспользуемся высокоуровневым способом описания нейросетей в PyTorch:

In [None]:
class Linear(torch.nn.Module):
    """Thin wrapper around a single ``torch.nn.Linear`` layer.

    Args:
        in_shape: number of input features.
        out_shape: number of output features.
    """

    def __init__(self, in_shape, out_shape):
        super().__init__()
        # Registered as the submodule "layer", so its weight and bias show up
        # in model.parameters().
        self.layer = torch.nn.Linear(in_features=in_shape, out_features=out_shape)

    def forward(self, x):
        """Apply the affine map to ``x`` (last dimension must be ``in_shape``)."""
        prediction = self.layer(x)
        return prediction

In [None]:
# Linear model with 10 inputs and 1 output, MSE loss, and plain SGD that reuses
# the learning rate defined earlier.
model = Linear(10, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
# Move the model's parameters to the GPU.
# NOTE(review): the optimizer above was built before this move. Module.to() presumably
# updates the parameters in place so the optimizer still sees them, but building the
# optimizer after the move is the safer order — confirm.
model = model.to("cuda")

In [None]:
# Copy the data to the GPU once: it does not change between iterations, so
# repeating the host-to-device transfer in every step is wasted work.
X_gpu, y_gpu = X.to("cuda"), y.to("cuda")

for idx in range(5000):
    # Clear the gradients accumulated by the previous step.
    optimizer.zero_grad()
    y_hat = model(X_gpu)
    L = criterion(y_hat, y_gpu)
    L.backward()
    # Apply the SGD update to every parameter handed to the optimizer.
    optimizer.step()
    if idx % 100 == 0:
        print(f"Current Loss: {L}")

In [None]:
# Materialize the parameter iterator to display the learned parameters.
list(model.parameters())

In [None]:
# TODO: compare the learned parameters of the model with w_true and b_true

## MNIST

Перейдём к другой задаче — распознаванию цифр. Загрузим датасет MNIST и напишем нейросеть, которая отличает цифры меньше 5 от цифр, больших или равных 5.

In [None]:
from keras.datasets import mnist


# Load MNIST as (images, labels) pairs for the train and test splits.
# NOTE(review): presumably NumPy uint8 arrays with 28x28 images — confirm against the Keras docs.
(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [None]:
# Render the second training image as ASCII art: "X" for pixels brighter than 128,
# "." for everything else.
pixel_rows = train_X[1].tolist()
ascii_rows = [" ".join("X" if value > 128 else "." for value in row) for row in pixel_rows]
print("\n".join(ascii_rows))

Создадим датасет для нашей задачи:

In [None]:
# Images: move to the GPU and divide by 255 (assumes 8-bit pixel intensities, so
# the result lies in [0, 1]).
X_train_tensor = torch.as_tensor(train_X).to('cuda') / 255
X_test_tensor = torch.as_tensor(test_X).to('cuda') / 255

# Targets for the binary task: 1.0 when the digit is >= 5, 0.0 when it is < 5.
# Labels are class ids, not intensities, so they must not be divided by 255.
y_train_tensor = (torch.as_tensor(train_y).to('cuda') >= 5).to(torch.float32)
y_test_tensor = (torch.as_tensor(test_y).to('cuda') >= 5).to(torch.float32)

Опишем простую двухслойную сеть:

In [None]:
class NNClassifier(torch.nn.Module):
    """Two-layer perceptron producing one probability per input row.

    Architecture: Linear(784 -> 100) -> Sigmoid -> Linear(100 -> 1) -> Sigmoid.
    The input's last dimension must be 28 * 28 = 784, i.e. images have to be
    flattened before they are passed in.
    """

    def __init__(self):
        super().__init__()

        self.layer_0 = torch.nn.Linear(28 * 28, 100)
        self.act_0 = torch.nn.Sigmoid()
        self.layer_1 = torch.nn.Linear(100, 1)
        self.act_1 = torch.nn.Sigmoid()

    def forward(self, x):
        """Return values in (0, 1) with shape ``(..., 1)``."""
        hidden = self.act_0(self.layer_0(x))
        logits = self.layer_1(hidden)
        # The final sigmoid squashes the single logit into a probability.
        return self.act_1(logits)

Напишем для неё Loss-функцию

(подсказка: вспомните logistic regression)

In [None]:
class NLLLoss(torch.nn.Module):
    """Binary cross-entropy (Bernoulli negative log-likelihood), averaged over all elements.

    Args (of ``forward``):
        y_pred: predicted probabilities in [0, 1], e.g. the output of a sigmoid.
        y_true: targets in [0, 1] with the same number of elements as ``y_pred``.

    Returns:
        A scalar tensor ``-mean(y * log(p) + (1 - y) * log(1 - p))``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, y_pred, y_true):
        # A flat (N,) target against an (N, 1) prediction would broadcast to (N, N)
        # and average over meaningless cross terms, so align the target with the
        # prediction whenever the element counts agree.
        if y_true.shape != y_pred.shape and y_true.numel() == y_pred.numel():
            y_true = y_true.reshape(y_pred.shape)

        # log(0) is -inf, and 0 * -inf is nan. Bound the logs from below, as
        # torch.nn.BCELoss does, so saturated predictions give a finite loss.
        log_p = torch.clamp(torch.log(y_pred), min=-100)
        log_not_p = torch.clamp(torch.log(1 - y_pred), min=-100)

        return -(y_true * log_p + (1 - y_true) * log_not_p).mean()

Повторим цикл обучения для новой сети!

In [None]:
# Fresh classifier, its loss, and a plain SGD optimizer over the model's parameters.
model = NNClassifier()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
criterion = NLLLoss()

# Move the parameters to the GPU; Module.to returns the module, so the cell displays it.
model.to('cuda')

In [None]:
# Shape of the training images before they are flattened for the model.
X_train_tensor.shape

In [None]:
LIMIT = 30000

# Slice and reshape once: the training subset does not change between iterations.
# Every image becomes one flat feature row. The targets become a (LIMIT, 1) column
# so they line up element-wise with the model output, which ends in Linear(100, 1),
# instead of broadcasting to a LIMIT x LIMIT matrix inside the loss.
X_batch = X_train_tensor[:LIMIT].reshape(LIMIT, -1)
y_batch = y_train_tensor[:LIMIT].reshape(LIMIT, 1)

for idx in range(500):
    optimizer.zero_grad()
    # Call the module itself rather than .forward() so that registered hooks run.
    y_hat = model(X_batch)
    L = criterion(y_hat, y_batch)
    L.backward()
    optimizer.step()
    if idx % 100 == 0:
        print(f"Current Loss: {L}")