# Семинар 2

## План ноутбука

1. Высокоуровневое API для обучения нейросетей в `PyTorch`
2. Обучение первой нейросети в `PyTorch`
3. BatchNorm и Dropout

## Высокоуровневое API для обучения нейросетей в `PyTorch`

In [None]:
import torch
import torch.nn as nn

### Создание объекта нейросети

In [None]:
# Build a feed-forward network by chaining layers: nn.Sequential applies
# them in the order they are passed.
net = nn.Sequential(
    nn.Linear(700, 500),
    nn.ReLU(),
    nn.Linear(500, 200),
    nn.ReLU(),
    nn.Linear(200, 10)
)

In [None]:
net

In [None]:
from collections import OrderedDict

# Same stack of layers, but passing an OrderedDict gives every layer a name.
net = nn.Sequential(
    OrderedDict(
        [
            ('linear1', nn.Linear(700, 500)),
            ('relu1', nn.ReLU()),
            ('linear2', nn.Linear(500, 200)),
            ('relu2', nn.ReLU()),
            ('linear3', nn.Linear(200, 10))
        ]
    )
)

In [None]:
net

In [None]:
# Named layers are reachable as attributes of the container.
net.linear1

In [None]:
# A batch of 6 random samples with 700 features each, matching the input
# size of the first linear layer.
input_tensor = torch.rand(6, 700)

net(input_tensor).shape

In [None]:
# A custom network subclasses nn.Module and implements __init__ and forward.

class CustomTaskNetwork(nn.Module):
    """Fully connected network mapping 700 input features to 10 outputs.

    ``linear2`` is applied twice in ``forward``, so the two hidden
    transformations share one set of weights.
    """

    def __init__(self):
        super().__init__()

        # The registration order (linear1, linear3, linear2) is kept on
        # purpose: it defines the order of the sub-modules in repr(),
        # parameters() and state_dict().
        self.linear1 = nn.Linear(in_features=700, out_features=500)
        self.linear3 = nn.Linear(in_features=500, out_features=10)
        self.linear2 = nn.Linear(in_features=500, out_features=500)

        self.activation = nn.ReLU()

    def forward(self, x):
        """Return the network output for input ``x`` (last dimension 700)."""
        hidden = self.activation(self.linear1(x))

        # Two passes through the same 500 -> 500 layer.
        for _ in range(2):
            hidden = self.activation(self.linear2(hidden))

        return self.linear3(hidden)

In [None]:
# Rebind `net` to an instance of the custom module.
net = CustomTaskNetwork()

In [None]:
net

In [None]:
net(input_tensor).shape

In [None]:
# Move the module's parameters to the first CUDA device; this requires a
# CUDA-capable GPU.
net.to(torch.device('cuda:0'))

In [None]:
net.linear1.weight.device

In [None]:
# NOTE(review): input_tensor was created by torch.rand without a device
# argument, while net has just been moved to cuda:0, so this call presumably
# demonstrates the device-mismatch error - confirm it is intentional.
net(input_tensor).shape

In [None]:
# Copy the input to the GPU before the forward pass.
net(input_tensor.cuda()).shape

In [None]:
# Move the parameters back to the CPU.
net.cpu()

In [None]:
# Switch the module to training mode.
net.train()

In [None]:
# `training` is the boolean flag toggled by train() / eval().
net.training

In [None]:
# Switch the module to evaluation mode.
net.eval()

In [None]:
net.training

In [None]:
# Weight tensor of a single layer.
net.linear1.weight

In [None]:
# parameters() is an iterator; wrap it in list() to display all tensors.
list(net.parameters())

In [None]:
# Mapping from parameter names to their tensors.
net.state_dict()

In [None]:
# Save only the state dict (the weights), not the module object itself.
torch.save(net.state_dict(), 'model.pt')

In [None]:
# torch.load returns the object that was saved - here, the state dict.
torch.load('model.pt')

In [None]:
# Copy the saved weights back into the existing module.
net.load_state_dict(torch.load('model.pt'))

In [None]:
# torch.save also accepts a plain tensor.
torch.save(torch.rand(100, 100), "tensor.pt")

In [None]:
# Notebook shell command: list the working directory.
! ls -la

In [None]:
torch.load("tensor.pt")

### Оптимизаторы

In [None]:
from torch import optim

In [None]:
optim.SGD, optim.Adam

In [None]:
# A single optimizer over all parameters of net.
optimizer = optim.Adam(net.parameters(), betas=(0.9, 0.999), lr=1e-3)

In [None]:
optimizer

In [None]:
# Per-parameter-group options: the linear2 group overrides lr with 1e-3,
# the linear1 group falls back to the default lr=1e-2; momentum=0.9 is the
# default for both groups.
# net.linear3 is not listed in any group, so this optimizer does not
# update it.
optimizer = optim.SGD(
    [
        {'params': net.linear1.parameters()},
        {'params': net.linear2.parameters(), 'lr': 1e-3}
    ],
    lr=1e-2,
    momentum=0.9
)

In [None]:
optimizer

In [None]:
# Apply one update step.
# NOTE(review): no backward() has been called on net in the cells above, so
# there are presumably no gradients to apply here - confirm this is only an
# API demonstration.
optimizer.step()

In [None]:
# Reset gradients; set_to_none=True sets .grad to None instead of filling
# it with zeros.
optimizer.zero_grad(set_to_none=True)

### Функции потерь

In [None]:
nn.L1Loss, nn.MSELoss, nn.CrossEntropyLoss, nn.NLLLoss

In [None]:
# Loss functions are modules: instantiate once, then call on
# (prediction, target).
loss = nn.MSELoss()

In [None]:
loss

In [None]:
# requires_grad=True so that backward() populates x.grad.
x = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)

output = loss(x, target)

print(output)

# Compute the gradient of the loss with respect to x.
output.backward()

In [None]:
x.grad

### Датасеты и даталоадеры

In [None]:
from torch.utils.data import Dataset
from torch.utils.data import TensorDataset

In [None]:
n_features = 2
n_objects = 300

# Fix the RNG seed so the generated data is reproducible; the trailing
# semicolon suppresses the cell's output in the notebook.
torch.manual_seed(0);

In [None]:
# Ground-truth weights of the linear model.
w_true = torch.randn(n_features, 1)

# Features uniform in [-5, 5); feature j is then scaled by 2 * j + 1 so the
# columns have different ranges.
X = (torch.rand(n_objects, n_features) - 0.5) * 10
X *= (torch.arange(n_features) * 2 + 1)

# Targets: linear response plus uniform [0, 1) noise.
Y = X @ w_true
Y += torch.rand_like(Y)

w_true.shape, X.shape, Y.shape

In [None]:
X

In [None]:
Y

In [None]:
# Random initial guess for the weights, same shape as w_true.
w = torch.rand_like(w_true)

w

In [None]:
# Draw 10 random row indices (repeats are possible) to form a mini-batch
# by hand.
idx = torch.randint(low=0, high=len(X), size=(10,))

print(idx)

X[idx]

In [None]:
# Predictions of the current guess on the mini-batch.
X[idx] @ w

In [None]:
# Residuals: prediction minus target.
X[idx] @ w - Y[idx]

In [None]:
# TensorDataset pairs X[i] with Y[i] so samples can be fetched by index.
dataset = TensorDataset(X, Y)

In [None]:
dataset[7]

In [None]:
X[7], Y[7]

In [None]:
# A custom dataset subclasses Dataset and implements __init__, __len__
# and __getitem__.

class CustomDataset(Dataset):
    """Synthetic linear-regression dataset generated at construction time.

    Features are drawn uniformly from [-5, 5) and feature ``j`` is scaled by
    ``2 * j + 1``. Targets are ``X @ w_true`` plus uniform [0, 1) noise.
    """

    def __init__(self, w_true, n_features, n_objects):
        centered = torch.rand(n_objects, n_features) - 0.5
        per_feature_scale = torch.arange(n_features) * 2 + 1

        self.X = centered * 10
        self.X *= per_feature_scale

        targets = self.X @ w_true
        targets += torch.rand_like(targets)
        self.Y = targets

    def __len__(self):
        """Return the number of samples."""
        return len(self.Y)

    def __getitem__(self, item):
        """Return the ``(features, target)`` pair stored at index ``item``."""
        features = self.X[item]
        target = self.Y[item]
        return features, target

In [None]:
# CustomDataset draws its own random X and Y in __init__, so its samples
# are not the X, Y tensors created earlier.
dataset = CustomDataset(w_true, n_features, n_objects)

In [None]:
dataset[7]

In [None]:
dataset.X[7]

In [None]:
from torch.utils.data import DataLoader

In [None]:
# Batches of 4 samples in shuffled order; drop_last=True discards a final
# incomplete batch.
loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)

In [None]:
X.shape

In [None]:
# Inspect only the first batch.
for x, y in loader:
    print(f"{x=}\t{x.shape=}")
    print(f"{y=}\t{y.shape=}")
    break

### Общая структура обучения модели

In [None]:
# Schematic of a single training epoch.
# NOTE(review): `model`, `dataloader` and `loss_fn` are not defined in the
# cells above, so this cell looks illustrative rather than runnable at this
# point in the notebook - confirm.
model.train()

for x, y in dataloader:
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass.
    output = model(x)

    loss = loss_fn(output, y)

    # Backward pass: compute gradients of the loss.
    loss.backward()

    # Update the parameters.
    optimizer.step()

In [None]:
from tqdm import tqdm

In [None]:
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Optimizer


def train(model: nn.Module, data_loader: DataLoader, optimizer: Optimizer, loss_fn):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module to optimize; it is switched to training mode.
        data_loader: Iterable yielding ``(x, y)`` batches; must support
            ``len()``.
        optimizer: Optimizer that updates the parameters of ``model``.
        loss_fn: Callable taking ``(output, y)`` and returning a scalar loss
            tensor.

    Returns:
        The sum of the batch losses divided by ``len(data_loader)``.
    """
    model.train()

    batch_losses = []

    for features, targets in tqdm(data_loader):
        optimizer.zero_grad()

        batch_loss = loss_fn(model(features), targets)
        batch_loss.backward()

        # .item() extracts a Python float from the scalar loss tensor.
        batch_losses.append(batch_loss.item())

        optimizer.step()

    return sum(batch_losses) / len(data_loader)


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


@torch.inference_mode()
def evaluate(model: nn.Module, data_loader: DataLoader, loss_fn):
    """Compute the mean per-batch loss without updating the model.

    The function runs under ``torch.inference_mode()`` (see the decorator).

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        data_loader: Iterable yielding ``(x, y)`` batches; must support
            ``len()``.
        loss_fn: Callable taking ``(output, y)`` and returning a scalar loss
            tensor.

    Returns:
        The sum of the batch losses divided by ``len(data_loader)``.
    """
    model.eval()

    batch_losses = []

    for features, targets in tqdm(data_loader):
        batch_loss = loss_fn(model(features), targets)
        batch_losses.append(batch_loss.item())

    return sum(batch_losses) / len(data_loader)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns


sns.set(style='darkgrid')


def plot_stats(
    train_loss: list[float],
    valid_loss: list[float],
    title: str
):
    """Plot training and validation loss curves on one figure and show it.

    Args:
        train_loss: Training loss values, one per epoch.
        valid_loss: Validation loss values, one per epoch.
        title: Prefix of the figure title; ``' loss'`` is appended to it.
    """
    plt.figure(figsize=(16, 8))
    plt.title(title + ' loss')

    curves = (
        (train_loss, 'Train loss'),
        (valid_loss, 'Valid loss'),
    )
    for history, label in curves:
        plt.plot(history, label=label)

    plt.legend()

    plt.xlabel("Epoch")
    plt.ylabel("Loss")

    plt.show()

In [None]:
from IPython.display import clear_output


def fit(model, train_loader, valid_loader, optimizer, loss_fn, num_epochs, title):
    """Train ``model`` for ``num_epochs`` epochs, re-plotting losses each epoch.

    Every epoch calls ``train`` on ``train_loader`` and ``evaluate`` on
    ``valid_loader``, records both losses, clears the cell output and redraws
    the loss curves with ``plot_stats`` using ``title``.
    """
    train_loss_history = []
    valid_loss_history = []

    for _ in range(num_epochs):
        epoch_train_loss = train(model, train_loader, optimizer, loss_fn)
        epoch_valid_loss = evaluate(model, valid_loader, loss_fn)

        train_loss_history.append(epoch_train_loss)
        valid_loss_history.append(epoch_valid_loss)

        # Remove the previous epoch's output before drawing the updated plot.
        clear_output()
        plot_stats(train_loss_history, valid_loss_history, title)

## Обучение первой нейросети в `PyTorch`

In [None]:
class CustomTaskNetwork(nn.Module):
    """Linear regression model: a single linear layer with one output.

    The input size is read from the module-level ``n_features`` when the
    model is constructed.
    """

    def __init__(self):
        super().__init__()

        self.linear = nn.Linear(in_features=n_features, out_features=1)

    def forward(self, x):
        """Return the linear layer's prediction for ``x``."""
        prediction = self.linear(x)
        return prediction


model = CustomTaskNetwork()

# Adam over all parameters of the model.
optimizer = optim.Adam(model.parameters(), lr=1e-2)

# Mean squared error between prediction and target.
loss_fn = nn.MSELoss()

In [None]:
from torch.utils.data import random_split


# Rebind `dataset` to a TensorDataset over the X, Y tensors.
dataset = TensorDataset(X, Y)

# 80/20 train/validation split: the validation size is the remainder, so the
# two sizes always sum to len(dataset). The seeded generator makes the split
# reproducible.
train_dataset, valid_dataset = random_split(
    dataset,
    (int(len(dataset) * 0.8), len(dataset) -  int(len(dataset) * 0.8)),
    generator=torch.Generator().manual_seed(300)
)

In [None]:
# Only the training loader shuffles; the validation loader keeps a fixed
# order.
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=10, shuffle=False)

In [None]:
# Train for 20 epochs; 'Simple fc' is used as the plot title prefix.
fit(model, train_loader, valid_loader, optimizer, loss_fn, 20, 'Simple fc')

## BatchNorm и Dropout

In [None]:
# Dropout that zeroes each element with probability p=0.1.
dropout = nn.Dropout(p=0.1)

dropout

In [None]:
x = torch.rand(3, 7)

x

In [None]:
# For comparison with the outputs below: in training mode the elements that
# are kept are scaled by 1 / (1 - p) = 1 / 0.9.
x / 0.9

In [None]:
dropout.train()

# Each call draws a new random mask, so the three outputs differ.
for _ in range(3):
    print(dropout(x))

In [None]:
# In evaluation mode dropout returns its input unchanged.
dropout.eval()

dropout(x)

In [None]:
batch_norm = nn.BatchNorm1d(num_features=7)

batch_norm

In [None]:
x = torch.rand(3, 7)

x

In [None]:
# A freshly created module is in training mode: this call normalizes with
# the statistics of the batch and updates the running statistics.
batch_norm(x)

In [None]:
# Learnable per-feature scale.
batch_norm.weight

In [None]:
# Learnable per-feature shift.
batch_norm.bias

In [None]:
# Running estimate of the per-feature mean.
batch_norm.running_mean

In [None]:
# Running estimate of the per-feature variance.
batch_norm.running_var

In [None]:
# Counter of training-mode forward passes seen so far.
batch_norm.num_batches_tracked

In [None]:
# Another training-mode forward pass increments the counter.
batch_norm(x)

batch_norm.num_batches_tracked

In [None]:
# In evaluation mode the running statistics are used instead of the batch
# statistics.
batch_norm.eval()

batch_norm(x)

In [None]:
# A new module switched to evaluation mode before any training-mode pass:
# it normalizes with its initial running statistics.
batch_norm = nn.BatchNorm1d(num_features=7)

batch_norm.eval()

batch_norm(x)