## Домашка 

### Задание 1 (8 баллов)
Обучите модель с минимум 15 слоями, где у каждого слоя разные параметры (Dropout, Conv1d и Pooling, Dense считаются слоями, остальное нет, но их тоже можно использовать). Как минимум 4 слоя должны быть наложены друг на друга и как минимум 2 параллельных слоя (последовательности слоев). Должен быть хотя бы один слой каждого типа.

При обучении используйте колбек для отслеживания лучшей модели. Ориентируйтесь на Recall@Precision меру. Качество модели не должно быть околонулевым. Если метрики не растут, то попробуйте пообучать подольше или перестроить саму сеть.

Советы: Начните с небольших сетей и постепенно добавляйте, не пытайтесь сразу собрать все слои. Иногда кернел может крашиться просто так или из-за слишком больших матриц.



In [1]:
import os
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam

device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: mps


In [2]:
def preprocess(text):
    tokens = text.lower().split()
    tokens = [token.strip(".,!?;:'\"()[]{}") for token in tokens]
    return tokens

data = pd.read_csv("lenta_40k.csv.zip")


In [3]:
vocab = Counter()
for t in data.text:
    vocab.update(preprocess(t))
filtered = {w for w,c in vocab.items() if c > 30}

word2id = {"PAD":0, "UNK":1}
for w in sorted(filtered):
    word2id[w] = len(word2id)
id2word = {i:w for w,i in word2id.items()}

In [4]:
X = []
for t in data.text:
    ids = [word2id.get(tok,1) for tok in preprocess(t)]
    X.append(ids)
MAX_LEN = int(np.median([len(x) for x in X]) + 30)
X = [x[:MAX_LEN] + [0]*max(0, MAX_LEN-len(x)) for x in X]
X = np.array(X, dtype=int)

In [5]:
label2id = {l:i for i,l in enumerate(sorted(set(data.topic)))}
y = np.array([label2id[l] for l in data.topic], dtype=int)
num_classes = len(label2id)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.05,
                                                  stratify=y, random_state=42)


In [6]:
class TextDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)
    def __len__(self): return len(self.y)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

train_ds = TextDataset(X_train, y_train)
val_ds   = TextDataset(X_val,   y_val)
batch_size = 256
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl   = DataLoader(val_ds,   batch_size=batch_size)

In [7]:
def recall_at_precision(outputs, targets, threshold=0.8):
    probs = torch.softmax(outputs, dim=1)
    confidences, preds = probs.max(dim=1)
    preds_binary = confidences >= threshold
    tp = ((preds == targets) & preds_binary).sum().item()
    fp = (preds_binary & (preds != targets)).sum().item()
    fn = ((preds != targets) & ~preds_binary).sum().item()
    precision = tp / (tp + fp + 1e-8)
    recall    = tp / (tp + fn + 1e-8)
    return recall, precision

In [8]:
class Task1Model(nn.Module):
    def __init__(self, vocab_size, emb_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)

        self.branch1 = nn.Sequential(
            nn.Conv1d(emb_dim, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2)
        )
        self.branch2 = nn.Sequential(
            nn.Conv1d(emb_dim, 32, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.AvgPool1d(2),
            nn.Dropout(0.3)
        )
        self.branch3 = nn.Sequential(
            nn.Conv1d(emb_dim, 32, kernel_size=7, padding=3),
            nn.ReLU(),
            nn.MaxPool1d(2),
            nn.Dropout(0.2)
        )
        self.stack = nn.Sequential(
            nn.Conv1d(96, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv1d(64, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv1d(64, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.Dropout(0.4)
        )
        self.fc1 = nn.Linear(32*(MAX_LEN//2), 64)
        self.dropout_fc = nn.Dropout(0.5)
        self.fc2 = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.embedding(x).permute(0,2,1)  # B x emb_dim x L
        b1 = self.branch1(x)
        b2 = self.branch2(x)
        b3 = self.branch3(x)
        concat = torch.cat([b1,b2,b3], dim=1)
        out = self.stack(concat)
        out = out.flatten(1)
        out = F.relu(self.fc1(out))
        out = self.dropout_fc(out)
        return self.fc2(out)


In [9]:
def train_model(model, train_dl, val_dl, epochs=10):
    model.to(device)
    opt = Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()
    best_rec = 0.0
    for epoch in range(1, epochs+1):

        model.train()
        for xb,yb in train_dl:
            xb,yb = xb.to(device), yb.to(device)
            preds = model(xb)
            loss = criterion(preds, yb)
            opt.zero_grad(); loss.backward(); opt.step()

        model.eval()
        recs, precs = [], []
        with torch.no_grad():
            for xb,yb in val_dl:
                xb,yb = xb.to(device), yb.to(device)
                out = model(xb)
                r,p = recall_at_precision(out, yb, threshold=0.8)
                recs.append(r); precs.append(p)
        mean_rec, mean_prec = np.mean(recs), np.mean(precs)
        print(f"Epoch {epoch}: Recall@0.8Prec={mean_rec:.4f}, Precision={mean_prec:.4f}")
        if mean_rec > best_rec:
            best_rec = mean_rec
            torch.save(model.state_dict(), "best_task1.pt")
            print("  -> Saved best model")
    print(f"Best Recall@Precision: {best_rec:.4f}")


In [10]:
model1 = Task1Model(len(word2id), emb_dim=100, num_classes=num_classes)
train_model(model1, train_dl, val_dl, epochs=15)

Epoch 1: Recall@0.8Prec=0.0000, Precision=0.0000
Epoch 2: Recall@0.8Prec=0.0695, Precision=0.9724
  -> Saved best model
Epoch 3: Recall@0.8Prec=0.1619, Precision=0.9702
  -> Saved best model
Epoch 4: Recall@0.8Prec=0.3600, Precision=0.9305
  -> Saved best model
Epoch 5: Recall@0.8Prec=0.4171, Precision=0.9292
  -> Saved best model
Epoch 6: Recall@0.8Prec=0.4871, Precision=0.9248
  -> Saved best model
Epoch 7: Recall@0.8Prec=0.5168, Precision=0.9085
  -> Saved best model
Epoch 8: Recall@0.8Prec=0.5274, Precision=0.9185
  -> Saved best model
Epoch 9: Recall@0.8Prec=0.5443, Precision=0.9216
  -> Saved best model
Epoch 10: Recall@0.8Prec=0.5659, Precision=0.9140
  -> Saved best model
Epoch 11: Recall@0.8Prec=0.6162, Precision=0.8882
  -> Saved best model
Epoch 12: Recall@0.8Prec=0.6385, Precision=0.8787
  -> Saved best model
Epoch 13: Recall@0.8Prec=0.6498, Precision=0.8748
  -> Saved best model
Epoch 14: Recall@0.8Prec=0.6618, Precision=0.8727
  -> Saved best model
Epoch 15: Recall@0.8Pre


### Задание 2 (2 балла)
Обучите нейросеть со сверточными слоями с архитектурой похожей на Unet - https://en.wikipedia.org/wiki/U-Net

Не нужно воспроизводить все в точности, главное, чтобы было сокращение длины последовательности с помощью CNN, а затем обратное увеличение длины последовательности до изначальной с residual связями между промежуточными шагами с одинаковыми размерностями. 
Изменений размерности должно быть хотя бы 3 и соответственно residual связей тоже. 

Для повышения размерности используйте keras.layers.UpSampling1D
Полученная модель должна давать ненулевое качество на той же самой задаче классификации текстов.

In [14]:
class UNet1D(nn.Module):
    def __init__(self, vocab_size, emb_dim, num_classes):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb_dim, padding_idx=0)

        self.down1 = nn.Sequential(nn.Conv1d(emb_dim, 64, 3, padding=1), nn.ReLU())
        self.pool1 = nn.MaxPool1d(2)
        self.down2 = nn.Sequential(nn.Conv1d(64, 128, 3, padding=1), nn.ReLU())
        self.pool2 = nn.MaxPool1d(2)
        self.down3 = nn.Sequential(nn.Conv1d(128, 256, 3, padding=1), nn.ReLU())
        self.pool3 = nn.MaxPool1d(2)

        self.bottleneck = nn.Sequential(nn.Conv1d(256, 512, 3, padding=1), nn.ReLU())

        self.up3 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_up3 = nn.Sequential(nn.Conv1d(512+256, 256, 3, padding=1), nn.ReLU())
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_up2 = nn.Sequential(nn.Conv1d(256+128, 128, 3, padding=1), nn.ReLU())
        self.up1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.conv_up1 = nn.Sequential(nn.Conv1d(128+64, 64, 3, padding=1), nn.ReLU())

        self.pool = nn.AdaptiveMaxPool1d(1)
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.embed(x).permute(0,2,1)
        d1 = self.down1(x); p1 = self.pool1(d1)
        d2 = self.down2(p1); p2 = self.pool2(d2)
        d3 = self.down3(p2); p3 = self.pool3(d3)
        bn = self.bottleneck(p3)
        u3 = self.up3(bn)
        c3 = self.conv_up3(torch.cat([u3, d3], dim=1))
        u2 = self.up2(c3)
        c2 = self.conv_up2(torch.cat([u2, d2], dim=1))
        u1 = self.up1(c2)
        c1 = self.conv_up1(torch.cat([u1, d1], dim=1))
        out = self.pool(c1).squeeze(-1)
        return self.fc(out)

model2 = UNet1D(len(word2id), emb_dim=100, num_classes=num_classes)
train_model(model2, train_dl, val_dl, epochs=15)

Epoch 1: Recall@0.8Prec=0.0643, Precision=1.0000
  -> Saved best model
Epoch 2: Recall@0.8Prec=0.2509, Precision=0.9321
  -> Saved best model
Epoch 3: Recall@0.8Prec=0.4075, Precision=0.8979
  -> Saved best model
Epoch 4: Recall@0.8Prec=0.5131, Precision=0.9086
  -> Saved best model
Epoch 5: Recall@0.8Prec=0.5969, Precision=0.8830
  -> Saved best model
Epoch 6: Recall@0.8Prec=0.6214, Precision=0.8703
  -> Saved best model
Epoch 7: Recall@0.8Prec=0.6594, Precision=0.8674
  -> Saved best model
Epoch 8: Recall@0.8Prec=0.6797, Precision=0.8491
  -> Saved best model
Epoch 9: Recall@0.8Prec=0.7163, Precision=0.8170
  -> Saved best model
Epoch 10: Recall@0.8Prec=0.7342, Precision=0.8147
  -> Saved best model
Epoch 11: Recall@0.8Prec=0.7547, Precision=0.7902
  -> Saved best model
Epoch 12: Recall@0.8Prec=0.7860, Precision=0.7591
  -> Saved best model
Epoch 13: Recall@0.8Prec=0.7892, Precision=0.7643
  -> Saved best model
Epoch 14: Recall@0.8Prec=0.8226, Precision=0.7480
  -> Saved best model
E