In [1]:
import warnings
for warn in [UserWarning, FutureWarning]: warnings.filterwarnings("ignore", category = warn)

# Импорт необходимых библиотек
import os
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from tqdm import tqdm

from transformers import AutoTokenizer, AutoModel,AutoModelForMaskedLM
from sklearn.model_selection import train_test_split
import torch
import torch.nn.functional as F
from torch import Tensor
from einops import rearrange
from typing import Tuple, Callable
from torch.autograd import Function
import gc
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import root_mean_squared_error

from torch.utils.data import Dataset, DataLoader 
pd.set_option('display.max_columns', None)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Данные

In [None]:
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')

In [None]:
def preprocess(df):
    df['activation_date'] = pd.to_datetime(df['activation_date'])

    df['day'] = df['activation_date'].dt.day
    df['month'] = df["activation_date"].dt.month
    df['year'] = df["activation_date"].dt.year
    df['weekday'] = df['activation_date'].dt.weekday
    df["dayofyear"] = df['activation_date'].dt.dayofyear
    df.drop(columns=['activation_date', 'item_id'], inplace=True)
    df['param_1'] = df['param_1'].fillna('')
    df['param_2'] = df['param_2'].fillna('')
    df['param_3'] = df['param_3'].fillna('')
    df['description'] = df['description'].fillna('')
    return df

In [3]:
class Dataset_avito(): 
    def __init__(self, part='train', len_1=15034, len_2=15034): 
        train = pd.read_csv('../data/train.csv')
        train_1 = train[train.deal_probability != 0.0].iloc[0:len_1]
        train_2 = train[train.deal_probability == 0.0].iloc[0:len_2]
        #train = train.iloc[0:15034]
        train = pd.concat([train_1, train_2])
        train = preprocess(train)
        X_train, X_val, y_train, y_val = train_test_split(train.drop(columns=['deal_probability', 'image']), train['deal_probability'], test_size=0.2, random_state=42)
        self.x = X_train if part == 'train' else X_val
        self.y = y_train if part == 'train' else y_val
        self.n_samples = X_train.shape[0] if part == 'train' else X_val.shape[0]
        self.text = list(self.x.apply(lambda item: '\n'.join([ item["title"], str(item["description"]), item["region"], item["city"], item["parent_category_name"], item["category_name"], ('' if item["param_1"] is None else str(item["param_1"])), ('' if item["param_2"] is None else str(item["param_2"])), ('' if item["param_3"] is None else str(item["param_3"]))]), axis=1).values)
        user_type_dict = {'Private': 0, 'Company': 1, 'Shop': 2}
        self.tabular = list(self.x.apply(lambda item: torch.tensor([item["item_seq_number"], item["day"], item["month"], item["year"], item["weekday"], item["dayofyear"], user_type_dict[item["user_type"]], 0.0 if item["price"] is None else item["price"]]), axis=1).values)

    def __getitem__(self, index): 
        return self.tabular[index], self.text[index], np.array(self.y)[index] 
        
    def __len__(self): 
        return self.n_samples

In [4]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_avito('train'), batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=Dataset_avito('val'), batch_size=BATCH_SIZE, shuffle=False)

## Обучение

In [4]:
from dataclasses import dataclass
from typing import ClassVar
from typing import List, Dict, Any, Tuple, Optional
@dataclass
class ModelTrainer:
    model: 'typing.Any'
    train_dataloader: DataLoader
    val_dataloader: DataLoader
    device: torch.device
    epochs: int
    round_loss: int
    round_rmse: int

    optimizer: torch.optim
    loss_fn: 'typing.Any'
    
    step: str = "tabular"
    
    patience: int = 10 # Ранняя остановка обучения

    def __post_init__(self):        
        # История обучения и тестирования
        self.__history = pd.DataFrame({
            "train_avg": [], # Средние метрики на тренировочной выборке
            "val_avg": [], # Средние метрики на валидационной выборке
            "train_loss": [], # Loss на тренировочной выборке
            "val_loss": [], # Loss на валидационной выборке
        })

        # Количество шагов в одной эпохе
        self.__train_steps = len(self.train_dataloader)
        self.__val_steps = len(self.val_dataloader)

        self.__best_val_avg = 0
        self.__no_improvement_count = 0
        
        self.loss_fn = self.loss_fn

    @property
    def history(self) -> pd.DataFrame:
        """Получение DataFrame историей обучения и тестирования

        Returns:
            pd.DataFrame: **DataFrame** c историей обучения и тестирования
        """

        return self.__history

    @classmethod
    def _is_best_model(self, dev_avg: float) -> bool:
        """Проверка, является ли текущая модель лучшей на основе метрик валидации

        Args:
            test_accuracy (float): Текущая точность тестирования

        Returns:
            bool: True, если текущая модель лучшая, иначе False
        """

        try:
            min_val_avg = min(self.__history["val_avg"])
        except ValueError:
            min_val_avg = 10**10
        return dev_avg < min_val_avg

    def _save_model(self, epoch: int, path_to_model: str, test_rmse: float, loss: torch.Tensor) -> None:
        """Сохранение модели

        Args:
            epoch (int): Текущая эпоха
            path_to_model (str): Путь для сохранения модели
            test_rmse (float): rmse на тестовой выборке
            loss (torch.Tensor): Значение потерь
        """
        
        os.makedirs(path_to_model, exist_ok = True)
        self._best_model_name = f"{self.model.__class__.__name__}_{epoch}_{test_rmse}_checkpoint.pth"

        torch.save({
            "epoch": epoch,
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "test_loss": loss,
        }, os.path.join(path_to_model, f"{self.model.__class__.__name__}_{epoch}_{test_rmse}_checkpoint.pth"))
    
    # Процесс обучения
    def train(self, path_to_model: str) -> None:
        """Процесс обучения

        Args:
            path_to_model (str): Путь для сохранения моделей

        Returns:
            None
        """
        
        losses_train_list = []
        losses_val_list = []
        rmse_train_list = []
        rmse_val_list = []
        min_val_rmse = 10**10

        for epoch in range(1, self.epochs + 1):
            with torch.no_grad():
                torch.cuda.empty_cache()
            self.model.train() # Установка модели в режим обучения
            # Сумма Loss
            total_train_loss = 0
            total_val_loss = 0
            # Сумма rmse
            train_rmse = 0
            val_rmse = 0

            # Проход по всем тренировочным пакетам
            with tqdm(total = self.__train_steps, desc = f"Эпоха {epoch}", unit = "batch") as pbar_train:
                for batch, (tabular, text, targets) in enumerate(self.train_dataloader, 1):
                    if step == "tabular":
                        x = tabular.to(device)
                        x = torch.nan_to_num(x, nan=0.5)
                    elif step == "text":
                        text_embedding = []
                        for i in range(len(text)):
                            encoded_input = feature_extractor_tokenizer(text[i], padding=True, truncation=True, return_tensors='pt').to(device)
                            with torch.no_grad():
                                features = feature_extractor_model(**encoded_input)[0][0]
                            text_embedding.append(features.float())
                        x = torch.nn.utils.rnn.pad_sequence(text_embedding, batch_first=True)
                        x = x.to(device)
                    targets = targets.to(device)
                    logits = self.model(x)
                    if logits.isnan().sum() != 0:
                        print("logits")
                    logits = torch.nan_to_num(logits, nan=0.0)
                    loss = self.loss_fn(logits, targets.float()) # Ошибка предсказаний

                    # Обратное распространение для обновления весов
                    self.optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                    self.optimizer.step()
        
                    total_train_loss += loss.item() # Потеря
                    # RMSE
                    train_rmse += root_mean_squared_error(targets.cpu().detach().numpy(), logits.cpu().detach().numpy())
        
                    pbar_train.update(1)
                    with torch.no_grad():
                        torch.cuda.empty_cache()

                # Средняя потеря
                avg_train_loss = round(total_train_loss / batch, self.round_loss)
                losses_train_list.append(avg_train_loss)
        
                # RMSE
                train_rmse = round(train_rmse / len(self.train_dataloader.dataset) * 100, self.round_rmse)
                rmse_train_list.append(train_rmse)
        
                pbar_train.set_postfix({
                    "rmse": train_rmse,
                    "Средняя потеря": avg_train_loss
                })
            
            
            # Установка модели в режим предсказаний
            self.model.eval()
        
            # Предсказания на валидационной выборке
            with torch.no_grad():
                with tqdm(total = self.__val_steps, desc = f"Тестирование {epoch}", unit = "batch") as pbar_val:
                    for batch, (tabular, text, targets) in enumerate(self.val_dataloader, 1):
                        if step == "tabular":
                            x = tabular.to(device)
                            x = torch.nan_to_num(x, nan=0.0)
                            #x = x.unsqueeze(dim=1)
                        elif step == "text":
                            text_embedding = []
                            for i in range(len(text)):
                                encoded_input = feature_extractor_tokenizer(text[i], padding=True, truncation=True, return_tensors='pt').to(device)
                                with torch.no_grad():
                                    features = feature_extractor_model(**encoded_input)[0][0]
                                text_embedding.append(features.float())
                            x = torch.nn.utils.rnn.pad_sequence(text_embedding, batch_first=True)
                            x = x.to(device)
                        targets = targets.to(device)
                        logits = self.model(x)
                        logits = torch.nan_to_num(logits, nan=0.0)
                        loss = self.loss_fn(logits, targets.float()) # Ошибка предсказаний
                        
                        total_val_loss += loss.item() # Потеря
                        # RMSE
                        val_rmse += root_mean_squared_error(targets.cpu().detach().numpy(), logits.cpu().detach().numpy())
        
                        pbar_val.update(1)
                        with torch.no_grad():
                            torch.cuda.empty_cache()
                    # Средняя потеря
                    avg_val_loss = round(total_val_loss / batch, self.round_loss)
                    losses_val_list.append(avg_val_loss)
        
                    # RMSE
                    val_rmse = round(val_rmse / len(self.val_dataloader.dataset) * 100, self.round_rmse)
                    rmse_val_list.append(val_rmse)
                    
                    pbar_val.set_postfix({
                        "rmse": val_rmse,
                        "Средняя потеря": avg_val_loss
                    })
            
            if val_rmse < min_val_rmse:
                min_val_rmse = val_rmse
                self._save_model(epoch, path_to_model, round(val_rmse, self.round_rmse), avg_val_loss)
                self.__best_dev_avg = val_rmse
                self.__no_improvement_count = 0
            else:
                self.__no_improvement_count += 1

            if self.__no_improvement_count >= self.patience:
                print(f"Ранняя остановка на эпохе {epoch} из-за отсутствия улучшения точности на тестовой выборке")
                break

    # Получение хэш-значения
    def __hash__(self):
        return id(self)

In [5]:
EPOCHS = 20 # Количество эпох
BATCH_SIZE = 32 # Размер выборки (пакета)
LEARNING_RATE = 1e-4 # Скорость обучения
ROUND_RMSE = 2 # Знаков Accuracy после запятой
ROUND_LOSS = 7 # Знаков Loss после запятой
ROOT_DIR = os.path.join(".")
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_transformer_decision-level")

Экстрактор признаков из текста

In [6]:
feature_extractor_tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v3", code_revision='da863dd04a4e5dce6814c6625adfba87b83838aa', trust_remote_code=True)
feature_extractor_model = AutoModel.from_pretrained("jinaai/jina-embeddings-v3", code_revision='da863dd04a4e5dce6814c6625adfba87b83838aa', trust_remote_code=True).to(device)

flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn i

In [8]:
class TransformerModelWithAttention(nn.Module):
    def __init__(self, input_dim = 1024, hidden_dim=128, num_heads = 4, num_layers = 8, dropout = 0.1):
        super(TransformerModelWithAttention, self).__init__()
        self.in_layer = nn.Linear(input_dim, hidden_dim)
        self.positional_encoding = nn.Parameter(torch.zeros(1, 10000, hidden_dim))
        encoder_layer = nn.TransformerEncoderLayer(d_model = hidden_dim, nhead = num_heads, dim_feedforward = hidden_dim, dropout = dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers = num_layers)
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = x.to(device)
        x = self.in_layer(x)
        batch_size, seq_len, _ = x.size()
        x = x + self.positional_encoding[:, :seq_len, :]
        encoder_output = self.transformer_encoder(x)
        x = encoder_output.mean(dim = 1)
        return self.fc_out(x).flatten()

### Предсказание на табличных данных

In [101]:
model_tabular = model = nn.Sequential(
    nn.Linear(8, 24),
    nn.ReLU(),
    nn.Linear(24, 12),
    nn.ReLU(),
    nn.Linear(12, 6),
    nn.ReLU(),
    nn.Linear(6, 1)
).to(device)
optimizer = optim.Adam(params = model_tabular.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "tabular"
trainer = ModelTrainer(model_tabular, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL + "_" + step)

Эпоха 1: 100%|██████████| 1880/1880 [00:16<00:00, 112.28batch/s, rmse=71.6, Средняя потеря=1.68e+4]
Тестирование 1: 100%|██████████| 470/470 [00:01<00:00, 290.71batch/s, rmse=1.09, Средняя потеря=0.145]
Эпоха 2: 100%|██████████| 1880/1880 [00:16<00:00, 116.46batch/s, rmse=1.02, Средняя потеря=0.165]
Тестирование 2: 100%|██████████| 470/470 [00:01<00:00, 282.14batch/s, rmse=0.86, Средняя потеря=0.0785]
Эпоха 3: 100%|██████████| 1880/1880 [00:16<00:00, 113.63batch/s, rmse=0.88, Средняя потеря=0.0872]
Тестирование 3: 100%|██████████| 470/470 [00:01<00:00, 290.05batch/s, rmse=0.87, Средняя потеря=0.0846]
Эпоха 4: 100%|██████████| 1880/1880 [00:16<00:00, 114.04batch/s, rmse=0.87, Средняя потеря=0.0822]
Тестирование 4: 100%|██████████| 470/470 [00:02<00:00, 210.17batch/s, rmse=0.83, Средняя потеря=0.0737]
Эпоха 5: 100%|██████████| 1880/1880 [00:16<00:00, 115.90batch/s, rmse=0.85, Средняя потеря=0.0766]
Тестирование 5: 100%|██████████| 470/470 [00:01<00:00, 286.61batch/s, rmse=0.85, Средняя п

Ранняя остановка на эпохе 17 из-за отсутствия улучшения точности на тестовой выборке





### Предсказание на текстовых данных

In [8]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_avito('train', train_len=15034), batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=Dataset_avito('val', train_len=15034), batch_size=BATCH_SIZE, shuffle=False)

In [14]:
model_tabular = TransformerModelWithAttention(num_layers=2, input_dim=1024, hidden_dim=128, num_heads=2).to(device)
optimizer = optim.Adam(params = model_tabular.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_tabular, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL + "_" + step)

Эпоха 1: 100%|██████████| 376/376 [13:25<00:00,  2.14s/batch, rmse=0.85, Средняя потеря=0.0759]
Тестирование 1: 100%|██████████| 94/94 [03:18<00:00,  2.11s/batch, rmse=0.77, Средняя потеря=0.0633]
Эпоха 2: 100%|██████████| 376/376 [13:23<00:00,  2.14s/batch, rmse=0.78, Средняя потеря=0.065]
Тестирование 2: 100%|██████████| 94/94 [03:18<00:00,  2.11s/batch, rmse=0.76, Средняя потеря=0.061]
Эпоха 3: 100%|██████████| 376/376 [13:20<00:00,  2.13s/batch, rmse=0.77, Средняя потеря=0.0618]
Тестирование 3: 100%|██████████| 94/94 [03:18<00:00,  2.11s/batch, rmse=0.74, Средняя потеря=0.058]
Эпоха 4: 100%|██████████| 376/376 [13:43<00:00,  2.19s/batch, rmse=0.76, Средняя потеря=0.0606]
Тестирование 4: 100%|██████████| 94/94 [03:25<00:00,  2.18s/batch, rmse=0.74, Средняя потеря=0.058]
Эпоха 5: 100%|██████████| 376/376 [13:24<00:00,  2.14s/batch, rmse=0.75, Средняя потеря=0.0597]
Тестирование 5: 100%|██████████| 94/94 [03:18<00:00,  2.11s/batch, rmse=0.74, Средняя потеря=0.0579]
Эпоха 6: 100%|█████

Ранняя остановка на эпохе 13 из-за отсутствия улучшения точности на тестовой выборке





In [None]:
model_tabular = TransformerModelWithAttention(num_layers=2, input_dim=1024, hidden_dim=128, num_heads=2).to(device)
optimizer = optim.Adam(params = model_tabular.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_tabular, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL + "_" + step)

Эпоха 1: 100%|██████████| 376/376 [13:16<00:00,  2.12s/batch, rmse=0.82, Средняя потеря=0.0721]
Тестирование 1: 100%|██████████| 94/94 [03:17<00:00,  2.10s/batch, rmse=0.77, Средняя потеря=0.0626]
Эпоха 2: 100%|██████████| 376/376 [13:14<00:00,  2.11s/batch, rmse=0.78, Средняя потеря=0.0636]
Тестирование 2: 100%|██████████| 94/94 [03:16<00:00,  2.09s/batch, rmse=0.82, Средняя потеря=0.069]
Эпоха 3: 100%|██████████| 376/376 [13:12<00:00,  2.11s/batch, rmse=0.76, Средняя потеря=0.0617]
Тестирование 3: 100%|██████████| 94/94 [03:16<00:00,  2.09s/batch, rmse=0.76, Средняя потеря=0.0612]
Эпоха 4: 100%|██████████| 376/376 [13:13<00:00,  2.11s/batch, rmse=0.76, Средняя потеря=0.061]
Тестирование 4: 100%|██████████| 94/94 [03:16<00:00,  2.09s/batch, rmse=0.76, Средняя потеря=0.0602]
Эпоха 5: 100%|██████████| 376/376 [13:13<00:00,  2.11s/batch, rmse=0.75, Средняя потеря=0.0597]
Тестирование 5: 100%|██████████| 94/94 [03:16<00:00,  2.09s/batch, rmse=0.76, Средняя потеря=0.0604]
Эпоха 6: 100%|███

MMM_project/notebooks/Models_transformer_decision-level_text/TransformerModelWithAttention_6_0.74_checkpoint.pth

In [None]:
model_tabular = TransformerModelWithAttention(num_layers=2, input_dim=1024, hidden_dim=128, num_heads=2).to(device)
optimizer = optim.Adam(params = model_tabular.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_tabular, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL + "_" + step)

Эпоха 1: 100%|██████████| 376/376 [06:28<00:00,  1.03s/batch, rmse=0.84, Средняя потеря=0.0743]
Тестирование 1: 100%|██████████| 94/94 [01:35<00:00,  1.02s/batch, rmse=0.79, Средняя потеря=0.0646]
Эпоха 2: 100%|██████████| 376/376 [06:25<00:00,  1.03s/batch, rmse=0.78, Средняя потеря=0.0636]
Тестирование 2: 100%|██████████| 94/94 [01:35<00:00,  1.02s/batch, rmse=0.75, Средняя потеря=0.0596]
Эпоха 3: 100%|██████████| 376/376 [06:26<00:00,  1.03s/batch, rmse=0.77, Средняя потеря=0.0622]
Тестирование 3: 100%|██████████| 94/94 [01:35<00:00,  1.02s/batch, rmse=0.77, Средняя потеря=0.0614]
Эпоха 4: 100%|██████████| 376/376 [06:26<00:00,  1.03s/batch, rmse=0.76, Средняя потеря=0.0604]
Тестирование 4: 100%|██████████| 94/94 [01:35<00:00,  1.02s/batch, rmse=0.75, Средняя потеря=0.0597]
Эпоха 5: 100%|██████████| 376/376 [06:26<00:00,  1.03s/batch, rmse=0.75, Средняя потеря=0.0603]
Тестирование 5: 100%|██████████| 94/94 [01:35<00:00,  1.02s/batch, rmse=0.74, Средняя потеря=0.0579]
Эпоха 6: 100%|█

In [9]:
model_tabular = TransformerModelWithAttention(num_layers=2, input_dim=1024, hidden_dim=128, num_heads=2).to(device)
optimizer = optim.Adam(params = model_tabular.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_tabular, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL + "_" + step)

Эпоха 1: 100%|██████████| 376/376 [07:03<00:00,  1.13s/batch, rmse=0.84, Средняя потеря=0.0764]
Тестирование 1: 100%|██████████| 94/94 [01:41<00:00,  1.08s/batch, rmse=0.76, Средняя потеря=0.061]
Эпоха 2: 100%|██████████| 376/376 [06:46<00:00,  1.08s/batch, rmse=0.78, Средняя потеря=0.0647]
Тестирование 2: 100%|██████████| 94/94 [01:40<00:00,  1.07s/batch, rmse=0.75, Средняя потеря=0.0601]
Эпоха 3: 100%|██████████| 376/376 [06:51<00:00,  1.09s/batch, rmse=0.77, Средняя потеря=0.0624]
Тестирование 3: 100%|██████████| 94/94 [01:41<00:00,  1.08s/batch, rmse=0.79, Средняя потеря=0.0671]
Эпоха 4: 100%|██████████| 376/376 [06:49<00:00,  1.09s/batch, rmse=0.76, Средняя потеря=0.0619]
Тестирование 4: 100%|██████████| 94/94 [01:40<00:00,  1.07s/batch, rmse=0.76, Средняя потеря=0.0616]
Эпоха 5: 100%|██████████| 376/376 [06:42<00:00,  1.07s/batch, rmse=0.75, Средняя потеря=0.0597]
Тестирование 5: 100%|██████████| 94/94 [01:40<00:00,  1.07s/batch, rmse=0.75, Средняя потеря=0.0591]
Эпоха 6: 100%|██

Ранняя остановка на эпохе 17 из-за отсутствия улучшения точности на тестовой выборке





In [7]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_avito('train', len_1=7517, len_2=7517), batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=Dataset_avito('val', len_1=7517, len_2=7517), batch_size=BATCH_SIZE, shuffle=False)

In [None]:
model_tabular = TransformerModelWithAttention(num_layers=2, input_dim=1024, hidden_dim=128, num_heads=2).to(device)
optimizer = optim.Adam(params = model_tabular.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_tabular, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL + "_" + step)

Эпоха 1: 100%|██████████| 376/376 [31:02<00:00,  4.95s/batch, rmse=0.92, Средняя потеря=0.0894]
Тестирование 1: 100%|██████████| 94/94 [07:32<00:00,  4.81s/batch, rmse=0.86, Средняя потеря=0.0769]
Эпоха 2: 100%|██████████| 376/376 [28:22<00:00,  4.53s/batch, rmse=0.88, Средняя потеря=0.0803]
Тестирование 2: 100%|██████████| 94/94 [06:49<00:00,  4.36s/batch, rmse=0.86, Средняя потеря=0.0773]
Эпоха 3: 100%|██████████| 376/376 [26:51<00:00,  4.29s/batch, rmse=0.85, Средняя потеря=0.0754]
Тестирование 3: 100%|██████████| 94/94 [06:33<00:00,  4.19s/batch, rmse=0.86, Средняя потеря=0.0766]
Эпоха 4: 100%|██████████| 376/376 [27:05<00:00,  4.32s/batch, rmse=0.84, Средняя потеря=0.0737]
Тестирование 4: 100%|██████████| 94/94 [06:49<00:00,  4.36s/batch, rmse=0.85, Средняя потеря=0.077]
Эпоха 5: 100%|██████████| 376/376 [27:33<00:00,  4.40s/batch, rmse=0.84, Средняя потеря=0.073]
Тестирование 5: 100%|██████████| 94/94 [06:53<00:00,  4.39s/batch, rmse=0.84, Средняя потеря=0.0733]
Эпоха 6: 100%|███

## LSTM

In [7]:
class LSTM(nn.Module):
    def __init__(self, input_size = 1024, hidden_size = 64, num_layers = 2, dropout = 0.1, bidirectional=True):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
            dropout = dropout,
            bidirectional=bidirectional
        )
        if bidirectional:
            self.fc = nn.Linear(2 * hidden_size, 1)
        else:
            self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        if self.lstm.bidirectional:
            h0, c0 = torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device)
        else:
            h0, c0 = torch.zeros(self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(self.num_layers, len(x), self.hidden_size).to(device)
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        if self.lstm.bidirectional:
            out = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)
        else:
            out = out[:, -1, :]
        out = self.fc(out)
        return out

In [8]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_avito('train', len_1=7517, len_2=7517), batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=Dataset_avito('val', len_1=7517, len_2=7517), batch_size=BATCH_SIZE, shuffle=False)

In [9]:
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_lstm_decision-level")

In [10]:
model_lstm = LSTM().to(device)
optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_lstm, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL)

Эпоха 1: 100%|██████████| 376/376 [13:40<00:00,  2.18s/batch, rmse=0.92, Средняя потеря=0.0879]
Тестирование 1: 100%|██████████| 94/94 [03:25<00:00,  2.18s/batch, rmse=0.9, Средняя потеря=0.0844]
Эпоха 2: 100%|██████████| 376/376 [14:10<00:00,  2.26s/batch, rmse=0.9, Средняя потеря=0.0854]
Тестирование 2: 100%|██████████| 94/94 [03:26<00:00,  2.20s/batch, rmse=0.89, Средняя потеря=0.0836]
Эпоха 3: 100%|██████████| 376/376 [13:58<00:00,  2.23s/batch, rmse=0.9, Средняя потеря=0.0853]
Тестирование 3: 100%|██████████| 94/94 [03:25<00:00,  2.18s/batch, rmse=0.89, Средняя потеря=0.0832]
Эпоха 4: 100%|██████████| 376/376 [14:00<00:00,  2.23s/batch, rmse=0.9, Средняя потеря=0.0852]
Тестирование 4: 100%|██████████| 94/94 [03:25<00:00,  2.19s/batch, rmse=0.89, Средняя потеря=0.0831]
Эпоха 5: 100%|██████████| 376/376 [13:58<00:00,  2.23s/batch, rmse=0.9, Средняя потеря=0.0852]
Тестирование 5: 100%|██████████| 94/94 [03:22<00:00,  2.15s/batch, rmse=0.89, Средняя потеря=0.0831]
Эпоха 6: 100%|██████

Ранняя остановка на эпохе 12 из-за отсутствия улучшения точности на тестовой выборке





In [None]:
model_lstm = LSTM().to(device)
optimizer = optim.Adam(params = model_lstm.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
step = "text"
trainer = ModelTrainer(model_lstm, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, step)
trainer.train(PATH_TO_MODEL)

Эпоха 1: 100%|██████████| 376/376 [11:40<00:00,  1.86s/batch, rmse=0.91, Средняя потеря=0.0875]
Тестирование 1: 100%|██████████| 94/94 [02:53<00:00,  1.85s/batch, rmse=0.89, Средняя потеря=0.0838]
Эпоха 2: 100%|██████████| 376/376 [11:45<00:00,  1.88s/batch, rmse=0.9, Средняя потеря=0.0854]
Тестирование 2: 100%|██████████| 94/94 [02:54<00:00,  1.86s/batch, rmse=0.89, Средняя потеря=0.0833]
Эпоха 3: 100%|██████████| 376/376 [11:42<00:00,  1.87s/batch, rmse=0.9, Средняя потеря=0.0852]
Тестирование 3: 100%|██████████| 94/94 [02:59<00:00,  1.91s/batch, rmse=0.89, Средняя потеря=0.0831]
Эпоха 4: 100%|██████████| 376/376 [11:39<00:00,  1.86s/batch, rmse=0.9, Средняя потеря=0.0852]
Тестирование 4: 100%|██████████| 94/94 [02:57<00:00,  1.89s/batch, rmse=0.89, Средняя потеря=0.0831]
Эпоха 5: 100%|██████████| 376/376 [11:54<00:00,  1.90s/batch, rmse=0.9, Средняя потеря=0.0851]
Тестирование 5: 100%|██████████| 94/94 [02:56<00:00,  1.88s/batch, rmse=0.89, Средняя потеря=0.0833]
Эпоха 6: 100%|█████