In [1]:
import warnings
for warn in [UserWarning, FutureWarning]: warnings.filterwarnings("ignore", category = warn)

# Импорт необходимых библиотек
import os
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
from tqdm import tqdm
from PIL import Image
import random
import gc
import pickle

from transformers import AutoTokenizer, AutoModel,AutoModelForMaskedLM
from transformers import ViTImageProcessor, ViTModel
from sklearn.model_selection import train_test_split
import torch
import torch.nn.functional as F
from torch import Tensor
from einops import rearrange
from typing import Tuple, Callable
from torch.autograd import Function
import gc
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import root_mean_squared_error

from torch.utils.data import Dataset, DataLoader 
pd.set_option('display.max_columns', None)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Данные

In [4]:
%%capture --no-stdout
feature_extractor_tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v3", code_revision='da863dd04a4e5dce6814c6625adfba87b83838aa', trust_remote_code=True)
feature_extractor_model = AutoModel.from_pretrained("jinaai/jina-embeddings-v3", code_revision='da863dd04a4e5dce6814c6625adfba87b83838aa', trust_remote_code=True).to(device)

In [49]:
image_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
image_model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k').to(device)

In [2]:
def preprocess(df):
    df['activation_date'] = pd.to_datetime(df['activation_date'])

    df['day'] = df['activation_date'].dt.day
    df['month'] = df["activation_date"].dt.month
    df['year'] = df["activation_date"].dt.year
    df['weekday'] = df['activation_date'].dt.weekday
    df["dayofyear"] = df['activation_date'].dt.dayofyear
    df.drop(columns=['activation_date', 'item_id'], inplace=True)
    df['param_1'] = df['param_1'].fillna('')
    df['param_2'] = df['param_2'].fillna('')
    df['param_3'] = df['param_3'].fillna('')
    df['description'] = df['description'].fillna('')
    return df

#item_id = test.item_id
#train = preprocess(train)
#test = preprocess(test)

In [3]:
class Dataset_avito(): 
    def __init__(self, part='train', path=None, len_1=15034, len_2=15034): 
        train = pd.read_csv('../data/train.csv')
        train_1 = train[train.deal_probability != 0.0].iloc[0:len_1]
        train_2 = train[train.deal_probability == 0.0].iloc[0:len_2]
        train = pd.concat([train_1, train_2])
        train = preprocess(train)
        train.loc[:, "index_col"] = list(range(len(train)))
        train.set_index('index_col', drop=True, append=False, inplace=True)
        X_train, X_val, y_train, y_val = train_test_split(train.drop(columns=['deal_probability']), train['deal_probability'], test_size=0.2, random_state=42)
        self.x = X_train if part == 'train' else X_val
        self.y = y_train if part == 'train' else y_val
        self.n_samples = X_train.shape[0] if part == 'train' else X_val.shape[0]
        self.text = list(self.x.apply(lambda item: '\n'.join([ item["title"], str(item["description"]), item["region"], item["city"], item["parent_category_name"], item["category_name"], ('' if item["param_1"] is None else str(item["param_1"])), ('' if item["param_2"] is None else str(item["param_2"])), ('' if item["param_3"] is None else str(item["param_3"]))]), axis=1).values)
        user_type_dict = {'Private': 0, 'Company': 1, 'Shop': 2}
        self.tabular = list(self.x.apply(lambda item: torch.tensor([item["item_seq_number"], item["day"], item["month"], item["year"], item["weekday"], item["dayofyear"], user_type_dict[item["user_type"]], 0.0 if item["price"] is None else item["price"]]), axis=1).values)
        self.image = list(self.x.index.values)
        
        if path is not None:
            index = list(self.x.index.values)
            with open(path, 'rb') as f:
                self.text_embedding_all = pickle.load(f)
            self.text_embedding = []
            for i in index:
                self.text_embedding.append(self.text_embedding_all[i])
                
        else:
            self.text_embedding = []
            for t in tqdm(self.text):
                encoded_input = feature_extractor_tokenizer(t, padding=True, truncation=True, return_tensors='pt').to(self.device)
                with torch.no_grad():
                    features = feature_extractor_model(**encoded_input)[0][0]
                self.text_embedding.append(features)

    def __getitem__(self, index): 
        return self.tabular[index], self.text[index], self.text_embedding[index], self.image[index], np.array(self.y)[index] 
        
    def __len__(self): 
        return self.n_samples

In [4]:
def custom_collate_fn(batch):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch = [x for x in batch if x is not None]
    if not batch:
        return None
    

    return [
        [torch.tensor(b[0]) for b in batch],
        [b[1] for b in batch],
        [torch.tensor(b[2]) for b in batch],
        [b[3] for b in batch],
        [b[4] for b in batch],
    ]

In [5]:
BATCH_SIZE = 32
train_dataloader = DataLoader(dataset=Dataset_avito('train', path='train_text_features'), batch_size=BATCH_SIZE, shuffle=True, collate_fn=custom_collate_fn)
val_dataloader = DataLoader(dataset=Dataset_avito('val', path='train_text_features'), batch_size=BATCH_SIZE, shuffle=False, collate_fn=custom_collate_fn)

## Обучение

In [6]:
from dataclasses import dataclass
from typing import ClassVar
from typing import List, Dict, Any, Tuple, Optional
@dataclass
class ModelTrainer:
    model: 'typing.Any'
    train_dataloader: DataLoader
    val_dataloader: DataLoader
    device: torch.device
    epochs: int
    round_loss: int
    round_rmse: int

    optimizer: torch.optim
    loss_fn: 'typing.Any'
    
    
    patience: int = 10 # Ранняя остановка обучения
    random_seed: int = 0

    def __post_init__(self):        
        # История обучения и тестирования
        self.__history = pd.DataFrame({
            "train_avg": [], # Средние метрики на тренировочной выборке
            "val_avg": [], # Средние метрики на валидационной выборке
            "train_loss": [], # Loss на тренировочной выборке
            "val_loss": [], # Loss на валидационной выборке
        })

        # Количество шагов в одной эпохе
        self.__train_steps = len(self.train_dataloader)
        self.__val_steps = len(self.val_dataloader)

        self.__best_val_avg = 0
        self.__no_improvement_count = 0
        
        self.loss_fn = self.loss_fn
        if self.random_seed > 0:
            random.seed(self.random_seed)
            torch.manual_seed(self.random_seed)
            torch.cuda.manual_seed_all(self.random_seed)
            torch.backends.cudnn.deterministic = True
            torch.backends.cudnn.benchmark = False
            os.environ['PYTHONHASHSEED'] = str(self.random_seed)
            generator = torch.Generator()
            generator.manual_seed(self.random_seed)

    @property
    def history(self) -> pd.DataFrame:
        """Получение DataFrame историей обучения и тестирования

        Returns:
            pd.DataFrame: **DataFrame** c историей обучения и тестирования
        """

        return self.__history

    @classmethod
    def _is_best_model(self, dev_avg: float) -> bool:
        """Проверка, является ли текущая модель лучшей на основе метрик валидации

        Args:
            test_accuracy (float): Текущая точность тестирования

        Returns:
            bool: True, если текущая модель лучшая, иначе False
        """

        try:
            min_val_avg = min(self.__history["val_avg"])
        except ValueError:
            min_val_avg = 10**10
        return dev_avg < min_val_avg

    def _save_model(self, epoch: int, path_to_model: str, test_rmse: float, loss: torch.Tensor) -> None:
        """Сохранение модели

        Args:
            epoch (int): Текущая эпоха
            path_to_model (str): Путь для сохранения модели
            test_rmse (float): rmse на тестовой выборке
            loss (torch.Tensor): Значение потерь
        """
        
        os.makedirs(path_to_model, exist_ok = True)
        self._best_model_name = f"{self.model.__class__.__name__}_{epoch}_{test_rmse}_checkpoint.pth"

        torch.save({
            "epoch": epoch,
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "test_loss": loss,
        }, os.path.join(path_to_model, f"{self.model.__class__.__name__}_{epoch}_{test_rmse}_checkpoint.pth"))
    
    # Процесс обучения
    def train(self, path_to_model: str) -> None:
        """Процесс обучения

        Args:
            path_to_model (str): Путь для сохранения моделей

        Returns:
            None
        """
        
        losses_train_list = []
        losses_val_list = []
        rmse_train_list = []
        rmse_val_list = []
        min_val_rmse = 10**10

        for epoch in range(1, self.epochs + 1):
            with torch.no_grad():
                torch.cuda.empty_cache()
            self.model.train() # Установка модели в режим обучения
            # Сумма Loss
            total_train_loss = 0
            total_val_loss = 0
            # Сумма rmse
            train_rmse = 0
            val_rmse = 0

            # Проход по всем тренировочным пакетам
            with tqdm(total = self.__train_steps, desc = f"Эпоха {epoch}", unit = "batch") as pbar_train:
                for batch, (tabular, text, text_embedding, images, targets) in enumerate(self.train_dataloader, 1):
                    text_embedding = torch.nn.utils.rnn.pad_sequence(text_embedding, batch_first=True)
                    text_embedding = text_embedding.to(device)
                    image_embedding = []
                    for i in range(len(images)):
                        with open("../data/vit_train_jpg/" + str(images[i]), 'rb') as f:
                            x = pickle.load(f)
                        image_embedding.append(x.squeeze(dim=0))
                    image_embedding = torch.nn.utils.rnn.pad_sequence(image_embedding, batch_first=True)
                    image_embedding = image_embedding.to(device)
                    targets = torch.tensor(targets).to(device)
                    
                    logits = self.model(text_embedding, image_embedding).flatten()
                    if logits.isnan().sum() != 0:
                            print(logits.isnan().sum())
                    logits = torch.nan_to_num(logits, nan=0.0)
                    loss = self.loss_fn(logits, targets.float()) # Ошибка предсказаний

                    # Обратное распространение для обновления весов
                    self.optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                    self.optimizer.step()
        
                    total_train_loss += loss.item() # Потеря
                    # RMSE
                    train_rmse += root_mean_squared_error(targets.cpu().detach().numpy(), logits.cpu().detach().numpy())
        
                    pbar_train.update(1)
                    with torch.no_grad():
                        torch.cuda.empty_cache()

                # Средняя потеря
                avg_train_loss = round(total_train_loss / batch, self.round_loss)
                losses_train_list.append(avg_train_loss)
        
                # RMSE
                train_rmse = round(train_rmse / len(self.train_dataloader.dataset) * 100, self.round_rmse)
                rmse_train_list.append(train_rmse)
        
                pbar_train.set_postfix({
                    "rmse": train_rmse,
                    "Средняя потеря": avg_train_loss
                })
            
            
            # Установка модели в режим предсказаний
            self.model.eval()
        
            # Предсказания на валидационной выборке
            with torch.no_grad():
                with tqdm(total = self.__val_steps, desc = f"Тестирование {epoch}", unit = "batch") as pbar_val:
                    for batch, (tabular, text, text_embedding, images, targets) in enumerate(self.val_dataloader, 1):
                        text_embedding = torch.nn.utils.rnn.pad_sequence(text_embedding, batch_first=True)
                        text_embedding = text_embedding.to(device)
                        image_embedding = []
                        for i in range(len(images)):
                            with open("../data/vit_train_jpg//" + str(images[i]), 'rb') as f:
                                x = pickle.load(f)
                            image_embedding.append(x.squeeze(dim=0))
                        image_embedding = torch.nn.utils.rnn.pad_sequence(image_embedding, batch_first=True)
                        image_embedding = image_embedding.to(device)
                        image_embedding = image_embedding.squeeze(dim=1)
                            
                        targets = torch.tensor(targets).to(device)
                        logits = self.model(text_embedding, image_embedding).flatten()
                        if logits.isnan().sum() != 0:
                            print(logits.isnan().sum())
                        logits = torch.nan_to_num(logits, nan=0.0)
                        loss = self.loss_fn(logits, targets.float()) # Ошибка предсказаний
                        
                        total_val_loss += loss.item() # Потеря
                        # RMSE
                        val_rmse += root_mean_squared_error(targets.cpu().detach().numpy(), logits.cpu().detach().numpy())
        
                        pbar_val.update(1)
                        with torch.no_grad():
                            torch.cuda.empty_cache()
                    # Средняя потеря
                    avg_val_loss = round(total_val_loss / batch, self.round_loss)
                    losses_val_list.append(avg_val_loss)
        
                    # RMSE
                    val_rmse = round(val_rmse / len(self.val_dataloader.dataset) * 100, self.round_rmse)
                    rmse_val_list.append(val_rmse)
                    
                    pbar_val.set_postfix({
                        "rmse": val_rmse,
                        "Средняя потеря": avg_val_loss
                    })
            
            if val_rmse < min_val_rmse:
                min_val_rmse = val_rmse
                self._save_model(epoch, path_to_model, round(val_rmse, self.round_rmse), avg_val_loss)
                self.__best_dev_avg = val_rmse
                self.__no_improvement_count = 0
            else:
                self.__no_improvement_count += 1

            if self.__no_improvement_count >= self.patience:
                print(f"Ранняя остановка на эпохе {epoch} из-за отсутствия улучшения точности на тестовой выборке")
                return path_to_model
                break

    # Получение хэш-значения
    def __hash__(self):
        return id(self)

In [7]:
class PositionWiseFeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dim, dropout=0.1):
        super().__init__()
        self.layer_1 = nn.Linear(input_dim, hidden_dim)
        self.layer_2 = nn.Linear(hidden_dim, input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.layer_1(x)
        x = F.gelu(x)  # Более плавная активация
        x = self.dropout(x)
        return self.layer_2(x)

class AddAndNorm(nn.Module):
    def __init__(self, input_dim, dropout=0.1):
        super().__init__()
        self.norm = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, residual):
        return self.norm(x + self.dropout(residual))
    
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[: x.size(1)].detach()  # Отключаем градиенты
        return self.dropout(x)
    

class TransformerEncoderLayer(nn.Module):
    def __init__(self, input_dim, num_heads, dropout=0.1, positional_encoding=False):
        super().__init__()
        self.input_dim = input_dim
        self.self_attention = nn.MultiheadAttention(input_dim, num_heads, dropout=dropout, batch_first=True)
        self.feed_forward = PositionWiseFeedForward(input_dim, input_dim, dropout=dropout)
        self.add_norm_after_attention = AddAndNorm(input_dim, dropout=dropout)
        self.add_norm_after_ff = AddAndNorm(input_dim, dropout=dropout)
        self.positional_encoding = PositionalEncoding(input_dim) if positional_encoding else None

    def forward(self, key, value, query):
        if self.positional_encoding:
            key = self.positional_encoding(key)
            value = self.positional_encoding(value)
            query = self.positional_encoding(query)

        attn_output, _ = self.self_attention(query, key, value, need_weights=False)

        x = self.add_norm_after_attention(attn_output, query)

        ff_output = self.feed_forward(x)
        x = self.add_norm_after_ff(ff_output, x)

        return x

In [8]:
class MultiModalTransformer(nn.Module):
    def __init__(self, first_dim=768, second_dim=1024, hidden_dim=512, num_transformer_heads=2, positional_encoding=True, dropout=0, mode='mean', device="cuda",  tr_layer_number=1, out_features=128):
        super(MultiModalTransformer, self).__init__()

        self.mode = mode

        self.hidden_dim = hidden_dim

        # Проекционные слои

        self.first_proj = nn.Sequential(
            nn.Conv1d(first_dim, hidden_dim, 1),
            nn.GELU(),
        )

        self.second_proj = nn.Sequential(
            nn.Conv1d(second_dim, hidden_dim, 1),
            nn.GELU(),
        )

        # Механизмы внимания
        self.first_to_second_attn = nn.ModuleList([TransformerEncoderLayer(input_dim=hidden_dim, num_heads=num_transformer_heads, positional_encoding=positional_encoding, dropout=dropout) for i in range(tr_layer_number)
                ])
        self.second_to_first_attn = nn.ModuleList([TransformerEncoderLayer(input_dim=hidden_dim, num_heads=num_transformer_heads, positional_encoding=positional_encoding, dropout=dropout) for i in range(tr_layer_number)
                ])

        self.out = nn.Sequential(
            nn.Linear(hidden_dim*2, out_features) if self.mode == 'mean' else nn.Linear(hidden_dim*4, out_features),
            nn.LayerNorm(out_features),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(out_features, 1)
        )

    def forward(self, first_features, second_features):
        # Преобразование размерностей
        first_features = first_features.float()
        second_features = second_features.float()

        first_features = self.first_proj(first_features.permute(0,2,1)).permute(0,2,1)
        second_features = self.second_proj(second_features.permute(0,2,1)).permute(0,2,1)

        # Адаптивная пуллинг до минимальной длины
        min_seq_len = min(first_features.size(1), second_features.size(1))
        first_features = F.adaptive_avg_pool1d(first_features.permute(0,2,1), min_seq_len).permute(0,2,1)
        second_features = F.adaptive_avg_pool1d(second_features.permute(0,2,1), min_seq_len).permute(0,2,1)

        # Трансформерные блоки
        for i in range(len(self.first_to_second_attn)):
            attn_first = self.first_to_second_attn[i](second_features, first_features, first_features)
            attn_second = self.second_to_first_attn[i](first_features, second_features, second_features)
            first_features += attn_first
            second_features += attn_second

        # Статистики
        std_first, mean_first = torch.std_mean(attn_first, dim=1)
        std_second, mean_second = torch.std_mean(attn_second, dim=1)

        # Классификация
        if self.mode == 'mean':
            return self.out(torch.cat([mean_first, mean_first], dim=1))
        else:
            std_first = torch.nan_to_num(std_first, nan=0.0)
            std_second = torch.nan_to_num(std_second, nan=0.0)
            return self.out(torch.cat([mean_first, std_first, mean_second, std_second], dim=1))

In [9]:
class MultiModalTransformer_all(nn.Module):
    def __init__(self, tabular_dim=8, text_dim=1024, image_dim=768, hidden_dim=512, num_transformer_heads=2, positional_encoding=True, dropout=0, mode='mean', device="cuda",  tr_layer_number=1, out_features=128):
        super(MultiModalTransformer_all, self).__init__()
        
        self.tabular_text_cross = MultiModalTransformer(first_dim=tabular_dim, second_dim=text_dim, hidden_dim=hidden_dim, num_transformer_heads=num_transformer_heads, positional_encoding=positional_encoding, dropout=dropout, mode=mode, device=device,  tr_layer_number=tr_layer_number, out_features=out_features)
        self.tabular_image_cross = MultiModalTransformer(first_dim=tabular_dim, second_dim=image_dim, hidden_dim=hidden_dim, num_transformer_heads=num_transformer_heads, positional_encoding=positional_encoding, dropout=dropout, mode=mode, device=device,  tr_layer_number=tr_layer_number, out_features=out_features)
        self.text_image_cross = MultiModalTransformer(first_dim=text_dim, second_dim=image_dim, hidden_dim=hidden_dim, num_transformer_heads=num_transformer_heads, positional_encoding=positional_encoding, dropout=dropout, mode=mode, device=device,  tr_layer_number=tr_layer_number, out_features=out_features)

    def forward(self, tabular, text_features, image_features):
        tabular_text = self.tabular_text_cross(tabular, text_features)
        tabular_image = self.tabular_image_cross(tabular, image_features)
        text_image = self.text_image_cross(text_features, image_features)
        
        
        return sum([tabular_text, tabular_image, text_image]) / 3.0

In [10]:
EPOCHS = 20 # Количество эпох
BATCH_SIZE = 32 # Размер выборки (пакета)
LEARNING_RATE = 1e-4 # Скорость обучения
ROUND_RMSE = 2 # Знаков Accuracy после запятой
ROUND_LOSS = 7 # Знаков Loss после запятой
ROOT_DIR = os.path.join(".")
PATH_TO_MODEL = os.path.join(ROOT_DIR, "Models_cross_attention_image-text")

In [51]:
model = MultiModalTransformer(first_dim=1024, second_dim=768).to(device)
optimizer = optim.Adam(params = model.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
trainer = ModelTrainer(model, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, random_seed=99)
trainer.train(PATH_TO_MODEL)

Эпоха 1: 100%|██████████| 752/752 [02:33<00:00,  4.91batch/s, rmse=0.86, Средняя потеря=0.0782]
Тестирование 1: 100%|██████████| 188/188 [00:30<00:00,  6.22batch/s, rmse=0.84, Средняя потеря=0.0743]
Эпоха 2: 100%|██████████| 752/752 [02:36<00:00,  4.79batch/s, rmse=0.83, Средняя потеря=0.0721]
Тестирование 2: 100%|██████████| 188/188 [00:29<00:00,  6.30batch/s, rmse=0.83, Средняя потеря=0.0723]
Эпоха 3: 100%|██████████| 752/752 [02:28<00:00,  5.06batch/s, rmse=0.82, Средняя потеря=0.0701]
Тестирование 3: 100%|██████████| 188/188 [00:28<00:00,  6.50batch/s, rmse=0.84, Средняя потеря=0.075]
Эпоха 4: 100%|██████████| 752/752 [02:30<00:00,  4.99batch/s, rmse=0.8, Средняя потеря=0.0677]
Тестирование 4: 100%|██████████| 188/188 [00:29<00:00,  6.48batch/s, rmse=0.83, Средняя потеря=0.0727]
Эпоха 5: 100%|██████████| 752/752 [02:30<00:00,  4.99batch/s, rmse=0.78, Средняя потеря=0.0646]
Тестирование 5: 100%|██████████| 188/188 [00:28<00:00,  6.58batch/s, rmse=0.82, Средняя потеря=0.0698]
Эпоха 6

Ранняя остановка на эпохе 15 из-за отсутствия улучшения точности на тестовой выборке





'./Models_cross_attention_image-text'

In [11]:
model = MultiModalTransformer(first_dim=1024, second_dim=768, mode='not_mean').to(device)
optimizer = optim.Adam(params = model.parameters(), lr = LEARNING_RATE)
loss_fn = nn.MSELoss()
trainer = ModelTrainer(model, train_dataloader, val_dataloader, device, EPOCHS, ROUND_LOSS, ROUND_RMSE, optimizer, loss_fn, random_seed=99)
trainer.train(PATH_TO_MODEL)

Эпоха 1: 100%|██████████| 752/752 [01:33<00:00,  8.08batch/s, rmse=0.86, Средняя потеря=0.0786]
Тестирование 1: 100%|██████████| 188/188 [00:15<00:00, 11.93batch/s, rmse=0.84, Средняя потеря=0.0733]
Эпоха 2: 100%|██████████| 752/752 [01:29<00:00,  8.38batch/s, rmse=0.82, Средняя потеря=0.0709]
Тестирование 2: 100%|██████████| 188/188 [00:15<00:00, 12.45batch/s, rmse=0.83, Средняя потеря=0.0711]
Эпоха 3: 100%|██████████| 752/752 [01:28<00:00,  8.49batch/s, rmse=0.8, Средняя потеря=0.0673]
Тестирование 3: 100%|██████████| 188/188 [00:14<00:00, 12.86batch/s, rmse=0.82, Средняя потеря=0.0701]
Эпоха 4: 100%|██████████| 752/752 [01:27<00:00,  8.58batch/s, rmse=0.78, Средняя потеря=0.0645]
Тестирование 4: 100%|██████████| 188/188 [00:14<00:00, 13.00batch/s, rmse=0.83, Средняя потеря=0.0725]
Эпоха 5: 100%|██████████| 752/752 [01:28<00:00,  8.52batch/s, rmse=0.76, Средняя потеря=0.0614]
Тестирование 5: 100%|██████████| 188/188 [00:14<00:00, 13.10batch/s, rmse=0.8, Средняя потеря=0.068]
Эпоха 6:

Ранняя остановка на эпохе 15 из-за отсутствия улучшения точности на тестовой выборке





'./Models_cross_attention_image-text'