In [18]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import math
import torch.nn.functional as F

import warnings
for warn in [UserWarning, FutureWarning]: warnings.filterwarnings("ignore", category = warn)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Данные

In [14]:
def preprocess(df):
    df['activation_date'] = pd.to_datetime(df['activation_date'])

    df['day'] = df['activation_date'].dt.day
    df['month'] = df["activation_date"].dt.month
    df['year'] = df["activation_date"].dt.year
    df['weekday'] = df['activation_date'].dt.weekday
    df["dayofyear"] = df['activation_date'].dt.dayofyear
    df.drop(columns=['activation_date', 'item_id'], inplace=True)
    df['param_1'] = df['param_1'].fillna('')
    df['param_2'] = df['param_2'].fillna('')
    df['param_3'] = df['param_3'].fillna('')
    df['description'] = df['description'].fillna('')
    return df

test = pd.read_csv('../data/test.csv')
item_id = test.item_id
test = preprocess(test)

На табличных данных + тексте лучшие результаты получили RNN и Transformer соответственно

### Табличные данные RNN

In [6]:
result_tabular = pd.read_csv("../results/rnn-tabular.csv").deal_probability

### Текст Transformer

In [7]:
result_text = pd.read_csv("../results/transformer-text.csv").deal_probability

### Изображения

### Transformer

In [8]:
class PositionWiseFeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dim, dropout=0.1):
        super().__init__()
        self.layer_1 = nn.Linear(input_dim, hidden_dim)
        self.layer_2 = nn.Linear(hidden_dim, input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.layer_1(x)
        x = F.gelu(x)  # Более плавная активация
        x = self.dropout(x)
        return self.layer_2(x)

class AddAndNorm(nn.Module):
    def __init__(self, input_dim, dropout=0.1):
        super().__init__()
        self.norm = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, residual):
        return self.norm(x + self.dropout(residual))
    
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer("pe", pe)

    def forward(self, x):
        x = x + self.pe[: x.size(1)].detach()  # Отключаем градиенты
        return self.dropout(x)
    

class TransformerEncoderLayer(nn.Module):
    def __init__(self, input_dim, num_heads, dropout=0.1, positional_encoding=False):
        super().__init__()
        self.input_dim = input_dim
        self.self_attention = nn.MultiheadAttention(input_dim, num_heads, dropout=dropout, batch_first=True)
        self.feed_forward = PositionWiseFeedForward(input_dim, input_dim, dropout=dropout)
        self.add_norm_after_attention = AddAndNorm(input_dim, dropout=dropout)
        self.add_norm_after_ff = AddAndNorm(input_dim, dropout=dropout)
        self.positional_encoding = PositionalEncoding(input_dim) if positional_encoding else None

    def forward(self, key, value, query):
        if self.positional_encoding:
            key = self.positional_encoding(key)
            value = self.positional_encoding(value)
            query = self.positional_encoding(query)

        attn_output, _ = self.self_attention(query, key, value, need_weights=False)

        x = self.add_norm_after_attention(attn_output, query)

        ff_output = self.feed_forward(x)
        x = self.add_norm_after_ff(ff_output, x)

        return x

In [9]:
class TransformerModelWithAttention(nn.Module):
    def __init__(self, input_dim = 1024, hidden_dim=128, num_heads = 4, num_layers = 8, dropout = 0.1, positional_encoding=True):
        super(TransformerModelWithAttention, self).__init__()
        self.in_layer = nn.Linear(input_dim, hidden_dim)
        self.positional_encoding = PositionalEncoding(hidden_dim)
        self.transformer_encoder = nn.ModuleList([TransformerEncoderLayer(input_dim=hidden_dim, num_heads=num_heads, positional_encoding=positional_encoding, dropout=dropout) for i in range(num_layers)])
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = x.to(device)
        x = self.in_layer(x)
        batch_size, seq_len, _ = x.size()
        x = self.positional_encoding(x)
        for i in range(len(self.transformer_encoder)):
            x = x + self.transformer_encoder[i](x, x, x)
        x = x.mean(dim = 1)
        return self.fc_out(x).flatten()

In [40]:
model = TransformerModelWithAttention(num_layers=2, input_dim=768, hidden_dim=128, num_heads=2)
checkpoint = torch.load("models/TransformerModelWithAttention_4_0.91_checkpoint.pth", map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [15]:
import pickle
import io

class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        else:
            return super().find_class(module, name)

In [41]:
y_pred = []
len_test = test.shape[0]
x = 200
for i, row in tqdm(test.iterrows(), total=len_test):
    if i == 32001:
        x = 100
    if i <= 36600 and i % 200 == 0:
        with open("../data/vit/vit_test_jpg_" + str(i-1+200), "rb") as f: 
            vit_emb = CPU_Unpickler(f).load() 
    if i > 36600 and i % 100 == 0:
        try:
            with open("../data/vit/vit_test_jpg_" + str(i-1+100), "rb") as f: 
                vit_emb = CPU_Unpickler(f).load() 
        except:
            vit_emb = [None] * 100
    image_embedding = vit_emb[i % x]
    if image_embedding is None:
        y_pred.append(0.0)
    else:
        if image_embedding.shape[0] == 1:
            y_pred.append(float(model(image_embedding.float())))
        else:
            y_pred.append(float(model(image_embedding.unsqueeze(0).float())))

100%|██████████| 508438/508438 [02:52<00:00, 2949.60it/s]  


In [42]:
result_image = np.clip(y_pred, 0, 1)
pd.DataFrame({'item_id': item_id, 'deal_probability': result_image}).to_csv("../results/transformer-image.csv", index=0)

### LSTM

In [37]:
class LSTM(nn.Module):
    def __init__(self, input_size = 1024, hidden_size = 64, num_layers = 2, dropout = 0.1, bidirectional=True):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
            dropout = dropout,
            bidirectional=bidirectional
        )
        if bidirectional:
            self.fc = nn.Linear(2 * hidden_size, 1)
        else:
            self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        if self.lstm.bidirectional:
            h0, c0 = torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device)
        else:
            h0, c0 = torch.zeros(self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(self.num_layers, len(x), self.hidden_size).to(device)
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        if self.lstm.bidirectional:
            out = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)
        else:
            out = out[:, -1, :]
        out = self.fc(out)
        return out

In [43]:
model = LSTM(input_size=768)
checkpoint = torch.load("models/LSTM_1_0.91_checkpoint.pth", map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [44]:
y_pred = []
len_test = test.shape[0]
x = 200
for i, row in tqdm(test.iterrows(), total=len_test):
    if i == 32001:
        x = 100
    if i <= 36600 and i % 200 == 0:
        with open("../data/vit/vit_test_jpg_" + str(i-1+200), "rb") as f: 
            vit_emb = CPU_Unpickler(f).load() 
    if i > 36600 and i % 100 == 0:
        try:
            with open("../data/vit/vit_test_jpg_" + str(i-1+100), "rb") as f: 
                vit_emb = CPU_Unpickler(f).load() 
        except:
            vit_emb = [None] * 100
    image_embedding = vit_emb[i % x]
    if image_embedding is None:
        y_pred.append(0.0)
    else:
        if image_embedding.shape[0] == 1:
            y_pred.append(float(model(image_embedding.float())))
        else:
            y_pred.append(float(model(image_embedding.unsqueeze(0).float())))

100%|██████████| 508438/508438 [11:08<00:00, 760.70it/s]   


In [45]:
result_image = np.clip(y_pred, 0, 1)
pd.DataFrame({'item_id': item_id, 'deal_probability': result_image}).to_csv("../results/LSTM-image.csv", index=0)

### RNN + Transformer + Transformer

In [47]:
result_image = pd.read_csv("../results/transformer-image.csv").deal_probability

In [49]:
pd.DataFrame({'item_id': item_id, 'deal_probability': (result_tabular + result_text + result_image) / 3.0}).to_csv("../results/decision_rnn_transformer_transformer.csv", index=0)

Результат: 0.25816

### RNN + Transformer + LSTM

In [50]:
result_image = pd.read_csv("../results/LSTM-image.csv").deal_probability

In [51]:
pd.DataFrame({'item_id': item_id, 'deal_probability': (result_tabular + result_text + result_image) / 3.0}).to_csv("../results/decision_rnn_transformer_lstm.csv", index=0)

Результат: 0.25829