In [None]:
import pandas as pd
import torch.nn as nn
import torch
from tqdm import tqdm
from torch import Tensor
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Данные

In [2]:
test = pd.read_csv('../data/test.csv')

In [3]:
def preprocess(df):
    df['activation_date'] = pd.to_datetime(df['activation_date'])

    df['day'] = df['activation_date'].dt.day
    df['month'] = df["activation_date"].dt.month
    df['year'] = df["activation_date"].dt.year
    df['weekday'] = df['activation_date'].dt.weekday
    df["dayofyear"] = df['activation_date'].dt.dayofyear
    df.drop(columns=['activation_date', 'item_id'], inplace=True)
    df['param_1'] = df['param_1'].fillna('')
    df['param_2'] = df['param_2'].fillna('')
    df['param_3'] = df['param_3'].fillna('')
    df['description'] = df['description'].fillna('')
    return df

item_id = test.item_id
test = preprocess(test)

### Transformer

In [35]:
class TransformerModelWithAttention(nn.Module):
    def __init__(self, input_dim = 1024, hidden_dim=128, num_heads = 4, num_layers = 8, dropout = 0.1):
        super(TransformerModelWithAttention, self).__init__()
        self.in_layer = nn.Linear(input_dim, hidden_dim)
        self.positional_encoding = nn.Parameter(torch.zeros(1, 10000, hidden_dim))
        encoder_layer = nn.TransformerEncoderLayer(d_model = hidden_dim, nhead = num_heads, dim_feedforward = hidden_dim, dropout = dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers = num_layers)
        self.fc_out = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        x = x.to(device)
        x = self.in_layer(x)
        batch_size, seq_len, _ = x.size()
        x = x + self.positional_encoding[:, :seq_len, :]
        encoder_output = self.transformer_encoder(x)
        x = encoder_output.mean(dim = 1)
        return torch.clamp(self.fc_out(x), 0.0, 1.0).flatten()

In [4]:
import os
jina_list = sorted(os.listdir('../data/jina'), key= lambda x: int(x.replace("jina_test_", "")))

In [5]:
import pickle
import io

class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        else:
            return super().find_class(module, name)

In [37]:
model = TransformerModelWithAttention(num_layers=2, input_dim=1024, hidden_dim=128, num_heads=2)
checkpoint = torch.load("models/TransformerModelWithAttention_1_0.89_checkpoint.pth", map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [79]:
user_type_dict = {'Private': 0, 'Company': 1, 'Shop': 2}
y_pred = []
jina_list_ind = -1
len_test = test.shape[0]
for i, row in tqdm(test.iterrows(), total=len_test):
    if i % 10000 == 0:
        jina_list_ind += 1
        jina_name = jina_list[jina_list_ind]
        with open("../data/jina/" + jina_name, "rb") as f:  
            jina_emb = CPU_Unpickler(f).load()
    tabular = torch.tensor([row["item_seq_number"], row["day"], row["month"], row["year"], row["weekday"], row["dayofyear"], user_type_dict[row["user_type"]], 0.0 if row["price"] is None else row["price"]])
    tabular = tabular.unsqueeze(0).unsqueeze(2).expand(-1, -1, 1024)
    tabular = torch.nan_to_num(tabular,nan=0.0)
    text_embedding = jina_emb[i % 10000].unsqueeze(0)
    emb_concat = torch.concat((tabular, text_embedding), 1)
    y_pred.append(float(model(emb_concat)))

100%|██████████| 508438/508438 [10:21<00:00, 818.04it/s] 


In [80]:
pd.DataFrame({'item_id': item_id, 'deal_probability': y_pred}).to_csv("../results/feature-level-Transformer.csv", index=0)

Результат: 0.30322

### Mamba

In [9]:
from torch.nn.functional import silu
from torch.nn.functional import softplus
from einops import rearrange, repeat, einsum
class RMSNorm(nn.Module):
    def __init__(self, d_model: int, eps: float = 1e-8) -> None:
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(d_model))

    def forward(self, x: Tensor) -> Tensor:        
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim = True) + self.eps) * self.weight

class Mamba(nn.Module):
    def __init__(self, num_layers, d_input, d_model, d_state=16, d_discr=None, ker_size=4):
        super().__init__()
        mamba_par = {
            'd_input' : d_input,
            'd_model' : d_model,
            'd_state' : d_state,
            'd_discr' : d_discr,
            'ker_size': ker_size
        }
        self.layers = nn.ModuleList([nn.ModuleList([MambaBlock(**mamba_par), RMSNorm(d_input)]) for _ in range(num_layers)])
        self.fc_out = nn.Linear(d_input, 1)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
    def forward(self, seq, cache=None):
        seq = seq.to(self.device)
        for mamba, norm in self.layers:
            out, cache = mamba(norm(seq), cache)
            seq = out + seq
        return self.fc_out(seq.mean(dim = 1))
        
class MambaBlock(nn.Module):
    def __init__(self, d_input, d_model, d_state=16, d_discr=None, ker_size=4):
        super().__init__()
        d_discr = d_discr if d_discr is not None else d_model // 16
        self.in_proj  = nn.Linear(d_input, 2 * d_model, bias=False)
        self.out_proj = nn.Linear(d_model, d_input, bias=False)
        self.s_B = nn.Linear(d_model, d_state, bias=False)
        self.s_C = nn.Linear(d_model, d_state, bias=False)
        self.s_D = nn.Sequential(nn.Linear(d_model, d_discr, bias=False), nn.Linear(d_discr, d_model, bias=False),)
        self.conv = nn.Conv1d(
            in_channels=d_model,
            out_channels=d_model,
            kernel_size=ker_size,
            padding=ker_size - 1,
            groups=d_model,
            bias=True,
        )
        self.A = nn.Parameter(torch.arange(1, d_state + 1, dtype=torch.float).repeat(d_model, 1))
        self.D = nn.Parameter(torch.ones(d_model, dtype=torch.float))
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
    def forward(self, seq, cache=None):
        b, l, d = seq.shape
        (prev_hid, prev_inp) = cache if cache is not None else (None, None)
        a, b = self.in_proj(seq).chunk(2, dim=-1)
        x = rearrange(a, 'b l d -> b d l')
        x = x if prev_inp is None else torch.cat((prev_inp, x), dim=-1)
        a = self.conv(x)[..., :l]
        a = rearrange(a, 'b d l -> b l d')
        a = silu(a)
        a, hid = self.ssm(a, prev_hid=prev_hid) 
        b = silu(b)
        out = a * b
        out =  self.out_proj(out)
        if cache:
            cache = (hid.squeeze(), x[..., 1:])   
        return out, cache
    
    def ssm(self, seq, prev_hid):
        A = -self.A
        D = +self.D
        B = self.s_B(seq)
        C = self.s_C(seq)
        s = softplus(D + self.s_D(seq))
        A_bar = einsum(torch.exp(A), s, 'd s,   b l d -> b l d s')
        B_bar = einsum(          B,  s, 'b l s, b l d -> b l d s')
        X_bar = einsum(B_bar, seq, 'b l d s, b l d -> b l d s')
        hid = self._hid_states(A_bar, X_bar, prev_hid=prev_hid)
        out = einsum(hid, C, 'b l d s, b l s -> b l d')
        out = out + D * seq
        return out, hid
    
    def _hid_states(self, A, X, prev_hid=None):
        b, l, d, s = A.shape
        A = rearrange(A, 'b l d s -> l b d s')
        X = rearrange(X, 'b l d s -> l b d s')
        if prev_hid is not None:
            return rearrange(A * prev_hid + X, 'l b d s -> b l d s')
        h = torch.zeros(b, d, s, device=self.device)
        return torch.stack([h := A_t * h + X_t for A_t, X_t in zip(A, X)], dim=1)

In [12]:
model = Mamba(num_layers=2, d_input=1024, d_model=128).to(device)
checkpoint = torch.load("models/Mamba_11_4.28_checkpoint.pth", map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [13]:
user_type_dict = {'Private': 0, 'Company': 1, 'Shop': 2}
y_pred = []
jina_list_ind = -1
len_test = test.shape[0]
for i, row in tqdm(test.iterrows(), total=len_test):
    if i % 10000 == 0:
        jina_list_ind += 1
        jina_name = jina_list[jina_list_ind]
        with open("../data/jina/" + jina_name, "rb") as f:  
            jina_emb = CPU_Unpickler(f).load()
    tabular = torch.tensor([row["item_seq_number"], row["day"], row["month"], row["year"], row["weekday"], row["dayofyear"], user_type_dict[row["user_type"]], 0.0 if row["price"] is None else row["price"]])
    tabular = tabular.unsqueeze(0).unsqueeze(2).expand(-1, -1, 1024)
    tabular = torch.nan_to_num(tabular,nan=0.0)
    text_embedding = jina_emb[i % 10000].unsqueeze(0)
    emb_concat = torch.concat((tabular, text_embedding), 1)
    y_pred.append(float(model(emb_concat)))

100%|██████████| 508438/508438 [1:18:50<00:00, 107.48it/s] 


In [16]:
pd.DataFrame({'item_id': item_id, 'deal_probability': np.clip(y_pred, 0, 1)}).to_csv("../results/feature-level-Mamba.csv", index=0)

Результат: 0.45980

### LSTM

In [17]:
class LSTM(nn.Module):
    def __init__(self, input_size = 1024, hidden_size = 64, num_layers = 2, dropout = 0.1, bidirectional=True):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
            dropout = dropout,
            bidirectional=bidirectional
        )
        if bidirectional:
            self.fc = nn.Linear(2 * hidden_size, 1)
        else:
            self.fc = nn.Linear(hidden_size, 1)
        
    def forward(self, x):
        if self.lstm.bidirectional:
            h0, c0 = torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(2 * self.num_layers, len(x), self.hidden_size).to(device)
        else:
            h0, c0 = torch.zeros(self.num_layers, len(x), self.hidden_size).to(device), torch.zeros(self.num_layers, len(x), self.hidden_size).to(device)
        out, (hn, cn) = self.lstm(x, (h0.detach(), c0.detach()))
        if self.lstm.bidirectional:
            out = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)
        else:
            out = out[:, -1, :]
        out = self.fc(out)
        return out

In [18]:
model = LSTM().to(device)
checkpoint = torch.load("models/LSTM_1_0.89_checkpoint.pth", map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [19]:
user_type_dict = {'Private': 0, 'Company': 1, 'Shop': 2}
y_pred = []
jina_list_ind = -1
len_test = test.shape[0]
for i, row in tqdm(test.iterrows(), total=len_test):
    if i % 10000 == 0:
        jina_list_ind += 1
        jina_name = jina_list[jina_list_ind]
        with open("../data/jina/" + jina_name, "rb") as f:  
            jina_emb = CPU_Unpickler(f).load()
    tabular = torch.tensor([row["item_seq_number"], row["day"], row["month"], row["year"], row["weekday"], row["dayofyear"], user_type_dict[row["user_type"]], 0.0 if row["price"] is None else row["price"]])
    tabular = tabular.unsqueeze(0).unsqueeze(2).expand(-1, -1, 1024)
    tabular = torch.nan_to_num(tabular,nan=0.0)
    text_embedding = jina_emb[i % 10000].unsqueeze(0)
    emb_concat = torch.concat((tabular, text_embedding), 1)
    y_pred.append(float(model(emb_concat)))

100%|██████████| 508438/508438 [1:04:46<00:00, 130.82it/s]


In [21]:
pd.DataFrame({'item_id': item_id, 'deal_probability': np.clip(y_pred, 0, 1)}).to_csv("../results/feature-level-LSTM.csv", index=0)

Результат: 0.27722