In [82]:
import math

import numpy as np
import torch
from catboost.datasets import msrank_10k
from sklearn.preprocessing import StandardScaler

from typing import List


class ListNet(torch.nn.Module):
    """Small feed-forward scorer: Linear -> ReLU -> Linear with one output per row.

    Args:
        num_input_features: width of each input row.
        hidden_dim: number of units in the single hidden layer.
    """

    def __init__(self, num_input_features: int, hidden_dim: int):
        super().__init__()
        self.hidden_dim = hidden_dim
        # Layers are created in network order so that seeded initialisation
        # consumes the random stream in the same sequence every time.
        layers = [
            torch.nn.Linear(num_input_features, hidden_dim),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_dim, 1),
        ]
        self.model = torch.nn.Sequential(*layers)

    def forward(self, input_1: torch.Tensor) -> torch.Tensor:
        """Return one raw score per input row (last dimension has size 1)."""
        return self.model(input_1)


class Solution:
    def __init__(self, n_epochs: int = 5, listnet_hidden_dim: int = 30,
                 lr: float = 0.001, ndcg_top_k: int = 10):
        self._prepare_data()
        self.num_input_features = self.X_train.shape[1]
        self.ndcg_top_k = ndcg_top_k
        self.n_epochs = n_epochs

        self.model = self._create_model(
            self.num_input_features, listnet_hidden_dim)
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    def _get_data(self) -> List[np.ndarray]:
        train_df, test_df = msrank_10k()

        X_train = train_df.drop([0, 1], axis=1).values
        y_train = train_df[0].values
        query_ids_train = train_df[1].values.astype(int)

        X_test = test_df.drop([0, 1], axis=1).values
        y_test = test_df[0].values
        query_ids_test = test_df[1].values.astype(int)

        return [X_train, y_train, query_ids_train, X_test, y_test, query_ids_test]

    def _prepare_data(self) -> None:
        (X_train, y_train, self.query_ids_train,
            X_test, y_test, self.query_ids_test) = self._get_data()

        self.X_train = self._scale_features_in_query_groups(X_train, self.query_ids_train)
        self.X_test = self._scale_features_in_query_groups(X_test, self.query_ids_test)

        self.ys_train = torch.FloatTensor(y_train)
        self.ys_test = torch.FloatTensor(y_test)

    
#     def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
#                                         inp_query_ids: np.ndarray) -> np.ndarray:
        
#         total_inp_scaled = np.empty((0, inp_feat_array.shape[1])) 
        
#         total_ids_list = []
        
#         for for_v in np.unique(inp_query_ids):
#             ids_list = [i for i, v in enumerate(inp_query_ids) if v==for_v]
            
            
#             scaler = StandardScaler()
            
#             group_inp_scaled = scaler.fit_transform(inp_feat_array[ids_list])
                        
#             total_inp_scaled = np.concatenate([total_inp_scaled, 
#                                                group_inp_scaled])
    
#             total_ids_list.append(ids_list)
        
#         total_ids_list = [i for sl in total_ids_list for i in sl]
            
#         sort_ind = np.argsort(total_ids_list)
        
#         total_inp_scaled = torch.FloatTensor(total_inp_scaled[sort_ind])
     
#         return total_inp_scaled
    
    # ideal
    def _scale_features_in_query_groups(self, inp_feat_array: np.ndarray,
                                        inp_query_ids: np.ndarray) -> np.ndarray:
        for cur_id in np.unique(inp_query_ids):
            mask = inp_query_ids == cur_id
            tmp_array = inp_feat_array[mask]
            scaler = StandardScaler()
            inp_feat_array[mask] = scaler.fit_transform(tmp_array)

        return inp_feat_array


    def _create_model(self, listnet_num_input_features: int,
                      listnet_hidden_dim: int) -> torch.nn.Module:
        torch.manual_seed(0)

        net = ListNet(listnet_num_input_features, listnet_hidden_dim)
        
        return net
    

#     def fit(self) -> List[float]:
        
#         score_list = []
        
#         for e in range(self.n_epochs):
            
#             N_train = len(self.X_train)
#             idx = torch.randperm(N_train) # шафлим чтобы 1 батч 1й эпохи отличался от 1 бача во 2й
#             self.X_train = self.X_train[idx]
#             self.ys_train = self.ys_train[idx]
        
#             print(f'epoch {e+1}')
#             self._train_one_epoch()
#             score_ep = self._eval_test_set()
#             print(f'score on eval set: {round(score_ep, 4)}')
#             score_list.append(score_ep)
#             print('\n')
            
#         return score_list
    
    # ideal
    def fit(self) -> List[float]:
        metrics = []
        # нужны ли пермутации?
        for epoch_no in range(1, self.n_epochs + 1):
            self._train_one_epoch()
            ep_metric = self._eval_test_set()
            metrics.append(ep_metric)
        return metrics


    def _train_one_epoch(self) -> None:
        
        self.model.train()
    
        for train_id_cat in np.unique(self.query_ids_train):

#             idx_list = [i for i, v in enumerate(self.query_ids_train) if v==train_id_cat]
#             batch_X = self.X_train[idx_list]
#             batch_ys = self.ys_train[idx_list]
            
            mask_train = self.query_ids_train == train_id_cat
            batch_X = self.X_train[mask_train]
            batch_ys = self.ys_train[mask_train]
            
            self.optimizer.zero_grad() # чтобы не накапливать градиенты

            batch_pred = self.model(batch_X).reshape(-1, ) 
            # не нужно прописывать model.forward тк при наследовании от класса torch.nn.Module метод __call__ заменяется на метод forward
            batch_loss = self._calc_loss(batch_ys, batch_pred)

            batch_loss.backward(retain_graph=True) # возвращаемый тензор может прокидывать через себя градиенты (batch_loss)
            # backward рассчитывает градиенты, накапливаются градиенты, по которым мы можем делать шаг градиентного спуска (step)
#             retain_graph=True часто не нужен
            self.optimizer.step()

    
    def _eval_test_set(self) -> float:
        with torch.no_grad(): # заставляем торч не рассчитывать градиенты
            self.model.eval() 
            # в данном примере не на что не влияет потому что в модели нет слоев которые изменяются на этапе обучения и предсказания(типа dropout, batchnorm)
            # dropout рандомно зануляет признаки симулирую их выкидывание - чтобы не переобучаться, для генерализации
            # eval - при предсказании не нужно выбрасывать признаки
            ndcgs = []
            for test_id_cat in np.unique(self.query_ids_test):

                idx_list = [i for i, v in enumerate(self.query_ids_test) if v==test_id_cat]

                valid_pred = self.model(self.X_test[idx_list])
                ys_valid = self.ys_test[idx_list]

                ndcg_score = self._ndcg_k(ys_valid, valid_pred, self.ndcg_top_k)

                ndcgs.append(ndcg_score)
                
#                 print(ndcg_score)

            return np.mean(ndcgs)
       
    # idel
    def _eval_test_set(self) -> float:
        with torch.no_grad():
            self.model.eval()
            ndcgs = []
            for cur_id in np.unique(self.query_ids_test):
                mask = self.query_ids_test == cur_id
                X_test_tmp = self.X_test[mask]
                valid_pred = self.model(X_test_tmp)
                ndcg_score = self._ndcg_k(
                    self.ys_test[mask], valid_pred, self.ndcg_top_k)
                if np.isnan(ndcg_score):
                    ndcgs.append(0)
                    continue
                ndcgs.append(ndcg_score)
            return np.mean(ndcgs)
        
    def _calc_loss(self, batch_ys: torch.FloatTensor,
                   batch_pred: torch.FloatTensor) -> torch.FloatTensor:
        
        P_y_i = torch.softmax(batch_ys, dim=0) 
        P_z_i = torch.softmax(batch_pred, dim=0)
        
        loss = -torch.sum(P_y_i * torch.log(P_z_i))
#         loss = -torch.sum(P_y_i * torch.log(P_z_i/P_y_i))
        return loss

        
    def _ndcg_k(self, ys_true, ys_pred, ndcg_top_k) -> float:
        def dcg(ys_true, ys_pred):
            _, argsort = torch.sort(ys_pred, descending=True, dim=0)
            argsort = argsort[:ndcg_top_k]
            ys_true_sorted = ys_true[argsort]
            ret = 0
            for i, l in enumerate(ys_true_sorted, 1):
                ret += (2 ** l - 1) / math.log2(1 + i)
            return ret
        ideal_dcg = dcg(ys_true, ys_true)
        pred_dcg = dcg(ys_true, ys_pred)
        return (pred_dcg / ideal_dcg).item()
    
    
#     def _ndcg_k(self, ys_true: torch.Tensor, ys_pred: torch.Tensor,
#                 ndcg_top_k: int) -> float:

#         ys_ideal_true_sort, _ = torch.sort(ys_true, descending=True, dim=0)
    
#         if ndcg_top_k:
#             ys_ideal_true_sort = ys_ideal_true_sort[:ndcg_top_k]

#         ideal_dcg = 0
#         for ix, e in enumerate(ys_ideal_true_sort):
#             ideal_dcg += self.compute_gain(float(e), gain_scheme="exp2")/math.log2(ix+2)

#         dcg_ = self.dcg(ys_true, ys_pred, gain_scheme="exp2", top_k=ndcg_top_k)
        
#         return dcg_/ideal_dcg


#     def dcg(self, ys_true: torch.Tensor, ys_pred: torch.Tensor, gain_scheme: str, top_k: int) -> float:
#         ys_pred_sort, sort_indices = torch.sort(ys_pred, descending=True, dim=0)
#         ys_true_sort = ys_true[sort_indices]
        
#         if top_k:
#             ys_true_sort = ys_true_sort[:top_k]

#         dcg_ = 0
#         for ix, e in enumerate(ys_true_sort):
#             dcg_ += self.compute_gain(float(e), gain_scheme)/math.log2(ix+2)
            
#         return dcg_

    
#     def compute_gain(self, y_value: float, gain_scheme: str) -> float:
#         if gain_scheme=="const":
#             return y_value
#         elif gain_scheme=="exp2":
#             # return 2**y_value - 1
#             return pow(2, y_value) - 1
#         else:
#             return None


In [84]:
# Build the trainer with default hyperparameters; the constructor calls
# _prepare_data(), which loads msrank_10k and scales features per query.
s = Solution()

In [85]:
# Train for n_epochs; fit() returns one _eval_test_set() score per epoch.
# NOTE(review): the saved output below shows per-epoch "epoch N / score on eval set"
# lines that the active fit() does not print — presumably left over from an
# earlier version of fit(); re-run the notebook to refresh it.
l = s.fit()

epoch 1
score on eval set: 0.4254
----------------------------------------------------------------------------------------------------


epoch 2
score on eval set: 0.4214
----------------------------------------------------------------------------------------------------


epoch 3
score on eval set: 0.4434
----------------------------------------------------------------------------------------------------


epoch 4
score on eval set: 0.4248
----------------------------------------------------------------------------------------------------


epoch 5
score on eval set: 0.4172
----------------------------------------------------------------------------------------------------




In [87]:
# Display the per-epoch scores returned by fit().
l

[0.42544628610717683,
 0.4214029931984315,
 0.443382901354363,
 0.4247930105962125,
 0.41723439299587833]