In [1]:
import sys
# add the parent directory to the import path (presumably where the project-local `recommendation` package lives)
sys.path.append('../')


import functools
import json
import os
from itertools import starmap
from multiprocessing.pool import Pool
from typing import List, Tuple

import luigi
import numpy as np
import pandas as pd
from tqdm import tqdm

from recommendation.fairness_metrics import calculate_fairness_metrics
from recommendation.files import get_test_set_predictions_path
from recommendation.offpolicy_metrics import eval_IPS, eval_CIPS, eval_SNIPS, eval_doubly_robust
from recommendation.rank_metrics import average_precision, precision_at_k, ndcg_at_k, prediction_coverage_at_k, \
    personalization_at_k
from recommendation.task.model.base import BaseEvaluationTask
from recommendation.utils import parallel_literal_eval



In [2]:
def _create_relevance_list(sorted_actions: List[int], expected_action: int, reward: int) -> List[int]:
    if reward == 1:
        return [1 if action == expected_action else 0 for action in sorted_actions]
    else:
        return [0 for _ in sorted_actions]


def _ps_policy_eval(relevance_list: List[int], prob_actions: List[float]) -> List[float]:
    return np.sum(np.array(relevance_list) * np.array(prob_actions[:len(relevance_list)])).tolist()


def _get_rhat_scores(relevance_list: List[int], action_scores: List[float]) -> List[float]:
    return np.sum(np.array(relevance_list) * np.array(action_scores[:len(relevance_list)])).tolist()


def _get_rhat_rewards(relevance_list: List[int]) -> float:
    return relevance_list[0]


In [3]:
# Directory holding the trained model's outputs. Override it through the
# MODEL_OUTPUT_DIR environment variable instead of editing this cell; the
# default keeps the location originally hard-coded here.
MODEL_OUTPUT_DIR = os.environ.get(
    "MODEL_OUTPUT_DIR",
    "/media/workspace/DeepFood/deep-reco-gym/output/interaction/TrivagoLogisticModelInteraction/results/TrivagoLogisticModelInteraction_selu____model_0299e3cf78",
)

# get_test_set_predictions_path presumably resolves the test-set predictions CSV
# for that model directory — confirm against recommendation.files.
df: pd.DataFrame = pd.read_csv(get_test_set_predictions_path(MODEL_OUTPUT_DIR))

df.head()

Unnamed: 0,timestamp,timestamp_diff,step,user_idx,session_idx,sum_action_item_before,action_type_item_idx,action_type_idx,list_action_type_idx,list_reference_search_for_poi,...,user_view,hist_views,ps,n_users,n_items,vocab_size,window_hist_size,sorted_actions,prob_actions,action_scores
0,1541461727,1541461708,2.0,3674,3219,0.0,809,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,51.0,3.0,0.058824,4429,2192,241,10,"[625, 97, 798, 1145, 795, 1034, 109, 32, 1095,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.22123219072818756, 0.08879648149013519, 0.0..."
1,1541461727,1541461708,2.0,3674,3219,0.0,809,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,51.0,2.0,0.039216,4429,2192,241,10,"[625, 798, 97, 1145, 795, 109, 1034, 32, 1095,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.16339226067066193, 0.13472728431224823, 0.0..."
2,1541461727,1541461708,2.0,3674,3219,0.0,809,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,51.0,3.0,0.058824,4429,2192,241,10,"[625, 798, 97, 1145, 795, 109, 1034, 32, 1095,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.16893179714679718, 0.12593844532966614, 0.0..."
3,1541461727,1541461708,2.0,3674,3219,0.0,809,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,51.0,3.0,0.058824,4429,2192,241,10,"[625, 798, 97, 1145, 795, 109, 1034, 32, 1095,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.17436540126800537, 0.11745315790176392, 0.0..."
4,1541461727,1541461708,2.0,3674,3219,0.0,809,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,51.0,2.0,0.039216,4429,2192,241,10,"[625, 798, 97, 1145, 795, 109, 1034, 32, 1095,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.18008042871952057, 0.10962656140327454, 0.0..."


In [4]:
with Pool(10) as p:
    # pd.read_csv loads list-valued columns as their string representation
    # (e.g. "[625, 97, ...]"). Iterating such a string yields single characters
    # that never equal the integer item_idx, so every relevance list would be
    # all zeros and have the wrong length. Parse the column into real lists first.
    # NOTE(review): assumes parallel_literal_eval(series, pool=...) returns the
    # parsed values in row order — confirm against recommendation.utils.
    sorted_actions = df["sorted_actions"]
    if len(sorted_actions) > 0 and isinstance(sorted_actions.iloc[0], str):
        sorted_actions = parallel_literal_eval(sorted_actions, pool=p)

    print("Creating the relevance lists...")
    df["relevance_list"] = list(
        tqdm(p.starmap(_create_relevance_list,
                       zip(sorted_actions, df["item_idx"], df["clicked"])),
             total=len(df)))

    # Relevance flag at the first position of each list.
    df["rhat_rewards"] = list(
        tqdm(p.map(_get_rhat_rewards, df["relevance_list"]),
             total=len(df)))

Creating the relevance lists...


100%|██████████| 44648/44648 [00:00<00:00, 5246610.96it/s]
100%|██████████| 44648/44648 [00:00<00:00, 4738904.40it/s]


In [5]:
# Inspect the per-row relevance lists stored on the frame.
df["relevance_list"]

0        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
2        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
3        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
4        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
                               ...                        
44643    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
44644    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
44645    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
44646    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
44647    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
Name: relevance_list, Length: 44648, dtype: object

In [6]:
# Keep only the rows whose `clicked` value equals 1.
gt_df = df.loc[df["clicked"] == 1]

In [18]:
# (rows, columns) of the clicked-only subset.
gt_df.shape

(1826, 46)

In [20]:
# Preview the first rows of the clicked-only subset.
gt_df.head()

Unnamed: 0,timestamp,timestamp_diff,step,user_idx,session_idx,sum_action_item_before,action_type_item_idx,action_type_idx,list_action_type_idx,list_reference_search_for_poi,...,user_view,hist_views,ps,n_users,n_items,vocab_size,window_hist_size,sorted_actions,prob_actions,action_scores
1,1541461727,1541461708,2.0,3674,3219,0.0,809,2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,51.0,2.0,0.039216,4429,2192,241,10,"[625, 798, 97, 1145, 795, 109, 1034, 32, 1095,...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.16339226067066193, 0.13472728431224823, 0.0..."
31,1541461746,3082923435,3.0,3674,3219,1.0,798,2,"[0, 0, 0, 0, 0, 0, 0, 0, 2, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,76.0,4.0,0.052632,4429,2192,241,10,"[809, 798, 625, 97, 795, 1039, 32, 1088, 128, ...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.8574801087379456, 0.5615195035934448, 0.326..."
51,1541461753,21580456031,15.0,2474,472,0.0,1735,2,"[2, 2, 3, 3, 3, 4, 2, 2, 2, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,176.0,2.0,0.011364,4429,2192,241,10,"[284, 483, 826, 346, 659, 497, 733, 964, 408, ...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.030128072947263718, 0.01853824593126774, 0...."
81,1541461759,29287773125,20.0,1373,1572,19.0,221,2,"[5, 5, 5, 5, 5, 5, 5, 5, 5, 5]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",...,26.0,2.0,0.076923,4429,2192,241,10,"[221, 133, 565, 21, 798, 939, 264, 106, 125, 8...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.9924634695053101, 0.5967817902565002, 0.412..."
97,1541461770,163394825917,107.0,1914,3635,16.0,616,2,"[5, 5, 5, 5, 5, 10, 5, 5, 5, 5]","[0, 0, 0, 0, 0, 2, 0, 0, 0, 0]",...,176.0,1.0,0.005682,4429,2192,241,10,"[616, 565, 124, 352, 1066, 221, 6, 93, 111, 17...","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.9762786626815796, 0.8156233429908752, 0.166..."


In [23]:
# List the column names available on the clicked-only subset.
gt_df.columns

Index(['timestamp', 'timestamp_diff', 'step', 'user_idx', 'session_idx',
       'sum_action_item_before', 'action_type_item_idx', 'action_type_idx',
       'list_action_type_idx', 'list_reference_search_for_poi',
       'list_reference_change_of_sort_order',
       'list_reference_search_for_destination',
       'list_reference_filter_selection',
       'list_reference_interaction_item_image_idx',
       'list_reference_interaction_item_rating_idx',
       'list_reference_clickout_item_idx',
       'list_reference_interaction_item_deals_idx',
       'list_reference_search_for_item_idx',
       'list_reference_interaction_item_info_idx', 'list_current_filters',
       'platform_idx', 'device_idx', 'current_filters', 'impressions',
       'prices', 'clicked', 'list_mean_price', 'impressions_popularity',
       'pos_item_idx', 'item_idx', 'price', 'view', 'is_first_in_impression',
       'first_item_idx', 'popularity_item_idx', 'diff_price', 'user_view',
       'hist_views', 'ps', 'n_user

In [25]:
# Show the `clicked` column of the filtered frame; every value should be 1 given how gt_df was built.
gt_df['clicked']

1        1.0
31       1.0
51       1.0
81       1.0
97       1.0
        ... 
44540    1.0
44566    1.0
44595    1.0
44603    1.0
44629    1.0
Name: clicked, Length: 1826, dtype: float64