# Bootstrapping user preferences



Similarly to (Zhu et al., 2021), we'll evaluate a dynamic recommendation system using:

1. An oracle preference model (Generated in notebook 00-preference_model)
2. A position bias examination model
3. Bootstrapped user preferences

Step 3 is important because it simulates exposing users to fresh items: we combine their known preferences (provided by step 1) with their examination bias (step 2) to build a synthetic rating-matrix dataset, which we'll use to compare different calibration approaches as the user gets exposed to more and more items.

In [1]:
import pandas as pd
from tqdm import tqdm

In [33]:
import numpy as np
import math
import random

In [2]:
import sys
import os
from pathlib import Path

sys.path.append('..')

In [3]:
from constants import ML_1M_FILLED_PATH

In [21]:
def get_user_preference_for_item(user, item, matrix):
    """Look up the rating stored in ``matrix`` for one (user, item) pair.

    Args:
        user: Value to match against the ``"user"`` column.
        item: Value to match against the ``"item"`` column.
        matrix: DataFrame with ``"user"``, ``"item"`` and ``"rating"`` columns.

    Returns:
        The single matching rating as a Python scalar.

    Raises:
        ValueError: If the pair does not match exactly one row
            (raised by ``Series.item()``).
    """
    # Narrow to the user's rows first so the item comparison runs on a small slice.
    rows_for_user = matrix.loc[matrix["user"] == user]
    matching_rating = rows_for_user.loc[rows_for_user["item"] == item, "rating"]
    return matching_rating.item()

In [37]:
def click_model(k):
    """Simulate whether a user examines the item shown at rank ``k``.

    Position-bias examination model: the item at rank ``k`` is examined with
    probability ``lambda_k = 1 / log2(k + 1)``, so rank 1 is always examined
    and the probability decays logarithmically for lower positions.

    Args:
        k: 1-based rank position of the item in the recommendation list.

    Returns:
        bool: ``True`` if the simulated user examined the position.

    Raises:
        ValueError: If ``k`` is smaller than 1 (the formula is undefined there).
    """
    if k < 1:
        raise ValueError(f"k must be a 1-based rank (>= 1), got {k!r}")
    # math.log2 is used because np.log(x, 2) treats the 2 as the ufunc's
    # ``out`` argument rather than a logarithm base and raises TypeError.
    lambda_k = 1 / math.log2(k + 1)
    # random.random() is in [0, 1), so lambda_k == 1 (k == 1) always examines.
    return random.random() <= lambda_k

In [43]:
def get_user_feedback_for_item(user, item, k, matrix, preference_threshold=4):
    """Simulate the feedback a user gives to an item shown at rank ``k``.

    The user's preference is read from ``matrix``; whether the position is
    examined is drawn from ``click_model(k)``.

    Args:
        user: User identifier to look up in ``matrix``.
        item: Item identifier to look up in ``matrix``.
        k: Rank position at which the item is shown (passed to ``click_model``).
        matrix: DataFrame with ``"user"``, ``"item"`` and ``"rating"`` columns.
        preference_threshold: Minimum rating for the item to count as relevant.

    Returns:
        tuple: ``(user, item, feedback)`` where ``feedback`` is ``True`` if the
        user examined the item and it is relevant (click), ``False`` if the
        user examined it and it is not relevant (no click), and ``None`` if the
        position was not examined.
    """
    preference = get_user_preference_for_item(user, item, matrix)
    observed = click_model(k)
    if not observed:
        # Unexamined positions carry no signal; previously ``feedback`` was
        # left unassigned here and the return raised UnboundLocalError.
        return user, item, None
    relevant = bool(preference >= preference_threshold)
    return user, item, relevant

In [39]:
def random_rec(items, u, k, D):
    """Recommend ``k`` items chosen uniformly at random among those unseen by ``u``.

    Args:
        items: Iterable of all candidate item identifiers.
        u: User identifier.
        k: Number of items to recommend.
        D: DataFrame with ``"user"`` and ``"item"`` columns; the items listed
            for ``u`` are excluded from the candidates.

    Returns:
        list: ``k`` distinct items drawn with ``random.sample``.

    Raises:
        ValueError: If fewer than ``k`` unseen items remain
            (raised by ``random.sample``).
    """
    already_seen = set(D.loc[D["user"] == u, "item"])
    unseen_pool = list(set(items) - already_seen)
    return random.sample(unseen_pool, k)

In [9]:
# Load the preference matrix from the CSV at ML_1M_FILLED_PATH (resolved
# relative to the parent directory) and drop the "Unnamed: 0" column.
# NOTE(review): "Unnamed: 0" is presumably an index column written by an
# earlier to_csv call — confirm against the notebook that produced the file.
preference_matrix = pd.read_csv(f"../{ML_1M_FILLED_PATH}").drop(columns=["Unnamed: 0"])

In [10]:
# Inspect the loaded preference matrix (columns: user, item, rating).
preference_matrix

Unnamed: 0,user,item,rating
0,1,1193,5.000000
1,1,661,3.000000
2,1,914,3.000000
3,1,3408,4.000000
4,1,2355,5.000000
...,...,...,...
22384235,6040,2198,2.990271
22384236,6040,2703,3.182332
22384237,6040,2845,3.090408
22384238,6040,3607,3.034246


In [18]:
# Rows of the preference matrix that belong to user 1.
# NOTE(review): `a` is not referenced by any other visible cell — looks like a
# scratch variable; confirm it is still needed.
a = preference_matrix[preference_matrix["user"] == 1]

In [22]:
# Sanity check: look up the rating of user 1 for item 1193.
get_user_preference_for_item(1, 1193, preference_matrix)

5.0

In [13]:
# Distinct user ids and item ids found in the preference matrix, as lists.
unique_users = list(preference_matrix["user"].unique())

unique_items = list(preference_matrix["item"].unique())

In [40]:
# Empty frame with user / item / relevancy columns.
# NOTE(review): presumably filled later with the simulated feedback tuples —
# confirm against the cells that follow.
click_matrix = pd.DataFrame(columns=["user", "item", "relevancy"])