# Simulating a dynamic recommendation setting

In [1]:
import numpy as np
import pandas as pd
import sys
import pickle

import torch
from torch.utils.data import DataLoader, random_split

from tqdm import tqdm

sys.path.append('..')

from simulationConstants import ML_1M_1K_SAMPLE_FILLED_PATH, ML_1M_1K_SAMPLE_FILLED_PATH_PKL

In [2]:
# Load the synthetic interaction log that seeds the simulation.
# NOTE(review): "sinthetic" presumably matches the file name on disk; rename the file before fixing the spelling here.
synthetic_data_matrix = pd.read_csv("../data/simulation/sinthetic_data_1k_sample.csv")

In [3]:
synthetic_data_matrix

Unnamed: 0,user,item,clicked_and_examined,clicked_at,timestamp
0,10,2804,1.0,1.0,21.445333
1,10,2502,1.0,2.0,26.218368
2,10,3639,,,
3,10,3204,1.0,4.0,32.883131
4,10,47,1.0,5.0,43.801014
...,...,...,...,...,...
999995,6036,1003,,,
999996,6036,2285,,,
999997,6036,1183,0.0,,
999998,6036,2735,,,


In [4]:
# Number of distinct users in the seed log (a missing user id, if any, counts as one value).
synthetic_data_matrix["user"].nunique(dropna=False)

1000

In [5]:
# Rename the click-model columns to the names used by the rest of the notebook:
# "clicked_and_examined" -> "relevant", "clicked_at" -> "click". Returns a new frame.
feedback = synthetic_data_matrix.rename(columns={"clicked_and_examined": "relevant", "clicked_at": "click"})

In [6]:
# Largest timestamp value present in the seed feedback log.
# NOTE(review): a later cell resets `initial_date` to None before this value is read anywhere;
# confirm which of the two assignments is intended and delete the other.
initial_date = feedback["timestamp"].max()

In [7]:
# Oracle preference matrix used as the relevance model during simulation.
# The CSV was saved with its index, so the resulting "Unnamed: 0" column is dropped.
movielensOraclePreferenceMatrix = pd.read_csv(f"../{ML_1M_1K_SAMPLE_FILLED_PATH}").drop(columns=["Unnamed: 0"])

In [8]:
movielensOraclePreferenceMatrix

Unnamed: 0,user,item,genres,rating
0,10,2622,comedy|fantasy,1
1,10,648,action|adventure|mystery,1
2,10,2628,action|adventure|fantasy|sci-fi,0
3,10,3358,comedy|romance,1
4,10,3359,drama,0
...,...,...,...,...
3463995,6036,2246,comedy,0
3463996,6036,559,comedy,0
3463997,6036,3866,comedy,0
3463998,6036,793,drama,0


## Simulation process

To simulate users, we'll: 

1. Retain 1000 random users from our `synthetic_data_matrix`
2. For each k iteration in the simulation:
    3. Recommend 20 items to the user
        a. For calibration: we recommend 100 but calibrate to 20. This is done to increase recall before running the reranking
    4. Log the clicks 
5. Every 50 iterations, retrain the recommender
6. Run this for 10_000 iterations

In [9]:
from simulationUtils import get_user_feedback_for_item

In [10]:
from simulationConstants import ML_DATA_PATH

In [11]:
# Per-user timestamp distributions; later mapped onto the "user" column, so presumably keyed by user id.
# NOTE(review): the path is relative to the current working directory, and pickle.load can execute
# arbitrary code embedded in the file. Only load pickles produced by this project.
with open('userToExpDistribution.pkl', 'rb') as f:
    movielensUserToExpDistribution = pickle.load(f)

In [12]:
# Reset the simulation start date; the next setup cell falls back to "now" when this is None.
# NOTE(review): this discards the value computed earlier from feedback["timestamp"].max().
initial_date = None

In [13]:
# Distinct user ids, in order of first appearance in the feedback log.
# NOTE(review): `unique_users` is computed again (via .unique()) right before the model is built.
unique_users = list(feedback["user"].drop_duplicates())

In [14]:
# Fall back to the current wall-clock time (epoch seconds) when no start date was fixed.
initial_date = pd.Timestamp.now().timestamp() if initial_date is None else initial_date

# Per-user simulation clock: elapsed simulated time (starting at zero) plus the
# timestamp distribution looked up for each user.
user_to_up_to_date_timestamp = pd.DataFrame(
    {"user": unique_users, "delta_from_start": 0.0}
).assign(timestamp_dist=lambda clock: clock["user"].map(movielensUserToExpDistribution))

In [15]:
def map_recommendation_to_feedback(user, rec_list, matrix, userToExpDistribution):
    """Collect simulated feedback for every item of a ranked recommendation list.

    Each item is passed to ``get_user_feedback_for_item`` together with its
    1-based rank. The helper's five return values are stored as one tuple per
    item, and the largest non-None delta that exceeds zero is tracked.

    Returns
    -------
    (rows, max_delta)
        ``rows`` is a list of ``(user, item, feedback, clicked_at, delta)``
        tuples in ranking order; ``max_delta`` is 0 when no delta exceeded it.
    """
    rows = []
    largest_delta = 0
    for rank, item in enumerate(rec_list, start=1):
        # The helper echoes the user back; as before, that value is what gets
        # stored and what is passed along for the next item.
        user, rec_item, relevance, clicked_at, delta = get_user_feedback_for_item(
            user, item, rank, matrix, userToExpDistribution
        )
        exceeds = delta is not None and delta > largest_delta
        largest_delta = delta if exceeds else largest_delta
        rows.append((user, rec_item, relevance, clicked_at, delta))
    return rows, largest_delta

In [16]:
def simulate_user_feedback(user, candidate_items, model, preference_matrix, k, user_to_up_to_date_timestamp, userToExpDistribution):
    """Recommend ``k`` items to one user and simulate that user's feedback on them.

    The model is queried through ``model.predict`` with a one-element user
    tensor on the notebook-level ``device``; only the first of its two return
    values (the recommended items) is used. The user's ``delta_from_start``
    entry in ``user_to_up_to_date_timestamp`` is advanced in place by the
    largest delta observed across the recommended items.

    Returns
    -------
    (rows, user_to_up_to_date_timestamp)
        The feedback rows from ``map_recommendation_to_feedback`` and the same
        (mutated) clock DataFrame that was passed in.
    """
    recommended, _ = model.predict(torch.tensor([user], device=device), candidate_items, k)
    feedback_rows, elapsed = map_recommendation_to_feedback(
        user, recommended, preference_matrix, userToExpDistribution
    )
    is_this_user = user_to_up_to_date_timestamp["user"] == user
    user_to_up_to_date_timestamp.loc[is_this_user, "delta_from_start"] += elapsed
    return feedback_rows, user_to_up_to_date_timestamp

In [17]:
# Use the GPU when one is available, otherwise the CPU. The helper functions in this
# notebook read `device` as a global, so this cell must run before they are called.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [18]:
# Make the local bprMf checkout importable (the `bpr_mf` / `utils` imports below presumably live there).
# NOTE(review): absolute, machine-specific path; this cell fails on any other machine.
# Consider a configurable project root or installing the package instead.
sys.path.append('/home/caio/dev/bprMf')

In [19]:
from bpr_mf import bprMFLClickDebiasingDataloader, bprMF, bpr_train_with_debiasing, bpr_loss_with_reg_with_debiased_click
from utils import generate_bpr_dataset_with_click_data

In [20]:
# Build the BPR training table from the feedback log, requesting 5 negatives per
# positive; used below to size the model and to inspect the generated dataset.
bpr_dataset = generate_bpr_dataset_with_click_data(feedback, num_negatives=5)

In [21]:
bpr_dataset

Unnamed: 0,user,pos_item,click_position,neg_item
0,10,2804,1,1019
1,10,2804,1,2671
2,10,2804,1,492
3,10,2804,1,3080
4,10,2804,1,1583
...,...,...,...,...
546020,6033,2058,19,1558
546021,6033,2058,19,1527
546022,6033,2058,19,2468
546023,6033,2058,19,1564


In [22]:
# Size the embedding tables as (largest id + 1). Raw user/item ids are passed to the model
# as-is elsewhere in this notebook, so they presumably act as embedding indices.
# The maximum is taken over the full feedback log as well as the BPR table: the BPR table
# only lists positive pairs, so an id that appears solely in `feedback` (or solely as a
# sampled negative) would otherwise fall outside the table.
n_users = int(max(feedback["user"].max(), bpr_dataset["user"].max())) + 1
n_items = int(
    max(
        feedback["item"].max(),
        bpr_dataset["pos_item"].max(),
        bpr_dataset["neg_item"].max(),
    )
) + 1


In [23]:
n_users, n_items

(6037, 3953)

In [24]:
def train(model, data, train_ratio=0.8, debug=False, num_negatives=5, batch_size=256, lr=1e-3, reg_lambda=5e-4):
    """Train (or continue training) a BPR model on a feedback log.

    The feedback is converted to a BPR dataset, randomly split into train and
    test partitions, and handed to ``bpr_train_with_debiasing`` together with a
    freshly created Adam optimizer.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose ``parameters()`` are optimized.
    data : pandas.DataFrame
        Feedback log accepted by ``generate_bpr_dataset_with_click_data``.
    train_ratio : float, default 0.8
        Fraction of the BPR samples assigned to the training partition.
    debug : bool, default False
        Forwarded unchanged to ``bpr_train_with_debiasing``.
    num_negatives : int, default 5
        Forwarded to ``generate_bpr_dataset_with_click_data``.
    batch_size : int, default 256
        Batch size of both the train and the test ``DataLoader``.
    lr : float, default 1e-3
        Learning rate of the Adam optimizer.
    reg_lambda : float, default 5e-4
        Forwarded to ``bpr_train_with_debiasing``.

    Returns
    -------
    The ``model`` object that was passed in.

    Notes
    -----
    The split is not seeded here, so results depend on torch's global RNG
    state. A new optimizer is created on every call, so optimizer state does
    not carry over between retraining rounds.
    """
    bpr_dataset = generate_bpr_dataset_with_click_data(data, num_negatives=num_negatives)
    data_bpr = bprMFLClickDebiasingDataloader(bpr_dataset)

    train_len = int(train_ratio * len(data_bpr))
    test_len = len(data_bpr) - train_len
    train_data, test_data = random_split(data_bpr, [train_len, test_len])

    dataloader_bpr_train = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    dataloader_bpr_test = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # The trainer's two return values are not needed here; only the model is returned.
    _, _ = bpr_train_with_debiasing(
        train_data_loader=dataloader_bpr_train,
        test_data_loader=dataloader_bpr_test,
        model=model,
        bpr_loss=bpr_loss_with_reg_with_debiased_click,
        optimizer=optimizer,
        reg_lambda=reg_lambda,
        debug=debug,
    )

    return model

In [25]:
# Make the calibratedRecs checkout importable (the directory that contains constants.py).
# sys.path entries are searched as directories (or archives), so the former second entry,
# which pointed at the constants.py file itself, had no effect on imports and was dropped.
# NOTE(review): absolute, machine-specific path; consider a configurable project root.
sys.path.append("/home/caio/dev/calibratedRecs")


In [26]:
from calibrationUtils import preprocess_genres

In [27]:

from constants import ITEM_COL, GENRE_COL, USER_COL
from metrics import mace

In [28]:
# Load the pickled, filled MovieLens sample, drop exact duplicate rows, and run the
# project's genre preprocessing on it.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this project.
with open(f"../{ML_1M_1K_SAMPLE_FILLED_PATH_PKL}", "rb") as f:
    ml_filled = preprocess_genres(pickle.load(f).drop_duplicates())



In [29]:

# Lookup table: item id -> that item's genre value. If an item occurs on several rows,
# the last row wins.
ml_item_to_genre = ml_filled.set_index(ITEM_COL)[GENRE_COL].to_dict()

In [30]:

def get_candidate_items(user, D, unique_items):
    """Return the items of ``unique_items`` that ``user`` has no row for in ``D``.

    The result keeps the order of ``unique_items`` and is returned as a tensor
    placed on the notebook-level ``device``.
    """
    already_seen = set(D.loc[D["user"] == user, "item"])
    unseen = list(filter(lambda candidate: candidate not in already_seen, unique_items))
    return torch.tensor(unseen, device=device)

def simulate(D, model, unique_users, unique_items, oracleMatrix, userToExpDistribution, item2genreMap, k=100, rounds=1000, L=10, initial_date=None):
    """
    Run the recommend -> simulated feedback -> retrain loop.

    In every round each user in ``unique_users`` is recommended up to ``k``
    items. ``oracleMatrix`` acts as the relevance model and a click model
    (behind ``simulate_user_feedback``) simulates the probability of the user
    examining each item. The resulting feedback rows are appended to a growing
    copy of ``D``. Every ``L`` rounds the model is retrained on the accumulated
    log and MACE is computed for that round's recommendations.

    The bootstrap process runs for an arbitrary number of ``rounds`` so that
    enough feedback data is collected to train a model.

    Parameters
    ----------
    D : pandas.DataFrame
        Seed feedback log; needs the USER_COL, ITEM_COL and "timestamp"
        columns. New rows are created with D's column order. D itself is not
        modified.
    model
        Recommender passed to ``simulate_user_feedback`` and ``train``.
    unique_users, unique_items : sequence
        Users to simulate and the item universe candidates are drawn from.
    oracleMatrix
        Preference matrix forwarded to ``simulate_user_feedback``.
    userToExpDistribution
        Mapping applied to the user column, and forwarded to
        ``simulate_user_feedback``.
    item2genreMap : dict
        item -> iterable of genres; used for the genre count and by ``mace``.
    k : int, default 100
        Number of items recommended per user per round.
    rounds : int, default 1000
        Number of simulation rounds.
    L : int, default 10
        Retrain (and compute MACE) every ``L`` rounds.
    initial_date : float or None
        Offset added to every non-null timestamp of the result; defaults to
        the current epoch time.

    Returns
    -------
    (final_df, maces)
        ``final_df`` holds the seed log followed by all simulated rows, each
        exactly once, with a fresh 0..n-1 index. ``maces`` has one MACE value
        per retraining round.
    """
    # Genre vocabulary size and per-user seed history, both consumed by `mace`.
    n_genres = len({genre for genres in item2genreMap.values() for genre in genres})
    user2history = D.groupby(USER_COL).agg({ITEM_COL: list}).to_dict()[ITEM_COL]

    if initial_date is None:
        initial_date = pd.Timestamp.now().timestamp()

    # Per-user simulation clock, advanced in place by simulate_user_feedback.
    user_to_up_to_date_timestamp = pd.DataFrame({
        "user": unique_users,
        "delta_from_start": 0.0
    })
    user_to_up_to_date_timestamp["timestamp_dist"] = user_to_up_to_date_timestamp["user"].map(userToExpDistribution)

    maces = []
    new_df = D.copy()
    # `round_idx` rather than `round`, to avoid shadowing the builtin.
    for round_idx in tqdm(range(1, rounds + 1), desc="Rounds"):
        rows_to_append = []
        for user in unique_users:
            # NOTE(review): candidates are filtered against the seed log D only, so items
            # recommended in earlier rounds remain eligible and may be shown again.
            # Confirm this is intended before switching to the accumulated log.
            candidate_items = get_candidate_items(user, D, unique_items)
            # Use this function's own `userToExpDistribution` argument. The previous code
            # read a notebook-level global here, silently ignoring the parameter.
            row, user_to_up_to_date_timestamp = simulate_user_feedback(
                user, candidate_items, model, oracleMatrix, k, user_to_up_to_date_timestamp, userToExpDistribution
            )
            rows_to_append.extend(row)
        recommendation_df = pd.DataFrame(rows_to_append, columns=new_df.columns)
        if round_idx % L == 0:
            # NOTE(review): retraining sees the log up to the previous round; this round's
            # rows are appended afterwards and first used at the next retraining.
            print("retraining model...")
            model = train(model, new_df)
            print("Calculating mace")
            rec_df_grouped = (
                recommendation_df.groupby(USER_COL)
                .agg({ITEM_COL: list})
                .reset_index()
                .rename(columns={ITEM_COL: "rec"})
            )
            iteration_mace = mace(df=rec_df_grouped, user2history=user2history, recCol='rec', n_genres=n_genres, item2genreMap=item2genreMap)
            maces.append(iteration_mace)
        if round_idx % 100 == 0:
            # Periodic checkpoint of the current round's recommendations.
            recommendation_df.to_csv(f"data/movielens/no_calibration_sim_up_to_round_{round_idx}")
        new_df = pd.concat([new_df, recommendation_df], ignore_index=True)

    # `new_df` already starts as a copy of D, so it is the complete log. Concatenating D
    # in front of it again (as before) duplicated every seed row in the result.
    final_df = new_df
    final_df.loc[final_df["timestamp"].notnull(), "timestamp"] += initial_date
    return final_df, maces

In [31]:
# Item universe and user population for the simulation, each in order of first
# appearance in the feedback log.
unique_items, unique_users = (list(feedback[column].unique()) for column in ("item", "user"))

In [32]:
# Build a 30-factor BPR matrix-factorization model sized by n_users / n_items, move it to
# `device`, and fit it once on the seed feedback before the simulation starts.
model = bprMF(num_users=n_users, num_items=n_items, factors=30).to(device)
model = train(model, feedback)

In [33]:
movielensOraclePreferenceMatrix

Unnamed: 0,user,item,genres,rating
0,10,2622,comedy|fantasy,1
1,10,648,action|adventure|mystery,1
2,10,2628,action|adventure|fantasy|sci-fi,0
3,10,3358,comedy|romance,1
4,10,3359,drama,0
...,...,...,...,...
3463995,6036,2246,comedy,0
3463996,6036,559,comedy,0
3463997,6036,3866,comedy,0
3463998,6036,793,drama,0


In [34]:
feedback

Unnamed: 0,user,item,relevant,click,timestamp
0,10,2804,1.0,1.0,21.445333
1,10,2502,1.0,2.0,26.218368
2,10,3639,,,
3,10,3204,1.0,4.0,32.883131
4,10,47,1.0,5.0,43.801014
...,...,...,...,...,...
999995,6036,1003,,,
999996,6036,2285,,,
999997,6036,1183,0.0,,
999998,6036,2735,,,


In [35]:
# Short smoke run of the simulation: 5 rounds, 100 recommendations per user per round,
# retraining (and computing MACE) every 2 rounds. initial_date=0.0 leaves timestamps
# as offsets rather than shifting them to wall-clock time.
# NOTE(review): the markdown plan above describes 20 items per round, retraining every 50
# iterations and 10_000 iterations; these arguments differ from it.
feedback_final, maces = simulate(
    D=feedback,
    model=model,
    unique_users=unique_users,
    unique_items=unique_items,
    oracleMatrix=movielensOraclePreferenceMatrix,
    userToExpDistribution=movielensUserToExpDistribution,
    k=100,
    rounds=5,
    L=2,
    initial_date=0.0,
    item2genreMap=ml_item_to_genre
)

Rounds:  20%|██        | 1/5 [02:02<08:10, 122.60s/it]

retraining model...
Calculating mace


100%|██████████| 1000/1000 [00:24<00:00, 41.61it/s]
Rounds:  60%|██████    | 3/5 [07:25<04:54, 147.49s/it]

retraining model...
Calculating mace


100%|██████████| 1000/1000 [00:24<00:00, 41.14it/s]
Rounds: 100%|██████████| 5/5 [13:02<00:00, 156.47s/it]


In [38]:
maces

[0.016945920519569276, 0.01713526166014959]

In [36]:
feedback[feedback["item"] == 2502]

Unnamed: 0,user,item,relevant,click,timestamp
1,10,2502,1.0,2.0,26.218368
5641,1608,2502,,,
8977,2624,2502,,,
16032,4899,2502,0.0,,
18852,5642,2502,0.0,,
...,...,...,...,...,...
983408,937,2502,,,
985972,1698,2502,,,
986494,1818,2502,,,
991362,3372,2502,,,


In [37]:
feedback_final

Unnamed: 0,user,item,relevant,click,timestamp
0,10,2804,1.0,1.0,21.445333
1,10,2502,1.0,2.0,26.218368
2,10,3639,,,
3,10,3204,1.0,4.0,32.883131
4,10,47,1.0,5.0,43.801014
...,...,...,...,...,...
1499995,6036,3735,,,
1499996,6036,3671,,,
1499997,6036,1244,,,
1499998,6036,2726,,,
