In [None]:
import pandas as pd
import random
import numpy as np
from os import path, makedirs
import time
from copy import deepcopy

author: Diego Carraro, on the 01/04/2021
contact: diego.carraro89@gmail.com

Python version: 3

<h1>Debiasing the Offline Evaluation of Recommender Systems</h1>

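This code is divided into three main sections:
1. "Setting of parameters for the evaluation", where the user can tune the evaluation
2. "To prepare the data for the evaluation", where the data for the evaluation is produced
3. "Methods", which contains the functions used for the data preparation

Note: the functions called in section 2 are defined in section 3, so on a fresh kernel the cells of section 3 must be run before the cells of section 2.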

<h3>1. Setting of parameters for the evaluation</h3>

In [None]:
# name of the dataset to process; it is used to build both the input folder
# ('datasets/<DATASET>/') and the output folder ('splits/<DATASET>/')
DATASET = 'DatasetSample'

In [None]:
# params for the splits and the intervention
dataset_path = 'datasets/' + DATASET + '/'  # folder the rating files are read from
out_path = 'splits/' + DATASET + '/'  # folder the generated folds are written to
do_validation = True # if to generate split to perform validation of hyperparameters

n_splits = 2  # number of folds ('fold_0', 'fold_1', ...) to generate
rho_test = 0.4 # heldout test proportion (fraction of the MNAR ratings)
rho_p = 0.5 # intervention proportion (fraction of the heldout test kept by RND/SKEW/WTD/WTD_H)
rho_wei = 0.15 # proportion of the MAR ratings reserved for the calculation of the weights
rho_val = 0.15 # validation proportion (fraction of the MAR ratings left after removing the weights part)
intervention_strategies = ['FULL', 'RND', 'SKEW', 'WTD', 'WTD_H']

# Every split and intervention draws from the global `random` / `np.random`
# generators, so they are seeded here to make the produced folds reproducible.
# Set SEED to None to get different splits on every run.
SEED = 42
if SEED is not None:
    random.seed(SEED)
    np.random.seed(SEED)

<h3>2. To prepare the data for the evaluation</h3>

In [None]:
# file names (without extension) of the two rating datasets inside `dataset_path`
MAR_name = 'MAR_ratings'
MNAR_name = 'MNAR_ratings'

df_ratings_MAR = load_dataset(dataset_path, MAR_name, extension='.csv')
df_ratings_MNAR = load_dataset(dataset_path, MNAR_name, extension='.csv')

# keep only the MNAR ratings given by users that also appear in the MAR dataset
mar_user_ids = df_ratings_MAR['userId'].unique()
df_ratings_MNAR = df_ratings_MNAR.loc[df_ratings_MNAR['userId'].isin(mar_user_ids)]

# one-column frames listing every user / item found in either dataset
d = list(set(mar_user_ids).union(df_ratings_MNAR['userId'].unique()))
users = pd.DataFrame({'userId': d})
d = list(set(df_ratings_MAR['itemId'].unique()).union(df_ratings_MNAR['itemId'].unique()))
items = pd.DataFrame({'itemId': d})

In [None]:
start = time.time()

# the id sets are the same for every fold and strategy, so build them only once
all_user_ids = set(users['userId'].values)
all_item_ids = set(items['itemId'].values)

for fold in range(n_splits):
    fold_dir = out_path + 'fold_' + str(fold) + '/'
    # exist_ok makes the cell re-runnable without a separate existence check
    makedirs(fold_dir, exist_ok=True)

    users.to_csv(fold_dir + 'users.csv', index=False, sep='\t', header=False)
    items.to_csv(fold_dir + 'items.csv', index=False, sep='\t', header=False)

    # split the MNAR ratings into training and heldout test
    training, testing = simple_random_split(df_ratings_MNAR, rho_test)
    training.to_csv(fold_dir + 'training.csv', index=False, sep='\t', header=True)
    testing.to_csv(fold_dir + 'heldout_test.csv', index=False, sep='\t', header=True)

    # reserve a percentage of the MAR dataset for calculation of the weights for WTD
    df_test_mar, df_weights_calculation = simple_random_split(df_ratings_MAR, rho_wei)

    if do_validation:
        # carve the validation ratings out of the remaining MAR ratings
        df_test_mar, df_validation = simple_random_split(df_test_mar, rho_val)
        df_validation.to_csv(fold_dir + 'validation_test.csv', index=False, sep='\t', header=True)

    # print the unbiased MAR ground truth test set.
    # NOTE: this is written only AFTER the validation split, so that the
    # validation ratings are not also part of the ground truth test set
    # (writing it earlier leaked validation rows into GT_test.csv).
    df_test_mar.to_csv(fold_dir + 'GT_test.csv', index=False, sep='\t', header=False)

    for strategy in intervention_strategies:
        if strategy == 'RND':
            # random sampling the intervened test set
            _discarded, intervened_test = simple_random_split(testing, rho_p)

        elif strategy == 'FULL':
            # keep the entire test set
            intervened_test = deepcopy(testing)

        elif strategy == 'SKEW':
            # sampling driven by the item counts observed in the training set
            _discarded, intervened_test = skew_intervention(testing, training, all_item_ids, rho_p)

        elif strategy in ('WTD', 'WTD_H'):
            # WTD_H replaces the MAR distributions estimated from the data
            # with the ideal (uniform) ones
            use_ideal_mar_distr = strategy == 'WTD_H'

            user_weights, item_weights = calculate_weights(training, df_weights_calculation,
                                                           use_ideal_mar_distr,
                                                           all_user_ids,
                                                           all_item_ids)

            _discarded, intervened_test = weighted_intervention(testing, user_weights, item_weights, rho_p)

        else:
            # fail with a clear message instead of an AttributeError on None
            raise ValueError('unknown intervention strategy: ' + repr(strategy))

        # write it
        test_name = strategy + '_intervened_test.csv'
        intervened_test.to_csv(fold_dir + test_name, index=False, sep='\t', header=True)

    print("time elapsed", time.time() - start)

<h3>3. Methods</h3>

In [None]:
def load_dataset(pathh, name, extension='.csv'):
    """Read a tab-separated ratings file into a DataFrame.

    :param pathh: folder of the file, including the trailing separator
    :param name: file name without extension
    :param extension: file extension, including the leading dot
    :return: DataFrame with the content of ``pathh + name + extension``
    """
    file_location = ''.join((pathh, name, extension))
    loaded = pd.read_csv(file_location, sep='\t')
    print('dataset loaded')
    return loaded

def simple_random_split(df, frac):
    """Randomly partition the rows of ``df`` into two disjoint DataFrames.

    ``int(frac * len(df))`` row positions are drawn uniformly, without
    replacement, with ``np.random.choice``. The drawn rows form the second
    returned frame, all the other rows form the first one. The input frame is
    left untouched.

    :param df: DataFrame to split
    :param frac: proportion of the rows to draw
    :return: ``(remaining_rows, drawn_rows)``, each with a fresh 0..k-1 index
    """
    total_rows = len(df)
    drawn_positions = np.random.choice(total_rows, size=int(frac * total_rows), replace=False)

    # boolean selector over row positions: True for the drawn rows
    is_drawn = np.zeros(total_rows, dtype=bool)
    is_drawn[drawn_positions] = True

    drawn_rows = df[is_drawn].reset_index(drop=True)
    remaining_rows = df[~is_drawn].reset_index(drop=True)
    return remaining_rows, drawn_rows

In [None]:
def skew_intervention(df_testing, df_training, all_items, frac):
    """Sample ``df_testing`` with probabilities inversely proportional to item counts.

    Each item gets the score ``1 / (number of its ratings in df_training)``;
    items of ``all_items`` that never occur in the training set get a score
    drawn with ``random.random()``. Every test rating inherits the score of its
    item, the scores are normalised to sum to 1 and ``int(frac * len(df_testing))``
    ratings are drawn without replacement with ``np.random.choice``.

    :param df_testing: ratings to sample from (needs an 'itemId' column)
    :param df_training: ratings used to count how often each item occurs
    :param all_items: set with the ids of all the items
    :param frac: proportion of ``df_testing`` to draw
    :return: ``(not_drawn, drawn)``, each with a fresh 0..k-1 index
    """
    # inverse of the number of training ratings of each item
    training_counts = df_training['itemId'].value_counts(sort=True, ascending=True, normalize=False)
    item_score = {item: 1 / count for item, count in training_counts.to_dict().items()}

    # for items not included in the training set, set a random probability
    for unseen_item in all_items.difference(item_score.keys()):
        item_score[unseen_item] = random.random()

    total_rows = len(df_testing)
    df_test = df_testing.copy()

    # score of every test rating, then normalised into a probability vector
    df_test['p_item'] = df_test['itemId'].map(lambda item: item_score[item])
    score_sum = df_test['p_item'].sum()
    probabilities = (df_test['p_item'] / score_sum).values
    df_test = df_test.drop(columns='p_item')

    drawn_positions = np.random.choice(total_rows, size=int(frac * total_rows),
                                       replace=False, p=probabilities)
    is_drawn = np.zeros(total_rows, dtype=bool)
    is_drawn[drawn_positions] = True

    drawn = df_test[is_drawn].reset_index(drop=True)
    not_drawn = df_test[~is_drawn].reset_index(drop=True)
    return not_drawn, drawn

In [None]:
def weighted_intervention(df_testing, user_w, item_w, frac):
    """Sample ``df_testing`` with probabilities given by user and item weights.

    The sampling probability of a rating is proportional to
    ``user_w[userId] * item_w[itemId]``; ``int(frac * len(df_testing))`` ratings
    are drawn without replacement with ``np.random.choice``.

    The probabilities are computed by row position, so the function works with
    any index on ``df_testing`` (not only a 0..n-1 one).

    :param df_testing: ratings to sample from (needs 'userId' and 'itemId' columns)
    :param user_w: dict mapping every user id of ``df_testing`` to its weight
    :param item_w: dict mapping every item id of ``df_testing`` to its weight
    :param frac: proportion of ``df_testing`` to draw
    :return: ``(not_drawn, drawn)``, each with a fresh 0..k-1 index
    :raises KeyError: if a user or an item of ``df_testing`` has no weight
    :raises ValueError: if the weights give a zero or non-finite total
        probability mass, or if fewer ratings have a non-zero probability than
        the number of ratings to draw (raised by ``np.random.choice``)
    """
    n_ratings = len(df_testing)
    if n_ratings == 0:
        # nothing to sample: both parts are empty
        empty = df_testing.reset_index(drop=True)
        return empty, empty.copy()

    # calculate P(S|u,i,w) for every pair in df_testing, by row position
    # (the index labels of df_testing are deliberately not used)
    p_s = np.array(
        [user_w[user] * item_w[item]
         for user, item in zip(df_testing['userId'].tolist(), df_testing['itemId'].tolist())],
        dtype=float,
    )

    # normalize p_s
    tot = p_s.sum()
    if not np.isfinite(tot) or tot <= 0:
        raise ValueError('the weights give no valid sampling probability (total mass = %r)' % (tot,))
    p_s = p_s / tot

    # perform the sampling with P(S|u,i,w)
    test_idx = np.random.choice(n_ratings, size=int(frac * n_ratings), replace=False, p=p_s)

    mask = np.zeros(n_ratings, dtype=bool)
    mask[test_idx] = True

    test = df_testing[mask].reset_index(drop=True)
    train = df_testing[~mask].reset_index(drop=True)

    return train, test

In [None]:
def calculate_weights(df_training, df_mar, use_ideal, usrs, itms):
    """Compute the normalised user and item weights used by the weighted intervention.

    The weight of a user (item) is the ratio between its probability in the MAR
    data and its probability in the MNAR training data; each weight dictionary
    is then divided by its own sum.

    :param df_training: MNAR ratings, always used with their empirical distribution
    :param df_mar: MAR ratings reserved for the calculation of the weights
    :param use_ideal: if True, the ideal distribution is used for the MAR side
        instead of the one estimated from ``df_mar``
    :param usrs: set with the ids of all the users
    :param itms: set with the ids of all the items
    :return: ``(user_weights, item_weights)``, two dicts id -> weight
    """
    # P(u|O) under MAR and under MNAR
    mar_user_probs = calculate_p_u_o(df_mar, usrs, use_ideal)
    mnar_user_probs = calculate_p_u_o(df_training, usrs, False)

    # P(i|O) under MAR and under MNAR
    mar_item_probs = calculate_p_i_o(df_mar, itms, use_ideal)
    mnar_item_probs = calculate_p_i_o(df_training, itms, False)

    # containers with one entry per user / item, filled right below
    user_weights = initialize_weights(mnar_user_probs.keys())
    item_weights = initialize_weights(mnar_item_probs.keys())

    user_weights = exact_weight_calculation(mar_user_probs, mnar_user_probs, user_weights)
    item_weights = exact_weight_calculation(mar_item_probs, mnar_item_probs, item_weights)

    # normalize weights
    return norm(user_weights), norm(item_weights)

def initialize_weights(list_of):
    """Build a dict that maps every element of ``list_of`` to a random weight.

    :param list_of: iterable of hashable ids
    :return: dict id -> value drawn with ``random.random()``
    """
    return {element: random.random() for element in list_of}

def exact_weight_calculation(p_mar, p_mnar, gd_w):
    """Overwrite every weight of ``gd_w`` with the ratio ``p_mar[id] / p_mnar[id]``.

    :param p_mar: dict id -> probability under MAR
    :param p_mnar: dict id -> probability under MNAR
    :param gd_w: dict whose keys are the ids to process; it is updated in place
    :return: the same ``gd_w`` object, holding the ratios
    """
    for key in gd_w.keys():
        mar_probability = p_mar[key]
        mnar_probability = p_mnar[key]
        gd_w[key] = mar_probability / mnar_probability
    return gd_w


def norm(vect):
    """Divide, in place, every value of ``vect`` by the sum of all its values.

    :param vect: dict key -> number
    :return: the same ``vect`` object, with the rescaled values
    """
    total = sum(vect.values())
    for key, value in vect.items():
        vect[key] = value / total
    return vect

def calculate_p_u_o(df, usrs, use_ideal, min_prob=0.0001):
    """Compute the probability P(u|O) of every user of ``usrs``.

    With ``use_ideal`` every user gets the same probability ``1 / len(usrs)``.
    Otherwise the probability of a user is the share of the rows of ``df`` that
    belong to that user; users without any row get ``min_prob`` instead of 0,
    so that the probabilities can safely be used as divisors.

    The rows are counted once with ``value_counts`` instead of filtering ``df``
    again for every user.

    :param df: ratings (needs a 'userId' column unless ``use_ideal`` is True)
    :param usrs: collection with the ids of all the users
    :param use_ideal: if True, return the uniform distribution over ``usrs``
    :param min_prob: value given to the users that have no rating in ``df``
    :return: dict user id -> probability (empty if ``usrs`` is empty)
    """
    if len(usrs) == 0:
        return {}

    if use_ideal:
        ideal_distr = 1 / len(usrs)
        return {user: ideal_distr for user in usrs}

    n = len(df)
    # an empty frame has no observation: every user falls back to min_prob
    counts = df['userId'].value_counts().to_dict() if n > 0 else {}

    p_u_o = {}
    for user in usrs:
        n_user = int(counts.get(user, 0))
        p_u_o[user] = n_user / n if n_user > 0 else min_prob
    return p_u_o


def calculate_p_i_o(df, itms, use_ideal, min_prob=0.0001):
    """Compute the probability P(i|O) of every item of ``itms``.

    With ``use_ideal`` every item gets the same probability ``1 / len(itms)``.
    Otherwise the probability of an item is the share of the rows of ``df`` that
    belong to that item; items without any row get ``min_prob`` instead of 0,
    so that the probabilities can safely be used as divisors.

    The rows are counted once with ``value_counts`` instead of filtering ``df``
    again for every item.

    :param df: ratings (needs an 'itemId' column unless ``use_ideal`` is True)
    :param itms: collection with the ids of all the items
    :param use_ideal: if True, return the uniform distribution over ``itms``
    :param min_prob: value given to the items that have no rating in ``df``
    :return: dict item id -> probability (empty if ``itms`` is empty)
    """
    if len(itms) == 0:
        return {}

    if use_ideal:
        ideal_distr = 1 / len(itms)
        return {item: ideal_distr for item in itms}

    n = len(df)
    # an empty frame has no observation: every item falls back to min_prob
    counts = df['itemId'].value_counts().to_dict() if n > 0 else {}

    p_i_o = {}
    for item in itms:
        n_item = int(counts.get(item, 0))
        p_i_o[item] = n_item / n if n_item > 0 else min_prob
    return p_i_o