In [1]:
from numpy.random import choice, normal, uniform, binomial
from numpy import sum, mean, zeros, array, NaN
import pandas as pd
from math import comb
from numpy.random import seed

In [2]:
pd.options.mode.chained_assignment = None
seed(1234)

# Simulating Data

We will simulate two types of users (enthusiasts and normal), with proportion `prob_enthusiast`. Enthusiast have a higher impact on the outcome `enthusiast_effect` and also a higher acceptance rate `inc_enth_cons` of doing the recommended items. So the type of users is an **unobserved confounder**. 

Each user does `sessions_n` sessions, and at each session, there are only `k` items recommended. There is an underlying recommender system (the recommender system by default) that always recommends the last `k` items for normal users, while always recommends items 0:`k` to enthusiasts. Every session, with probability `exploration_prob` the items are recommended uniformly at random. 

The outcome is calculated as follows. Each item `i` has an additve causal impact of `i` (to make it easy the index of the item is the same causal impact). The outcome is the addition of each item consumed, plus some noise `delta`. As already said, enthusiast have an extra bonus of `enthusiast_effect` in the outcome.

In [3]:
items_n = 5
patients_n = 10000
sessions_n = 2
k = 3
delta = 0.1
exploration_prob = 0.8

prob_enthusiast = 0.2
enthusiast_effect = 10
base_consumption = 0.2
inc_enth_cons = 0.8

In [4]:
ids = []
sessions = []
patient_types = []
items = []
recommendations = []
consumptions = []
originals = []
explorations = []

items_ids = list(range(items_n))
for id in range(patients_n):
    patient_type = choice([0, 1], size=1, p=[1-prob_enthusiast, prob_enthusiast])[0]
    for session in range(sessions_n):
        exploration = binomial(1, exploration_prob, size=1)[0]
        original = None
        if patient_type == 1:
            original = items_ids[:k]
        else:
            original = items_ids[-k:]
        if exploration == 1:
            recommended = choice(items_ids, size=k, replace=False)
        else:
            recommended = original
            
        recommended = [int(item in recommended) for item in items_ids]     
        original = [int(item in original) for item in items_ids]     
        
        consumption = []
        for item in range(items_n):
            score_assign = base_consumption + inc_enth_cons*patient_type
            item_cons = int(uniform(size=1)[0] <= score_assign)
            item_cons *= recommended[item]
            consumption.append(item_cons)
        
        ids += [id]*items_n
        sessions += [session]*items_n
        patient_types += [patient_type]*items_n
        items += items_ids
        recommendations += recommended
        consumptions += consumption
        explorations += [exploration]*items_n
        originals += original
    
df = pd.DataFrame({
    'user_id': ids,
    'session': sessions, 
    'patient_type': patient_types,
    'item': items, 
    'original': originals,
    'recommended': recommendations,
    'consumed': consumptions,
    'exploration': explorations
})

outcomes = df.groupby('user_id').apply(lambda x: 
    (sum(x['item']*x['consumed']) + 
    mean(x['patient_type'])*enthusiast_effect + 
    normal(size=1, scale=delta))[0]
).reset_index()
outcomes.rename(columns={0:'outcome'}, inplace = True)
df = df.merge(outcomes, on='user_id')

In [5]:
df.head(n=items_n*sessions_n)

Unnamed: 0,user_id,session,patient_type,item,original,recommended,consumed,exploration,outcome
0,0,0,0,0,0,0,0,1,-0.062476
1,0,0,0,1,0,1,0,1,-0.062476
2,0,0,0,2,0,1,0,1,-0.062476
3,0,0,0,3,1,0,0,1,-0.062476
4,0,0,0,4,1,0,0,1,-0.062476
5,0,1,0,0,0,0,0,0,-0.062476
6,0,1,0,1,0,0,0,0,-0.062476
7,0,1,0,2,0,0,0,0,-0.062476
8,0,1,0,3,1,1,0,0,-0.062476
9,0,1,0,4,1,1,0,0,-0.062476


In [6]:
recs = df.pivot(index=['user_id', 'session', 'exploration'], columns='item', values='recommended').reset_index().merge(outcomes, on='user_id')
consumes = df.pivot(index=['user_id', 'session', 'exploration'], columns='item', values='consumed').reset_index().merge(outcomes, on='user_id')
originals = df.pivot(index=['user_id', 'session', 'exploration'], columns='item', values='original').reset_index().merge(outcomes, on='user_id')

In [7]:
recs.head(sessions_n)

Unnamed: 0,user_id,session,exploration,0,1,2,3,4,outcome
0,0,0,1,0,1,1,0,0,-0.062476
1,0,1,0,0,0,0,1,1,-0.062476


In [8]:
consumes.head(sessions_n)

Unnamed: 0,user_id,session,exploration,0,1,2,3,4,outcome
0,0,0,1,0,0,0,0,0,-0.062476
1,0,1,0,0,0,0,0,0,-0.062476


In [9]:
originals.head(sessions_n)

Unnamed: 0,user_id,session,exploration,0,1,2,3,4,outcome
0,0,0,1,0,0,0,1,1,-0.062476
1,0,1,0,0,0,0,1,1,-0.062476


# Basic Stats

In [10]:
df[df.recommended==1].groupby('patient_type').agg({'consumed': [mean]})

Unnamed: 0_level_0,consumed
Unnamed: 0_level_1,mean
patient_type,Unnamed: 1_level_2
0,0.198567
1,1.0


In [11]:
df.groupby('patient_type').agg({'outcome': [mean]})

Unnamed: 0_level_0,outcome
Unnamed: 0_level_1,mean
patient_type,Unnamed: 1_level_2
0,1.825819
1,16.928423


# Real Impact of Recommendations

This are the difference of causal impact between pairs of items (overlook the zeros, the matrix is actually symetric)

In [12]:
expected_diff_items = zeros((items_n, items_n))

expected_compliers = base_consumption*(1-prob_enthusiast) + (base_consumption + inc_enth_cons)*prob_enthusiast

for item_1 in range(items_n):
    for item_2 in range(items_n):
        expected_diff_items[item_1, item_2] = (item_1 - item_2)*expected_compliers

expected_diff_items = pd.DataFrame(expected_diff_items.round(2))
expected_diff_items

Unnamed: 0,0,1,2,3,4
0,0.0,-0.36,-0.72,-1.08,-1.44
1,0.36,0.0,-0.36,-0.72,-1.08
2,0.72,0.36,0.0,-0.36,-0.72
3,1.08,0.72,0.36,0.0,-0.36
4,1.44,1.08,0.72,0.36,0.0


# Direct Estimation Recommendations

This is the difference between recommendations obtained by trivial computations. You can see they are biased.

In [13]:
diff_items_direct = zeros((items_n, items_n))

for item_1 in range(items_n):
    for item_2 in range(items_n):
        try: 
            inds_1_0 = recs.loc[:, item_1] == 1
            inds_2_0 = recs.loc[:, item_2] == 1
            res_1 = recs[inds_1_0].loc[:, 'outcome'].mean() - recs[inds_2_0].loc[:, 'outcome'].mean()
        except:
            res_1 = NaN
        diff_items_direct[item_1, item_2] = res_1
            
diff_items_direct = pd.DataFrame(diff_items_direct.round(2))
diff_items_direct

Unnamed: 0,0,1,2,3,4
0,0.0,-0.27,0.53,1.11,0.88
1,0.27,0.0,0.8,1.39,1.16
2,-0.53,-0.8,0.0,0.58,0.35
3,-1.11,-1.39,-0.58,0.0,-0.23
4,-0.88,-1.16,-0.35,0.23,0.0


In comparison with the theoretical results

In [14]:
expected_diff_items

Unnamed: 0,0,1,2,3,4
0,0.0,-0.36,-0.72,-1.08,-1.44
1,0.36,0.0,-0.36,-0.72,-1.08
2,0.72,0.36,0.0,-0.36,-0.72
3,1.08,0.72,0.36,0.0,-0.36
4,1.44,1.08,0.72,0.36,0.0


# Our Method

We provide the function `estimate_differences` that calculates the differences in impacts of recommendations using the method explained in the paper for the case of $\varepsilon$-exploration recommender systems. The inputs of the function are:

- `df` a pandas.DataFrame where each row is at user-session-item level. It is only necessary to contain rows with items that they have been either recommended or consumed after the session. The data frame requires the following columns:
    - `user_id`: user id
    - `session`
    - `item`
    - `original`: whether the item was recommended in the underlying recommender system or not
    - `recommended`: whether the item was recommended or not
    - `exploration`: whether the session recommended at random or not
    - `outcome`
- `k`: number of simultaneous recommendations
- `exploration_prob`: exploration probability

In [15]:
def estimate_differences(df, k, exploration_prob):

    outcomes = df.groupby(['user_id', 'session'], as_index=False).outcome.mean()
    recs = df.pivot_table(
        index=['user_id', 'session', 'exploration'], 
        columns='item', values='recommended', fill_value=0). \
        reset_index().merge(outcomes, on='user_id')
    originals = df.pivot_table(
        index=['user_id', 'session', 'exploration'], 
        columns='item', values='original', fill_value=0). \
        reset_index().merge(outcomes, on='user_id')

    items_ids = df.item.unique()
    N = len(items_ids)
    diff_items = zeros((N, N))
    q = exploration_prob/comb(N-2, k-1)

    for item_1 in range(items_n):
        for item_2 in range(items_n):
            other_itemments = [t for t in items_ids if t not in [item_1, item_2]]

            # Calculate Propensity Scores
            L = recs[other_itemments]*originals[other_itemments]
            L += (1 -recs[other_itemments])*(1-originals[other_itemments])
            L = (L.apply(sum, axis=1) == N-2).astype(int)
            eta = q/(q + L*(1-exploration_prob))
            propensity_scores = eta/2 + L*originals[item_1]*(1-eta)

            # Calculating Adjustment Formula
            inds = recs[item_1] != recs[item_2]
            diff_data = recs[inds]
            diff_data['propensity_scores'] = propensity_scores[inds]
            diff_ate = 0
            for control_vars, sub_data in diff_data.groupby('propensity_scores'):
                prop = sub_data.shape[0]/diff_data.shape[0]
                res = sub_data.groupby(item_1).outcome.mean()
                if res.shape[0] > 1:
                    diff_ate += (res.iloc[1] - res.iloc[0])*prop
            diff_items[item_1, item_2] = diff_ate

    return(pd.DataFrame(diff_items))

In [16]:
diff_items = estimate_differences(df, k, exploration_prob).round(2)
diff_items

Unnamed: 0,0,1,2,3,4
0,0.0,-0.41,-0.79,-1.09,-1.52
1,0.41,0.0,-0.33,-0.69,-1.09
2,0.79,0.33,0.0,-0.39,-0.77
3,1.09,0.69,0.39,0.0,-0.47
4,1.52,1.09,0.77,0.47,0.0


In comparison with the expected results

In [17]:
expected_diff_items

Unnamed: 0,0,1,2,3,4
0,0.0,-0.36,-0.72,-1.08,-1.44
1,0.36,0.0,-0.36,-0.72,-1.08
2,0.72,0.36,0.0,-0.36,-0.72
3,1.08,0.72,0.36,0.0,-0.36
4,1.44,1.08,0.72,0.36,0.0


# Ranking results

In [18]:
def rank_differences(results):
    rank = results.iloc[:, 0]
    for col in range(1, results.shape[0]):
        rank += results.iloc[:, col] - results.iloc[col, 0]
    rank /= results.shape[0]
    return(rank.sort_values(ascending=False))

In [19]:
rank_differences(expected_diff_items)

4    0.00
3   -0.36
2   -0.72
1   -1.08
0   -1.44
Name: 0, dtype: float64

In [20]:
rank_differences(diff_items)

4    0.000
3   -0.430
2   -0.778
1   -1.110
0   -1.532
Name: 0, dtype: float64

In [21]:
rank_differences(diff_items_direct)

1    1.156
0    0.882
2    0.352
4    0.000
3   -0.230
Name: 0, dtype: float64