In [1]:
from numpy.random import choice, normal, uniform, binomial
from numpy import sum, mean, zeros, array, NaN, floor, sqrt
import pandas as pd
from math import comb
from numpy.random import seed
import numpy as np
import itertools
from tqdm import tqdm

In [2]:
# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)
# Seed NumPy's global RNG so every simulated draw below is reproducible.
np.random.seed(1234)

# Simulating Data

We will simulate two types of users (enthusiasts and normal); a user is an enthusiast with probability `prob_enthusiast`. Enthusiasts have a higher outcome (a bonus of `enthusiast_effect`) and also a higher acceptance rate (increased by `inc_enth_cons`) for the recommended items. The user type is therefore an **unobserved confounder**.

Each user does `sessions_n` sessions, and in each session only `k` items are recommended. There is an underlying recommender system (the default recommender) that always recommends the first `k-1` items plus the last item to normal users, and the first item plus the last `k-1` items to enthusiasts. In every session, with probability `exploration_prob`, the `k` recommended items are instead drawn uniformly at random.

The outcome is calculated as follows. Each treatment `i` has an additive causal impact of `i` (for simplicity, an item's index is also its causal impact). A user's outcome is the sum of the impacts of all the items they consumed, plus Gaussian noise with standard deviation `delta`. As mentioned above, enthusiasts receive an extra bonus of `enthusiast_effect` in the outcome.

In [3]:
treatments_n = 5          # number of items (treatments) in the catalogue
patients_n = 10000        # number of simulated users
sessions_n = 2            # sessions simulated per user
k = 3                     # items recommended in each session
delta = 0.1               # standard deviation of the Gaussian noise added to each user's outcome
exploration_prob = 0.5    # per-session probability that the slate is drawn uniformly at random

prob_enthusiast = 0.4     # probability that a user is an enthusiast (patient_type == 1)
enthusiast_effect = 10    # bonus added to an enthusiast's outcome
base_consumption = 0.2    # probability that a normal user consumes a recommended item
inc_enth_cons = 0.4       # extra consumption probability for enthusiasts (added to base_consumption)

In [4]:
# Long-format accumulators: one entry per (user, session, treatment).
ids = []
sessions = []
patient_types = []
treatments = []
recommendations = []
consumptions = []
originals = []
explorations = []

treats_ids = list(range(treatments_n))

# The default slates below slice with -k_e / -k_i, which silently selects the
# whole catalogue when the count is 0, so k must leave both counts positive.
if not 2 <= k <= treatments_n:
    raise ValueError("k must satisfy 2 <= k <= treatments_n")

# Split of the default slate between the two ends of the catalogue:
# k_i items from one end and k_e = k - k_i from the other.
k_i = 1
k_e = k - 1

for user_id in range(patients_n):
    # 1 = enthusiast, 0 = normal user (the unobserved confounder).
    patient_type = choice([0, 1], size=1, p=[1-prob_enthusiast, prob_enthusiast])[0]

    # Probability that this user consumes an item once it is recommended.
    score_assign = base_consumption + inc_enth_cons*patient_type

    # Slate the default recommender always shows to this type of user.
    if patient_type == 1:
        default_slate = treats_ids[:k_i] + treats_ids[-k_e:]
    else:
        default_slate = treats_ids[:k_e] + treats_ids[-k_i:]
    default_flags = [int(treat in default_slate) for treat in treats_ids]

    for session in range(sessions_n):
        # With probability exploration_prob the slate is k items drawn uniformly
        # at random without replacement; otherwise the default slate is shown.
        exploration = binomial(1, exploration_prob, size=1)[0]
        if exploration == 1:
            slate = choice(treats_ids, size=k, replace=False)
        else:
            slate = default_slate
        recommended = [int(treat in slate) for treat in treats_ids]

        # One uniform draw is made for every item (recommended or not); an item
        # can only be consumed if it was recommended.
        consumption = [
            int(uniform(size=1)[0] <= score_assign) * recommended[treat]
            for treat in range(treatments_n)
        ]

        ids += [user_id]*treatments_n
        sessions += [session]*treatments_n
        patient_types += [patient_type]*treatments_n
        treatments += treats_ids
        recommendations += recommended
        consumptions += consumption
        explorations += [exploration]*treatments_n
        originals += default_flags

df = pd.DataFrame({
    'id': ids,
    'session': sessions,
    'patient_type': patient_types,
    'treatment': treatments,
    'original': originals,
    'recommended': recommendations,
    'consumed': consumptions,
    'exploration': explorations
})

# One outcome per user: the summed index of every consumed item across all of
# the user's sessions, plus the enthusiast bonus, plus N(0, delta) noise.
# `sum` and `mean` are the NumPy functions imported at the top of the notebook.
outcomes = (
    df.groupby('id')
    .apply(lambda x:
        (sum(x['treatment']*x['consumed']) +
        mean(x['patient_type'])*enthusiast_effect +
        normal(size=1, scale=delta))[0]
    )
    .reset_index()
    .rename(columns={0: 'outcome'})
)
df = df.merge(outcomes, on='id')

In [5]:
# Each user contributes sessions_n*treatments_n rows, so this previews two users' worth of rows.
df.head(n=sessions_n*treatments_n*2)

Unnamed: 0,id,session,patient_type,treatment,original,recommended,consumed,exploration,outcome
0,0,0,0,0,1,0,0,1,0.068825
1,0,0,0,1,1,1,0,1,0.068825
2,0,0,0,2,0,1,0,1,0.068825
3,0,0,0,3,0,1,0,1,0.068825
4,0,0,0,4,1,0,0,1,0.068825
5,0,1,0,0,1,0,0,1,0.068825
6,0,1,0,1,1,1,0,1,0.068825
7,0,1,0,2,0,0,0,1,0.068825
8,0,1,0,3,0,1,0,1,0.068825
9,0,1,0,4,1,1,0,1,0.068825


In [6]:
# Reshape the long frame into wide tables: one row per (id, session, exploration),
# one column per treatment, with the user's outcome attached.
_session_keys = ['id', 'session', 'exploration']


def _wide_by_treatment(value_col):
    """Pivot `value_col` of `df` to one column per treatment and merge `outcomes` on 'id'."""
    wide = df.pivot(index=_session_keys, columns='treatment', values=value_col)
    return wide.reset_index().merge(outcomes, on='id')


recs = _wide_by_treatment('recommended')
consumes = _wide_by_treatment('consumed')
originals = _wide_by_treatment('original')

In [7]:
# First sessions_n rows of the wide recommendation indicators.
recs.head(sessions_n)

Unnamed: 0,id,session,exploration,0,1,2,3,4,outcome
0,0,0,1,0,1,1,1,0,0.068825
1,0,1,1,0,1,0,1,1,0.068825


In [8]:
# First sessions_n rows of the wide consumption indicators.
consumes.head(sessions_n)

Unnamed: 0,id,session,exploration,0,1,2,3,4,outcome
0,0,0,1,0,0,0,0,0,0.068825
1,0,1,1,0,0,0,0,0,0.068825


In [9]:
# First sessions_n rows of the wide default-slate indicators.
originals.head(sessions_n)

Unnamed: 0,id,session,exploration,0,1,2,3,4,outcome
0,0,0,1,1,1,0,0,1,0.068825
1,0,1,1,1,1,0,0,1,0.068825


# Basic Stats

In [10]:
# Share of recommended items that were actually consumed, per user type.
recommended_rows = df[df.recommended == 1]
res = recommended_rows.groupby('patient_type').consumed.mean().reset_index()
res

Unnamed: 0,patient_type,consumed
0,0,0.200959
1,1,0.607355


In [11]:
# Average outcome per user type.
outcome_by_type = df.groupby('patient_type').outcome
res = outcome_by_type.mean().reset_index()
res

Unnamed: 0,patient_type,outcome
0,0,2.217833
1,1,17.944112


# Helping functions

In [12]:
def rmse(diff_1, diff_2):
    """Return the root-mean-square error between two equally shaped arrays.

    The mean is taken over every element of the element-wise squared difference.
    """
    squared_error = (diff_1 - diff_2)**2
    return sqrt(mean(squared_error))

In [13]:
def estimate_score(results):
    """Collapse a square matrix of pairwise differences into one score per row.

    The score of row i is the average over j of ``results[i, j] - results[j, 0]``.
    The subtracted term is the same for every row, so it only shifts all scores
    by a constant; the ordering of the scores is that of the row means.

    Parameters
    ----------
    results : 2-D array indexed as ``results[row, column]``; the number of rows
        is also used as the number of columns visited.

    Returns
    -------
    1-D float array with one score per row.
    """
    n_items = results.shape[0]
    totals = zeros(n_items)
    for j in range(n_items):
        totals += results[:, j] - results[j, 0]
    return totals / n_items

def rank_differences(results):
    """Rank items from a pairwise-difference matrix.

    Returns a one-column DataFrame ('score') holding ``estimate_score(results)``,
    sorted from highest to lowest score; the index is the item's row position
    in `results`.
    """
    score_frame = pd.DataFrame({'score': estimate_score(results)})
    return score_frame.sort_values(by='score', ascending=False)

# Real Impact of Recommendations

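These are the differences in the causal impact of recommending each pair of items: entry `[i, j]` is the impact of item `i` minus the impact of item `j`, scaled by the average probability that a recommended item is consumed.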

In [14]:
# Probability that a recommended item is consumed, averaged over the two user types.
expected_compliers = base_consumption*(1-prob_enthusiast) + (base_consumption + inc_enth_cons)*prob_enthusiast

# Item i has causal impact i, so entry [i, j] is (i - j) times that probability.
item_effects = np.arange(treatments_n)
expected_diff_treats = (item_effects[:, None] - item_effects[None, :])*expected_compliers

pd.DataFrame(expected_diff_treats.round(2))

Unnamed: 0,0,1,2,3,4
0,0.0,-0.36,-0.72,-1.08,-1.44
1,0.36,0.0,-0.36,-0.72,-1.08
2,0.72,0.36,0.0,-0.36,-0.72
3,1.08,0.72,0.36,0.0,-0.36
4,1.44,1.08,0.72,0.36,0.0


In [16]:
# Reference ranking derived from the ground-truth difference matrix.
rank_differences(expected_diff_treats)

Unnamed: 0,score
4,0.0
3,-0.36
2,-0.72
1,-1.08
0,-1.44


In [None]:
# Same ground-truth matrix as above, shown without rounding.
pd.DataFrame(expected_diff_treats)

# Direct Estimation Recommendations

This is the difference between recommendations obtained by trivial computations. The results are biased, leading to an incorrect order.

In [17]:
# Naive estimator: entry [i, j] is the mean outcome over sessions in which item i
# was recommended minus the mean outcome over sessions in which item j was.
#
# Each mean depends on a single item, so it is computed once per item instead of
# once per pair. No try/except is needed: Series.mean() of an empty selection is
# NaN, and any other failure should surface rather than be turned into NaN.
mean_outcome_when_recommended = array([
    recs.loc[recs.loc[:, treat] == 1, 'outcome'].mean()
    for treat in range(treatments_n)
])

diff_treats_0 = mean_outcome_when_recommended[:, None] - mean_outcome_when_recommended[None, :]

pd.DataFrame(diff_treats_0.round(2))

Unnamed: 0,0,1,2,3,4
0,0.0,3.12,-0.17,-4.16,-0.28
1,-3.12,0.0,-3.3,-7.28,-3.4
2,0.17,3.3,0.0,-3.99,-0.1
3,4.16,7.28,3.99,0.0,3.88
4,0.28,3.4,0.1,-3.88,0.0


In comparison with the theoretical results

In [18]:
# Error of the naive estimate against the ground-truth matrix.
print("Root Mean Square Error:", rmse(expected_diff_treats, diff_treats_0))
print("Estimated Ranking")
# The ranking is computed from the matrix rounded to 2 decimals.
rank_differences(diff_treats_0.round(2))

Root Mean Square Error: 2.992127846336046
Estimated Ranking


Unnamed: 0,score
3,3.564
4,-0.318
2,-0.422
0,-0.596
1,-3.718


# Direct Increment Estimation Recommendations

An estimator based on the differences between when an item has been selected and when it is not. The results are biased, leading to an incorrect order.

In [19]:
# Increment estimator: an item's impact is the mean outcome over sessions where
# it was recommended minus the mean outcome over sessions where it was not;
# entry [i, j] is impact(i) - impact(j).
#
# Each impact depends on a single item, so it is computed once per item instead
# of once per pair. No try/except is needed: Series.mean() of an empty selection
# is NaN, and any other failure should surface rather than be turned into NaN.
impacts = zeros(treatments_n)
for treat in range(treatments_n):
    was_recommended = recs.loc[:, treat] == 1
    impacts[treat] = (
        recs.loc[was_recommended, 'outcome'].mean()
        - recs.loc[~was_recommended, 'outcome'].mean()
    )

diff_treats_0_inc = impacts[:, None] - impacts[None, :]

pd.DataFrame(diff_treats_0_inc.round(2))

Unnamed: 0,0,1,2,3,4
0,0.0,7.66,-0.73,-8.59,-1.37
1,-7.66,0.0,-8.39,-16.25,-9.03
2,0.73,8.39,0.0,-7.86,-0.64
3,8.59,16.25,7.86,0.0,7.22
4,1.37,9.03,0.64,-7.22,0.0


In comparison with the theoretical results

In [20]:
# Error of the increment estimate against the ground-truth matrix.
print("Root Mean Square Error:", rmse(expected_diff_treats, diff_treats_0_inc))
print("Estimated Ranking")
# The ranking is computed from the matrix rounded to 2 decimals.
rank_differences(diff_treats_0_inc.round(2))

Root Mean Square Error: 6.949312118223762
Estimated Ranking


Unnamed: 0,score
3,7.378
4,0.158
2,-0.482
0,-1.212
1,-8.872


# Propensity Scores Adjustment

Using propensity score adjustment to remove the effect of confounders. The results are biased, leading to an incorrect order.

In [21]:
N = treatments_n

# Stratified (adjustment-formula) mean outcome under "recommend item t":
#   sum over propensity strata of  P(stratum) * E[outcome | stratum, t recommended].
# This quantity depends on a single item, so it is computed once per item rather
# than once for every (treat_1, treat_2) pair.
adjusted_means_psa = zeros(treatments_n)

for treat in tqdm(treats_ids):
    treat_data = recs.copy()
    # Probability that `treat` is on the slate: under exploration k of the N items
    # are drawn uniformly without replacement (k/N); otherwise it is shown exactly
    # when it belongs to the default slate. The score is only used as a stratum
    # label, so only its distinct values matter for the estimate.
    treat_data['propensity_score'] = exploration_prob*k/N + (1 - exploration_prob)*originals[treat]

    do_treat = 0
    for _, sub_data in treat_data.groupby('propensity_score'):
        prop = sub_data.shape[0]/treat_data.shape[0]
        # A stratum in which the item was never recommended yields NaN here.
        do_treat += sub_data[sub_data[treat] == 1].outcome.mean()*prop

    adjusted_means_psa[treat] = do_treat

# Entry [i, j] is the adjusted mean for item i minus the adjusted mean for item j.
diff_treats_psa = adjusted_means_psa[:, None] - adjusted_means_psa[None, :]

pd.DataFrame(diff_treats_psa.round(2))

100%|██████████████████████████████████████████| 25/25 [00:00<00:00, 131.53it/s]


Unnamed: 0,0,1,2,3,4
0,0.0,0.13,-0.17,-0.34,-0.28
1,-0.13,0.0,-0.3,-0.47,-0.41
2,0.17,0.3,0.0,-0.16,-0.1
3,0.34,0.47,0.16,0.0,0.06
4,0.28,0.41,0.1,-0.06,0.0


In comparison with the theoretical results

In [22]:
# Error of the propensity-score-adjusted estimate against the ground-truth matrix.
print("Root Mean Square Error:", rmse(expected_diff_treats, diff_treats_psa))
print("Estimated Ranking")
# The ranking is computed from the matrix rounded to 2 decimals.
rank_differences(diff_treats_psa.round(2))

Root Mean Square Error: 0.533367736931947
Estimated Ranking


Unnamed: 0,score
3,0.074
4,0.014
2,-0.09
0,-0.264
1,-0.394


# Our Method

With our method we obtain a much lower RMSE and recover the correct order of impact of the items.

In [23]:
diff_treats = zeros((treatments_n, treatments_n))

N = treatments_n
# Exploration probability divided by the number of ways to pick k-1 of the N-2
# items other than the pair being compared.
# NOTE(review): presumably the probability of one specific exploratory slate
# containing exactly one item of the pair -- confirm against the method's derivation.
q = exploration_prob/comb(N-2, k-1)

# Wide 0/1 indicator matrices, one column per item: what was shown / the default slate.
recs_np = recs[treats_ids].to_numpy()
originals_np = originals[treats_ids].to_numpy()

for treat_1, treat_2 in tqdm(list(itertools.product(treats_ids, treats_ids))):
    if treat_1 == treat_2:
        continue  # the diagonal keeps its initial value of 0

    other_treatments = [t for t in treats_ids if t not in (treat_1, treat_2)]

    # Calculate Propensity Scores
    # L is 1 for sessions whose shown slate matches the default slate on all of
    # the N-2 other items, and 0 otherwise.
    L = recs_np[:, other_treatments]*originals_np[:, other_treatments]
    L += (1 - recs_np[:, other_treatments])*(1 - originals_np[:, other_treatments])
    L = (L.sum(axis=1) == N-2).astype(int)
    # eta equals 1 when L == 0 and q/(q + 1 - exploration_prob) when L == 1.
    # NOTE(review): looks like the probability that the session was exploratory
    # given the other items -- confirm.
    eta = q/(q + L*(1-exploration_prob))
    propensity_scores = eta/2 + L*originals_np[:, treat_1]*(1-eta)

    # Calculating Adjustment Formula
    # Keep only the sessions in which exactly one of the two items was recommended.
    inds = recs_np[:, treat_1] != recs_np[:, treat_2]
    # .assign returns a new frame, so nothing is written into a slice of `recs`.
    diff_data = recs[inds].assign(propensity_scores=propensity_scores[inds])

    # Both stratified means use the same strata, so one pass accumulates both.
    do_1 = 0
    do_2 = 0
    for _, sub_data in diff_data.groupby('propensity_scores'):
        prop = sub_data.shape[0]/diff_data.shape[0]
        # A stratum in which an item was never recommended yields NaN for that item.
        do_1 += sub_data[sub_data[treat_1] == 1].outcome.mean()*prop
        do_2 += sub_data[sub_data[treat_2] == 1].outcome.mean()*prop

    diff_treats[treat_1, treat_2] = do_1 - do_2

pd.DataFrame(diff_treats.round(2))

100%|███████████████████████████████████████████| 25/25 [00:02<00:00,  8.86it/s]


Unnamed: 0,0,1,2,3,4
0,0.0,-0.31,-0.92,-0.84,-1.45
1,0.31,0.0,-0.49,-0.79,-1.39
2,0.92,0.49,0.0,-0.38,-0.77
3,0.84,0.79,0.38,0.0,-0.16
4,1.45,1.39,0.77,0.16,0.0


In comparison with the theoretical results

In [24]:
# Error of the proposed estimate against the ground-truth matrix.
print("Root Mean Square Error:", rmse(expected_diff_treats, diff_treats))
print("Estimated Ranking")
# The ranking is computed from the matrix rounded to 2 decimals.
rank_differences(diff_treats.round(2))

Root Mean Square Error: 0.14426042967410155
Estimated Ranking


Unnamed: 0,score
4,0.05
3,-0.334
2,-0.652
1,-1.176
0,-1.408
