In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Path to the experiment data CSV; it is a relative path, so it is resolved
# against the notebook's working directory.
INFILE = '../data/adoption.csv'

In [None]:
# Load the raw data; the later cells filter and extend this frame under the
# same name `df`.
df = pd.read_csv(INFILE)
# Preview the first rows to sanity-check the columns that were read.
df.head()

In [None]:
# Participant exclusions followed by the derived analysis columns.
# NOTE: this cell reassigns `df` and rescales Fcast, so it is not idempotent --
# re-run the data-loading cell before executing it a second time.
print('N total participants', df.ID.nunique(dropna=False))

# remove participants who failed either comprehension check
df = df[(df.FcastComprehension == 1) & (df.BonusComprehension == 1)]
print('N passed comprehension check', df.ID.nunique(dropna=False))

# remove participants in the adopt condition who lost the auction
# and participants not in the adopt condition who won the auction
df = df[df.Adopt == df.WonAuction]
print('N auction results matched adoption condition', df.ID.nunique(dropna=False))

# select 'actual' forecasts (as opposed to practice forecasts); the explicit
# copy detaches the result from the raw frame so the column assignments below
# do not raise SettingWithCopyWarning
df = df[df.Practice == 0].copy()

# scale forecasts to be between 0 and 1 (as opposed to 0 and 100)
df['Fcast'] = df['Fcast'] / 100.

# interaction terms used by the regressions
df['Exp_x_Adopt'] = df.Explanation * df.Adopt
df['Exp_x_black'] = df.Explanation * df.black
df['Exp_x_Adopt_x_black'] = df.Explanation * df.Adopt * df.black
df['Adopt_x_black'] = df.Adopt * df.black

# Score = squared error of the participant's forecast minus squared error of
# `output`, both measured against y. Negative values mean the forecast was
# closer to y than `output` was.
# NOTE(review): `output` is presumably the model's prediction on a 0-1 scale -- confirm.
df['Score'] = (df.y - df.Fcast)**2 - (df.y - df.output)**2
df.shape

In [None]:
# Rows without a Score cannot enter the regressions below; discard them and
# report the remaining (rows, columns).
df = df.dropna(subset=['Score'])
df.shape

In [None]:
# Effect of the explanation on performance if everyone adopted the model:
#   Score ~ const + Explanation + Adopt + Explanation x Adopt
# A negative coefficient on the interaction indicates the explanation improved
# performance; a positive coefficient indicates it harmed performance.
# Standard errors are clustered by participant ID.
X = sm.add_constant(df[['Explanation', 'Adopt', 'Exp_x_Adopt']])
reg = sm.OLS(df['Score'], X)
res = reg.fit()
res = res.get_robustcov_results(cov_type='cluster', groups=df['ID'])
res.summary()

In [None]:
# Effect of the explanation on fairness if everyone adopted the model:
#   Score ~ const + Explanation + black + Explanation x black
# A negative coefficient on the interaction indicates the explanation improved
# fairness; a positive coefficient indicates it harmed fairness.
# Standard errors are clustered by participant ID.

# restrict to observations where the offender did not recidivate (y == 0)
# and the model was adopted (Adopt == 1)
no_recid_df = df[(df['y'] == 0) & (df['Adopt'] == 1)]
X = sm.add_constant(no_recid_df[['Explanation', 'black', 'Exp_x_black']])
reg = sm.OLS(no_recid_df['Score'], X)
res = reg.fit()
res = res.get_robustcov_results(cov_type='cluster', groups=no_recid_df['ID'])
res.summary()

In [None]:
def simulate_market(df, price):
    """Run every analysis as if the model were sold at a single market price.

    A participant is treated as adopting in the simulation when their WTP is
    strictly greater than ``price`` (a WTP equal to the price does not adopt).
    The flag is stored in a ``SimulateAdopt`` column on a new frame, so the
    caller's ``df`` is left unmodified, and that frame is handed to each of the
    analysis functions in turn.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``WTP`` column plus the columns read by the analysis
        functions called below.
    price : scalar
        Simulated market price that ``WTP`` is compared against.

    Returns
    -------
    None
        Results are printed by the analysis functions.
    """
    # assign() builds a new frame instead of writing onto the argument, which
    # avoids mutating the caller's data and SettingWithCopyWarning on slices
    simulated = df.assign(SimulateAdopt=df.WTP > price)
    compute_performance_effect(simulated, price)
    compute_fairness_effect(simulated, price)
    compute_adoption_effect(simulated, price)
    decompose(simulated, price)
    
def compute_performance_effect(df, price):
    """Print an OLS of Score on Explanation for the simulated market.

    Only rows whose observed ``Adopt`` equals the simulated ``SimulateAdopt``
    flag are used. Standard errors are clustered by participant ID.
    ``price`` is accepted for a uniform signature but is not read here.
    """
    matched = df[df.Adopt == df.SimulateAdopt]
    design = sm.add_constant(matched.Explanation)
    fitted = sm.OLS(matched.Score, design).fit()
    robust = fitted.get_robustcov_results(cov_type='cluster', groups=matched.ID)
    print('\nNegative coefficient on explanation indicates that explanation improved performance')
    print('Positive coefficient on explanation indicates that explanation harmed performance\n')
    print(robust.summary())
    
def compute_fairness_effect(df, price):
    """Print an OLS of Score on Explanation, black and their interaction.

    Only rows with ``y == 0`` whose observed ``Adopt`` equals the simulated
    ``SimulateAdopt`` flag are used. The interaction column ``Exp_x_black`` is
    recomputed on that subset, and standard errors are clustered by
    participant ID. ``price`` is accepted for a uniform signature but is not
    read here.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Adopt``, ``SimulateAdopt``, ``y``, ``Explanation``,
        ``black``, ``Score`` and ``ID`` columns.
    price : scalar
        Unused.

    Returns
    -------
    None
        The regression summary is printed.
    """
    keep = (df.Adopt == df.SimulateAdopt) & (df.y == 0)
    # assign() returns a new frame; writing the column straight onto the
    # filtered slice would raise SettingWithCopyWarning
    subset = df[keep].assign(Exp_x_black=lambda d: d.Explanation * d.black)
    X = sm.add_constant(subset[['Explanation', 'black', 'Exp_x_black']])
    reg = sm.OLS(subset.Score, X)
    res = reg.fit().get_robustcov_results(cov_type='cluster', groups=subset.ID)
    print('\nNegative coefficient on interaction indicates that explanation improved fairness')
    print('Positive coefficient on interaction indicates that explanation harmed fairness\n')
    print(res.summary())
    
def compute_adoption_effect(df, price):
    """Print an OLS of Adopt on Explanation using one row per participant ID.

    Duplicate IDs are collapsed to a single row before fitting, and the
    covariance is the heteroskedasticity-robust ``HC0`` estimator.

    NOTE(review): neither ``price`` nor the ``SimulateAdopt`` column is used
    here; the outcome is the observed ``Adopt`` column -- confirm that this is
    the intended adoption measure for the simulated market.
    """
    per_participant = df.drop_duplicates(subset=['ID'])
    design = sm.add_constant(per_participant.Explanation)
    fitted = sm.OLS(per_participant.Adopt, design).fit()
    robust = fitted.get_robustcov_results(cov_type='HC0')
    print('\nPositive coefficient on explanation indicates the explanation increased adoption')
    print('Negative coefficient on explanation indicates that explanation decreased adoption\n')
    print(robust.summary())
    
def decompose(df, price):
    """Print an OLS of Score on Explanation, Adopt and Exp_x_Adopt.

    Only rows whose observed ``Adopt`` equals the simulated ``SimulateAdopt``
    flag are used; ``df`` must already carry the ``Exp_x_Adopt`` column.
    Standard errors are clustered by participant ID. ``price`` is accepted for
    a uniform signature but is not read here.
    """
    matched = df[df.Adopt == df.SimulateAdopt]
    regressors = matched[['Explanation', 'Adopt', 'Exp_x_Adopt']]
    fitted = sm.OLS(matched.Score, sm.add_constant(regressors)).fit()
    robust = fitted.get_robustcov_results(cov_type='cluster', groups=matched.ID)
    print(robust.summary())
    
# Simulate a market priced at the median WTP, taking one row per participant
# ID so that each ID contributes a single WTP value to the median.
median_bid = df.drop_duplicates(subset='ID')['WTP'].median()
simulate_market(df, median_bid)

In [None]:
# Mean forecast for each Explanation x y cell.
df.groupby(['Explanation', 'y'])['Fcast'].mean()

In [None]:
# Mean forecast for each Explanation x black x y cell.
df.groupby(['Explanation', 'black', 'y'])['Fcast'].mean()

In [None]:
# Mean of `output` for each black x y cell.
df.groupby(['black', 'y'])['output'].mean()