In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Relative path of the input CSV; it is read into `df` with pd.read_csv in the next cell.
INFILE = '../data/comparison.csv'

In [2]:
# Load the raw CSV into `df`.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks like a
# saved row index; consider index_col=0 if so — confirm against how the CSV was written.
df = pd.read_csv(INFILE)
# Preview the first rows as a sanity check on the loaded columns.
df.head()

Unnamed: 0,ParticipantID,CompletionCode,ID,EndTime,StartTime,Status,DemographicsTime,Gender,GenderSpecify,AgeBins,...,juv_other_count,priors_count,felony,black,married,output,y,FcastTimer,MostImportantFeature,next
0,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,1.0,1.0,0.0,1.0,0.0,0.766923,1.0,29.173377,,
1,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,0.0,0.0,0.0,0.0,0.08892,0.0,145.471718,,
2,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,15.0,0.0,1.0,0.0,0.736685,1.0,8.672931,,
3,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,1.0,1.0,1.0,0.0,0.443584,0.0,10.088882,,
4,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,2.0,1.0,0.0,0.0,0.363658,0.0,17.196649,,


In [3]:
# Select participants who passed the comprehension checks and build the DV.
# NOTE: this cell overwrites `df` and rescales `Fcast`, so it is not idempotent;
# re-run the loading cell before re-running this one.

# Drop the rows recorded under the 'test' participant id.
df = df[df.ParticipantID != 'test']
print('N total participants', len(pd.unique(df.ParticipantID)))

# Keep only rows where both comprehension checks were passed.
passed_checks = (df.FcastComprehension == 1) & (df.BonusComprehension == 1)
df = df[passed_checks]
print('N passed comprehension check', len(pd.unique(df.ParticipantID)))

# Select 'actual' forecasts (as opposed to practice forecasts). Take an explicit
# copy so the column assignments below write to an independent frame instead of
# a filtered slice (this avoids pandas' SettingWithCopyWarning).
df = df[df.Practice == 0].copy()

# Scale forecasts to be between 0 and 1 (as opposed to 0 and 100).
df['Fcast'] = df['Fcast'] / 100.

# Compute DVs: squared error of `Fcast` minus squared error of `output`, both
# measured against `y`. A positive Score means Fcast was further from y than
# output was.
df['Score'] = (df.y - df.Fcast)**2 - (df.y - df.output)**2

# (rows, columns) of the analysis sample; df.shape avoids materialising df.values.
df.shape

N total participants 100
N passed comprehension check 78


(780, 50)

In [4]:
# Not a pre-registered step: this cell was added to discard rows whose Score is empty.
has_score = df['Score'].notna()
df = df.loc[has_score]
# (rows, columns) remaining after the rows without a Score are removed.
df.shape

(762, 50)

In [5]:
# Prediction performance hypothesis: intercept-only OLS of Score.
# A positive coefficient on the constant regressor indicates that the model
# outperformed human predictions.
intercept_only = np.ones(len(df), dtype=int)
reg = sm.OLS(endog=df['Score'], exog=intercept_only)
# Standard errors are clustered on ParticipantID.
res = reg.fit().get_robustcov_results(
    cov_type='cluster',
    groups=df['ParticipantID'],
)
res.summary()

0,1,2,3
Dep. Variable:,Score,R-squared:,-0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,
Time:,23:27:31,Log-Likelihood:,-34.36
No. Observations:,762,AIC:,70.72
Df Residuals:,761,BIC:,75.36
Df Model:,0,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0602,0.009,6.664,0.000,0.042,0.078

0,1,2,3
Omnibus:,37.384,Durbin-Watson:,1.976
Prob(Omnibus):,0.0,Jarque-Bera (JB):,46.435
Skew:,0.475,Prob(JB):,8.26e-11
Kurtosis:,3.749,Cond. No.,1.0


In [6]:
# Fairness hypothesis, estimated on the rows with y == 0.
# A positive coefficient on `black` indicates that human predictions are less
# fair than model predictions.
no_recid_df = df.loc[df['y'] == 0]
# Design matrix: constant plus the `black` indicator (reused by the next cell).
X = sm.add_constant(no_recid_df['black'])
reg = sm.OLS(endog=no_recid_df['Score'], exog=X)
# Standard errors are clustered on ParticipantID.
res = reg.fit().get_robustcov_results(
    cov_type='cluster',
    groups=no_recid_df['ParticipantID'],
)
res.summary()

0,1,2,3
Dep. Variable:,Score,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,4.884
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,0.0302
Time:,23:27:31,Log-Likelihood:,-36.464
No. Observations:,373,AIC:,76.93
Df Residuals:,371,BIC:,84.77
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0517,0.024,2.151,0.035,0.004,0.100
black,0.0608,0.027,2.210,0.030,0.006,0.116

0,1,2,3
Omnibus:,9.856,Durbin-Watson:,1.704
Prob(Omnibus):,0.007,Jarque-Bera (JB):,9.862
Skew:,0.378,Prob(JB):,0.00722
Kurtosis:,3.253,Cond. No.,2.57


In [7]:
# Same design matrix X (constant + black) and y == 0 sample as the fairness
# regression, with the raw difference Fcast - output as the dependent variable.
# The difference is an unnamed Series, so the summary labels it "y".
forecast_gap = no_recid_df['Fcast'] - no_recid_df['output']
reg = sm.OLS(endog=forecast_gap, exog=X)
# Standard errors are clustered on ParticipantID.
res = reg.fit().get_robustcov_results(
    cov_type='cluster',
    groups=no_recid_df['ParticipantID'],
)
res.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.011
Method:,Least Squares,F-statistic:,5.472
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,0.022
Time:,23:27:31,Log-Likelihood:,-33.897
No. Observations:,373,AIC:,71.79
Df Residuals:,371,BIC:,79.64
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0118,0.024,0.502,0.617,-0.035,0.059
black,0.0623,0.027,2.339,0.022,0.009,0.115

0,1,2,3
Omnibus:,0.348,Durbin-Watson:,1.722
Prob(Omnibus):,0.84,Jarque-Bera (JB):,0.471
Skew:,-0.045,Prob(JB):,0.79
Kurtosis:,2.851,Cond. No.,2.57


In [8]:
# Mean of Fcast within each (black, y) group.
df.groupby(by=['black', 'y'])['Fcast'].mean()

black  y  
0.0    0.0    0.422680
       1.0    0.515203
1.0    0.0    0.522905
       1.0    0.661826
Name: Fcast, dtype: float64

In [9]:
# Mean of output within each (black, y) group.
df.groupby(by=['black', 'y'])['output'].mean()

black  y  
0.0    0.0    0.410850
       1.0    0.577980
1.0    0.0    0.448818
       1.0    0.634106
Name: output, dtype: float64

In [10]:
# Compute bonuses: one value per participant, using only rows with Status 'Completed'.
completed_df = df.loc[df['Status'] == 'Completed']
mean_score = completed_df.groupby('ParticipantID')['Score'].mean()
# bonus = 1.5 * (1 - mean Score); a negative mean Score therefore yields a bonus above 1.5.
bonus_df = 1.5 * (1 - mean_score)
# Despite the name, bonus_df is a Series indexed by ParticipantID; write it to disk.
bonus_df.to_csv('../data/comparison_bonus_1.csv')

In [11]:
# Sum of the per-participant bonuses in bonus_df.
bonus_df.sum()

107.14405587449993

In [13]:
# Fraction of participants in bonus_df whose bonus is strictly greater than 1.5.
bonus_df.gt(1.5).mean()

0.2236842105263158