In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Relative path of the CSV that the next cell loads with pd.read_csv.
INFILE = '../data/model_comparison.csv'

In [2]:
# Load the raw data; every later cell filters or extends this frame.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks
# like a row index that was written to the CSV -- confirm whether it should be
# read with index_col=0 (doing so would change the column count reported later).
df = pd.read_csv(INFILE)
# Preview the first rows.
df.head()

Unnamed: 0,ParticipantID,CompletionCode,ID,EndTime,StartTime,Status,DemographicsTime,Gender,GenderSpecify,AgeBins,...,juv_misd_count,juv_other_count,priors_count,felony,black,married,output,y,FcastTimer,next
0,test,ceoict,2,2020-11-03 18:08:32.139990,2020-11-03 18:03:24.024457,Completed,84.897999,Male,,25-29,...,0.0,0.0,2.0,1.0,1.0,0.0,0.207068,1.0,11.135189,
1,test,ceoict,2,2020-11-03 18:08:32.139990,2020-11-03 18:03:24.024457,Completed,84.897999,Male,,25-29,...,0.0,0.0,0.0,0.0,0.0,0.0,0.381263,0.0,8.531103,
2,test,ceoict,2,2020-11-03 18:08:32.139990,2020-11-03 18:03:24.024457,Completed,84.897999,Male,,25-29,...,0.0,0.0,2.0,1.0,0.0,0.0,0.403128,0.0,9.866797,
3,test,ceoict,2,2020-11-03 18:08:32.139990,2020-11-03 18:03:24.024457,Completed,84.897999,Male,,25-29,...,0.0,1.0,0.0,1.0,1.0,0.0,0.560167,1.0,13.689945,
4,test,ceoict,2,2020-11-03 18:08:32.139990,2020-11-03 18:03:24.024457,Completed,84.897999,Male,,25-29,...,0.0,0.0,7.0,1.0,1.0,0.0,0.710434,1.0,6.015892,


In [3]:
# select participants who passed comprehension checks
# (rows whose ParticipantID is 'test' are excluded before counting)
df = df[df.ParticipantID != 'test']
print('N total participants', len(pd.unique(df.ParticipantID)))
passed_checks = (df.FcastComprehension == 1) & (df.BonusComprehension == 1)
df = df[passed_checks]
print('N passed comprehension check', len(pd.unique(df.ParticipantID)))
# select 'actual' forecasts (as opposed to practice forecasts)
# .copy() makes the filtered frame independent of the loaded data, so the
# column assignments below do not raise SettingWithCopyWarning.
df = df[df.Practice == 0].copy()
# scale forecasts to be between 0 and 1 (as opposed to 0 and 100)
# NOTE: re-running this cell without re-loading df rescales Fcast a second time.
df['Fcast'] = df['Fcast'] / 100.
# compute DVs
# Score: squared error of the forecast minus squared error of the model output
# (positive => the model output was closer to y than the forecast was).
df['Score'] = (df.y - df.Fcast)**2 - (df.y - df.output)**2
# dFPP: forecast minus model output, defined only on rows with y == 0
# (left as NaN everywhere else).
df['dFPP'] = (df.Fcast - df.output).where(df.y == 0)
df.values.shape

N total participants 86
N passed comprehension check 62


(620, 50)

In [4]:
# not pre-registered cell: added to remove empty values from Score column
# Keep only the rows whose Score is present.
has_score = df['Score'].notna()
df = df[has_score]
# (rows, columns) remaining after the filter
df.shape

(538, 50)

In [5]:
# prediction performance hypothesis
# Intercept-only OLS of Score: the single regressor is a column of ones.
# positive coef on constant regressor indicates model outperformed human predictions
intercept = np.ones(len(df), dtype=int)
reg = sm.OLS(df['Score'], intercept)
ols_fit = reg.fit()
# Re-compute the covariance with standard errors clustered by participant.
res = ols_fit.get_robustcov_results(
    cov_type='cluster',
    groups=df['ParticipantID'],
)
res.summary()

0,1,2,3
Dep. Variable:,Score,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,
Time:,17:53:57,Log-Likelihood:,-27.27
No. Observations:,538,AIC:,56.54
Df Residuals:,537,BIC:,60.83
Df Model:,0,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0337,0.012,2.836,0.006,0.010,0.057

0,1,2,3
Omnibus:,27.76,Durbin-Watson:,1.986
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34.052
Skew:,0.482,Prob(JB):,4.03e-08
Kurtosis:,3.768,Cond. No.,1.0


In [6]:
# fairness hypothesis
# positive coef on black indicates that human predictions are less fair than model predictions
# Restrict to rows with y == 0, then regress Score on the `black` column alone.
# NOTE(review): no constant is included, so the fit is uncentered and `black`
# is the only regressor; also, the dFPP column computed earlier is not used
# here -- confirm both choices against the pre-registration.
no_recid_df = df.loc[df['y'] == 0]
reg = sm.OLS(no_recid_df['Score'], no_recid_df['black'])
ols_fit = reg.fit()
# Standard errors clustered by participant.
res = ols_fit.get_robustcov_results(
    cov_type='cluster',
    groups=no_recid_df['ParticipantID'],
)
res.summary()

0,1,2,3
Dep. Variable:,Score,R-squared (uncentered):,0.064
Model:,OLS,Adj. R-squared (uncentered):,0.061
Method:,Least Squares,F-statistic:,8.543
Date:,"Thu, 19 Nov 2020",Prob (F-statistic):,0.00509
Time:,17:53:57,Log-Likelihood:,-20.512
No. Observations:,249,AIC:,43.02
Df Residuals:,248,BIC:,46.54
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
black,0.0993,0.034,2.923,0.005,0.031,0.167

0,1,2,3
Omnibus:,7.242,Durbin-Watson:,1.621
Prob(Omnibus):,0.027,Jarque-Bera (JB):,7.056
Skew:,0.365,Prob(JB):,0.0294
Kurtosis:,3.384,Cond. No.,1.0


In [7]:
# compute bonuses
# Only rows whose Status is 'Completed' contribute.
completed_df = df.loc[df['Status'] == 'Completed']
# Per-participant mean Score; the bonus is one minus that mean.
mean_score = completed_df.groupby('ParticipantID')['Score'].mean()
bonus_df = 1 - mean_score
# Write one row per participant to disk.
bonus_df.to_csv('../data/comparison_bonus_1.csv')