In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

INFILE = '../data/comparison.csv'

In [2]:
df = pd.read_csv(INFILE)
df.head()

Unnamed: 0,ParticipantID,CompletionCode,ID,EndTime,StartTime,Status,DemographicsTime,Gender,GenderSpecify,AgeBins,...,juv_other_count,priors_count,felony,black,married,output,y,FcastTimer,MostImportantFeature,next
0,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,1.0,1.0,0.0,1.0,0.0,0.766923,1.0,29.173377,,
1,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,0.0,0.0,0.0,0.0,0.08892,0.0,145.471718,,
2,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,15.0,0.0,1.0,0.0,0.736685,1.0,8.672931,,
3,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,1.0,1.0,1.0,0.0,0.443584,0.0,10.088882,,
4,test,yxI0jg,1,2020-11-19 23:43:51.969826,2020-11-19 23:32:59.155429,Completed,42.387854,Male,,25-29,...,0.0,2.0,1.0,0.0,0.0,0.363658,0.0,17.196649,,


In [3]:
# select participants who passed comprehension checks
df = df[(df.ParticipantID!='test')]
print('N total participants', len(pd.unique(df.ParticipantID)))
df = df[(df.FcastComprehension==1) & (df.BonusComprehension==1)]
print('N passed comprehension check', len(pd.unique(df.ParticipantID)))
# select 'actual' forecasts (as opposed to practice forecasts)
df = df[df.Practice==0]
# scale forecasts to be between 0 and 1 (as opposed to 0 and 100)
df['Fcast'] = df['Fcast'] / 100.
# compute DVs
df['Score'] = (df.y - df.Fcast)**2 - (df.y - df.output)**2
df.values.shape

N total participants 105
N passed comprehension check 84


(840, 50)

In [4]:
# not pre-registered cell: added to remove empty values from Score column
df = df.dropna(subset=['Score'])
df.values.shape

(822, 50)

In [5]:
# prediction performance hypothesis
# positive coef on constant regressor indicates model outperformed human predictions
reg = sm.OLS(df.Score, np.array([1]*len(df)))
res = reg.fit().get_robustcov_results(cov_type='cluster', groups=df.ParticipantID)
res.summary()

0,1,2,3
Dep. Variable:,Score,R-squared:,0.0
Model:,OLS,Adj. R-squared:,0.0
Method:,Least Squares,F-statistic:,
Date:,"Sun, 29 Nov 2020",Prob (F-statistic):,
Time:,09:48:13,Log-Likelihood:,-35.139
No. Observations:,822,AIC:,72.28
Df Residuals:,821,BIC:,76.99
Df Model:,0,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0621,0.009,7.294,0.000,0.045,0.079

0,1,2,3
Omnibus:,35.678,Durbin-Watson:,2.019
Prob(Omnibus):,0.0,Jarque-Bera (JB):,42.977
Skew:,0.45,Prob(JB):,4.65e-10
Kurtosis:,3.668,Cond. No.,1.0


In [6]:
abs_error = abs(df.y - df.Fcast) - abs(df.y - df.output)
reg = sm.OLS(abs_error, np.array([1]*len(df)))
res = reg.fit().get_robustcov_results(cov_type='cluster', groups=df.ParticipantID)
res.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,-0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,
Date:,"Sun, 29 Nov 2020",Prob (F-statistic):,
Time:,09:48:13,Log-Likelihood:,-55.238
No. Observations:,822,AIC:,112.5
Df Residuals:,821,BIC:,117.2
Df Model:,0,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0266,0.009,2.867,0.005,0.008,0.045

0,1,2,3
Omnibus:,1.435,Durbin-Watson:,1.986
Prob(Omnibus):,0.488,Jarque-Bera (JB):,1.387
Skew:,-0.006,Prob(JB):,0.5
Kurtosis:,2.799,Cond. No.,1.0


In [7]:
# fairness hypothesis
# positive coef on black indicates that human predictions are less fair than model predictions
no_recid_df = df[df.y == 0]
X = sm.add_constant(no_recid_df.black)
reg = sm.OLS(no_recid_df.Score, X)
res = reg.fit().get_robustcov_results(cov_type='cluster', groups=no_recid_df.ParticipantID)
res.summary()

0,1,2,3
Dep. Variable:,Score,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,6.041
Date:,"Sun, 29 Nov 2020",Prob (F-statistic):,0.0161
Time:,09:48:13,Log-Likelihood:,-36.977
No. Observations:,402,AIC:,77.95
Df Residuals:,400,BIC:,85.95
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0502,0.023,2.173,0.033,0.004,0.096
black,0.0637,0.026,2.458,0.016,0.012,0.115

0,1,2,3
Omnibus:,10.041,Durbin-Watson:,1.725
Prob(Omnibus):,0.007,Jarque-Bera (JB):,10.063
Skew:,0.372,Prob(JB):,0.00653
Kurtosis:,3.217,Cond. No.,2.59


In [8]:
reg = sm.OLS(no_recid_df.Fcast - no_recid_df.output, X)
res = reg.fit().get_robustcov_results(cov_type='cluster', groups=no_recid_df.ParticipantID)
res.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.014
Model:,OLS,Adj. R-squared:,0.012
Method:,Least Squares,F-statistic:,6.552
Date:,"Sun, 29 Nov 2020",Prob (F-statistic):,0.0123
Time:,09:48:13,Log-Likelihood:,-37.484
No. Observations:,402,AIC:,78.97
Df Residuals:,400,BIC:,86.96
Df Model:,1,,
Covariance Type:,cluster,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0104,0.023,0.452,0.653,-0.035,0.056
black,0.0644,0.025,2.560,0.012,0.014,0.114

0,1,2,3
Omnibus:,0.717,Durbin-Watson:,1.721
Prob(Omnibus):,0.699,Jarque-Bera (JB):,0.81
Skew:,-0.036,Prob(JB):,0.667
Kurtosis:,2.792,Cond. No.,2.59


In [9]:
df.groupby(['black', 'y']).Fcast.mean()

black  y  
0.0    0.0    0.417330
       1.0    0.512051
1.0    0.0    0.523316
       1.0    0.650152
Name: Fcast, dtype: float64

In [10]:
df.groupby(['black', 'y']).output.mean()

black  y  
0.0    0.0    0.406958
       1.0    0.576843
1.0    0.0    0.448523
       1.0    0.631128
Name: output, dtype: float64

In [11]:
import matplotlib.pyplot as plt

bonus = df.groupby('ID').Score.mean()
15*(1-bonus).describe()

count    1245.000000
mean       14.062996
std         1.153723
min        10.530395
25%        13.297698
50%        14.211715
75%        14.900474
max        16.487877
Name: Score, dtype: float64

In [16]:
from scipy.stats import ttest_ind

ttest_ind((df.y-df.Fcast)**2, (df.y-df.output)**2, equal_var=False)

Ttest_indResult(statistic=5.455431677117664, pvalue=5.751635997897882e-08)