In [93]:
# Import the necessary package
import numpy as np
import pandas as pd
from sklearn.utils import resample
import random
from decimal import Decimal
from IPython.display import display
import warnings

In [94]:
# Seed every random number generator the notebook relies on. sklearn's `resample`
# draws from NumPy's global RNG when it is not given a `random_state`, so seeding
# only Python's `random` module leaves the bootstrap non-reproducible.
random.seed(666)
np.random.seed(666)

# Load the data and compute the rounded mean effort (button presses) per treatment.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
df = pd.read_stata('D:\\Temp\\mturk_clean_data_short.dta')

# Select the column explicitly. `.mean("buttonpresses")` does not select a column:
# the string is received as the `numeric_only` argument of GroupBy.mean.
# The double brackets keep a one-column frame, so `emp_moments` stays a 2-D array
# with one row per treatment (rows follow the sorted treatment labels).
emp_moments = np.array(np.round(df.groupby("treatment")[["buttonpresses"]].mean()))

In [95]:
# Treatments are as follows:
# 1.1: benchmark specification with piece rate of 0.01 
# 1.2: benchmark specification with piece rate of 0.10 
# 1.3: benchmark specification with piece rate of 0.00 
# 3.1: social preferences (charity) with piece rate of 0.01 
# 3.2: social preferences (charity) with piece rate of 0.10 
# 10 : social preferences (gift exchange) bonus of 40 cents (independently of nr buttonpresses)
# 4.1: time discounting with extra 0.01 paid two weeks later
# 4.2: time discounting with extra 0.01 paid four weeks later

# resample is a useful command from the sklearn library that samples with replacement our data.
# I first get a smaller dataframe containing only observations for a specific treatment, I then resample the observations and
# compute the rounded mean of buttonpresses, save the result and then pass onto the next treatment. I do this for 'number' times.
def mybootstrap(dataset, number, random_state=None):
    """Bootstrap the rounded mean effort of each treatment used in the estimation.

    In every replication the `buttonpresses` observations of each treatment are
    resampled with replacement and the rounded mean of the resample is stored.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a `treatment` column holding the codes '1.1', '1.2', '1.3',
        '3.1', '3.2', '10', '4.1', '4.2' and a `buttonpresses` column.
    number : int
        Number of bootstrap replications.
    random_state : int, numpy.random.RandomState or None, optional
        Seed (or generator) for reproducible draws. With the default `None`,
        `resample` is called exactly as before and uses NumPy's global RNG.

    Returns
    -------
    tuple of list
        `(E11, E12, E13, E31, E32, E10, E41, E42)`; each list holds `number`
        rounded bootstrap means for the corresponding treatment.
    """
    treatment_codes = ('1.1', '1.2', '1.3', '3.1', '3.2', '10', '4.1', '4.2')

    # One generator is shared by all draws: handing the same integer seed to every
    # `resample` call would reproduce the identical sample in each replication.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # The per-treatment subsets never change between replications, so filter once.
    presses = {code: dataset.loc[dataset.treatment == code, 'buttonpresses']
               for code in treatment_codes}
    means = {code: [] for code in treatment_codes}

    for _ in range(number):
        # Treatments are visited in the same order as before, so the sequence of
        # random draws is unchanged when `random_state` is None.
        for code in treatment_codes:
            bootsample = resample(presses[code], replace=True, random_state=rng)
            means[code].append(np.round(np.mean(bootsample)))

    return tuple(means[code] for code in treatment_codes)

# Draw 2000 bootstrap replications of the rounded mean effort for each treatment.
(E11, E12, E13,
 E31, E32, E10,
 E41, E42) = mybootstrap(dataset=df, number=2000)




In [96]:
import numpy as np

def mymindisest(params):
    '''
    Closed-form minimum-distance estimates computed from treatment mean efforts.

    params: dictionary with the keys 'E11', 'E12', 'E13', 'E31', 'E32', 'E10',
            'E41', 'E42' (mean effort per treatment; scalars or equally long
            sequences, e.g. one entry per bootstrap replication) and the key
            'specification', which selects the cost function: "Exp" or "Power".

    Returns the tuple (k, g, s, alpha, a, s_ge, beta, delta) of NumPy arrays.
    An unknown specification (or a missing key) raises KeyError.
    '''
    piece_rates = [0, 0.01, 0.1]  # the different piece rates
    log_p_low = np.log(piece_rates[1])
    log_p_high = np.log(piece_rates[2])

    # Each specification is a pair: (transform applied to effort in the benchmark
    # moments, effort term of the cost function given effort and curvature).
    specifications = {
        'Exp': (lambda effort: effort,
                lambda effort, curvature: np.exp(effort * curvature)),
        'Power': (lambda effort: np.log(effort),
                  lambda effort, curvature: effort ** curvature),
    }

    # Pull the moments out of the dictionary as arrays.
    moment_keys = ('E11', 'E12', 'E13', 'E31', 'E32', 'E10', 'E41', 'E42')
    E11, E12, E13, E31, E32, E10, E41, E42 = (np.array(params[key]) for key in moment_keys)
    transform, effort_term = specifications[params['specification']]

    # Benchmark treatments identify k, gamma and s (all computed in logs first).
    t11, t12, t13 = transform(E11), transform(E12), transform(E13)
    log_k = (log_p_high - log_p_low * t12 / t11) / (1 - t12 / t11)
    log_gamma = np.log((log_p_low - log_k) / t11)
    log_s = np.exp(log_gamma) * t13 + log_k
    k, g, s = np.exp(log_k), np.exp(log_gamma), np.exp(log_s)

    # Effort terms of the remaining treatments evaluated at the estimated curvature.
    EG31 = effort_term(E31, g)
    EG32 = effort_term(E32, g)
    EG10 = effort_term(E10, g)
    EG41 = effort_term(E41, g)
    EG42 = effort_term(E42, g)

    # alpha, a, s_ge, delta, beta
    alpha = 100/9*k*(EG32-EG31)
    a = 100*k*EG31-100*s-alpha
    s_ge = k*EG10 - s
    net41 = k*EG41-s
    delta = np.sqrt((k*EG42-s)/net41)
    beta = 100*net41/(delta**2)
    return k, g, s, alpha, a, s_ge, beta, delta



In [97]:
# Table 5 minimum-distance point estimates: columns (1) and (3) of Panel A and
# columns (1) and (4) of Panel B.
# `params` maps every moment name to the empirical mean effort of its treatment,
# i.e. to the matching row of `emp_moments`.
moment_rows = {'E11': 0, 'E12': 1, 'E13': 2, 'E31': 6, 'E32': 7,
               "E10": 4, 'E41': 8, 'E42': 9}
params = {name: emp_moments[row] for name, row in moment_rows.items()}

# Estimate under both cost functions; `params` ends with the 'Power' specification.
point_estimates = {}
for spec in ('Exp', 'Power'):
    params['specification'] = spec
    point_estimates[spec] = np.array(mymindisest(params)).flatten()

Table5Exp = point_estimates['Exp']
Table5Power = point_estimates['Power']
Table5Exp


array([1.26689239e-16, 1.57711308e-02, 3.31549170e-06, 3.07335179e-03,
       1.42605422e-01, 8.57890628e-06, 1.15234320e+00, 7.64690724e-01])

## 3. Estimation

### Point Estimates and Standard Errors

We now compute the minimum distance estimates for Table 5 and the standard errors via a bootstrap procedure.


In [98]:
# Exponential-cost estimates evaluated at the empirical moments.
# `mymindisest` converts its inputs to NumPy arrays and works element-wise, so it
# is called directly. Wrapping it in np.vectorize gains nothing for a dictionary
# argument and makes NumPy call the function an extra time to infer output types.
vmindisest = np.vectorize(mymindisest)  # still defined: later cells look up `vmindisest`
params['specification'] = "Exp"
estimatesExp = mymindisest(params)

In [99]:
# Bootstrap inputs: for each treatment, the list of rounded means produced by the
# bootstrap procedure (one entry per replication).
nw_params = {"E11":E11,"E12":E12,"E13":E13,"E31":E31,"E32":E32,"E10":E10,"E41":E41,"E42":E42,"specification":"Exp"}
vmindisest = np.vectorize(mymindisest)  # kept: other cells look up `vmindisest`

# `mymindisest` is already array-aware, so it is applied to all replications at
# once. Going through np.vectorize would only add an extra evaluation that NumPy
# uses to infer the output types.
estimatesExp = mymindisest(nw_params)
nw_params['specification'] = 'Power'
estimatesPower = mymindisest(nw_params)

In [100]:
# Confidence intervals for Panel B of Table 5: the 2.5% and 97.5% order statistics
# of the bootstrap distribution of each parameter. Some draws are nan or inf, so
# the positions of the two order statistics are derived from the number of finite
# draws instead of being hard-coded. As in the paper, intervals are computed only
# for alpha, a, s_ge, beta and delta (positions 3 to 7 of the estimates).

def _order_stat_ci(draws, lower=0.025, upper=0.975):
    """Return [lower, upper] order statistics of the finite values in `draws`.

    The index of each bound is round(n * q), with n the number of finite draws.
    It is clamped to the last element because for small n (e.g. n = 20) the value
    round(n * 0.975) equals n, which is out of range. With no finite draw at all,
    [nan, nan] is returned instead of raising IndexError.
    """
    draws = np.asarray(draws, dtype=float)
    ordered = np.sort(draws[np.isfinite(draws)])
    if ordered.size == 0:
        return [np.nan, np.nan]
    last = ordered.size - 1
    return [ordered[min(round(ordered.size * lower), last)],
            ordered[min(round(ordered.size * upper), last)]]


CI_Exp = [_order_stat_ci(estimatesExp[ci]) for ci in range(3, 8)]
CI_Power = [_order_stat_ci(estimatesPower[ci]) for ci in range(3, 8)]

In [101]:
#Function for compute the mean and standard error of estimates for the cost function using the Bootstrap procedure
def cal_res(params,type):
    '''Mean and standard deviation of the bootstrap estimates of each parameter.

       params : dictionary with the bootstrap moments of every treatment, in the
                form expected by `mymindisest`. It is not modified: the
                specification is set on a copy.
       type   : "Exp" selects the exponential cost function; any other value
                selects the power cost function (as before). The name shadows the
                builtin `type` but is kept because callers pass it by keyword.

       Returns an (8, 2) array: column 0 holds the mean and column 1 the standard
       deviation of the finite estimates (nan and inf draws are dropped), with one
       row per value returned by `mymindisest`.
    '''
    specification = "Exp" if type == "Exp" else "Power"
    # `mymindisest` already works on whole arrays, so it is called directly on a
    # shallow copy that carries the requested specification.
    estimates = mymindisest({**params, 'specification': specification})

    res = np.zeros((8,2))
    for i in range(8):
        draws = np.asarray(estimates[i], dtype=float)
        finite_draws = draws[np.isfinite(draws)]  # filter once, use for both statistics
        res[i,0], res[i,1] = np.mean(finite_draws), np.std(finite_draws)
    return res


In [102]:
# Store the mean and standard error of the bootstrap estimates for both cost functions.
# A few bootstrap samples overflow or yield invalid values; `cal_res` drops those
# non-finite draws, and losing a handful of draws should not change the overall
# mean of the parameters. The resulting RuntimeWarnings are silenced for these
# two calls only, instead of switching off every warning for the rest of the
# notebook.
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=RuntimeWarning)
    exp_res = cal_res(nw_params,type="Exp")
    power_res = cal_res(nw_params,type="Power")

In [103]:
# Row labels of Table 5, in the order of the values returned by `mymindisest`.
params_name = [
    "Level k of cost of effort",
    "Curvature γ of cost function",
    "Intrinsic motivation s",
    "Social preferences α",
    "Warm glow coefficient a",
    "Gift exchange Δs",
    "Present bias β",
    "(Weekly) discount factor δ",
]

# Bootstrap confidence intervals, one column per cost function.
CIpanelB = pd.DataFrame(dict(CI_Exp=CI_Exp, CI_Power=CI_Power))

# Bootstrap standard errors are the second column of the summary arrays.
sd_exp, sd_power = exp_res[:8, 1], power_res[:8, 1]

In [104]:
# Format and print Table 5.
# `Decimal` and `display` come from the import cell at the top of the notebook.

# Per-row formatting: 'sci' = scientific notation with two decimals, an integer =
# number of decimals passed to round().
SCI = 'sci'
row_formats = [SCI, 3, SCI, 3, 3, SCI, 2, 2]


def _format_entry(value, fmt):
    """Format one table entry according to `fmt` (see `row_formats`)."""
    if fmt == SCI:
        return '{0:.2e}'.format(Decimal(value))
    return round(value, fmt)


columns = [Table5Power, sd_power, Table5Exp, sd_exp]
vs = [[_format_entry(column[pos], fmt) for pos, fmt in enumerate(row_formats)]
      for column in columns]

headers = ['Minimum dist est on average effort Power point estimates',
           'Minimum dist est on average effort Power standard errors',
           'Minimum dist est on average effort Exp point estimates',
           'Minimum dist est on average effort Exp standard errors']
Table5Results = pd.DataFrame({'Parameters name': params_name, **dict(zip(headers, vs))})

# Standard errors differ from the published ones: the bootstrap seed is not the one
# used by the authors, and random number generation is not easily replicated across
# software/languages (each uses its own algorithm).

print('Table 5: Estimates of behavioural parameters I: Mturkers actual effort. Minimum distance estimates')
display(Table5Results)

Table 5: Estimates of behavioural parameters I: Mturkers actual effort. Minimum distance estimates


Unnamed: 0,Parameters name,Minimum dist est on average effort Power point estimates,Minimum dist est on average effort Power standard errors,Minimum dist est on average effort Exp point estimates,Minimum dist est on average effort Exp standard errors
0,Level k of cost of effort,2.54e-112,2.82e-65,1.27e-16,4.65e+91
1,Curvature γ of cost function,33.138,56.82,0.016,0.027
2,Intrinsic motivation s,7.12e-07,1.02e-05,3.32e-06,2.3e-05
3,Social preferences α,0.003,0.014,0.003,0.014
4,Warm glow coefficient a,0.125,0.146,0.143,0.15
5,Gift exchange Δs,3.26e-06,2.27e-05,8.58e-06,3.62e-05
6,Present bias β,1.17,8.37,1.15,6.99
7,(Weekly) discount factor δ,0.75,0.29,0.76,0.31


In [105]:
# Re-estimate the power specification on the bootstrap moments.
nw_params['specification'] = "Power"
estimatesPower = vmindisest(nw_params)

# Mean and standard deviation of the finite exponential-cost bootstrap estimates
# (nan and inf draws are excluded), one row per parameter.
exp_result = np.zeros((8,2))
for row in range(8):
    draws = estimatesExp[row]
    finite_draws = draws[np.isfinite(draws)]
    exp_result[row,0] = np.mean(finite_draws)
    exp_result[row,1] = np.std(finite_draws)
exp_result

array([[1.04028377e+90, 4.65112723e+91],
       [1.77025794e-02, 2.69091606e-02],
       [1.18971952e-05, 2.30383279e-05],
       [3.57957546e-03, 1.35831888e-02],
       [1.77158102e-01, 1.49931926e-01],
       [2.14317153e-05, 3.62256943e-05],
       [2.15888449e+00, 6.98872176e+00],
       [7.91427908e-01, 3.13428468e-01]])

In [106]:
# Scaled difference between the bootstrap means of treatments 10 and 1.1.
# NOTE(review): the factor 1.5 is hard-coded and its purpose is not evident from
# this cell; it looks like an exploratory check. Confirm before relying on `al`.
a, b = (np.array(nw_params[key]) for key in ('E10', 'E11'))
al = (100/9*1.5)*(a-b)
al

array([-7600.        , -5833.33333333, -7216.66666667, ...,
       -6733.33333333, -7733.33333333, -7350.        ])

In [107]:
# NaN-ignoring mean and standard deviation of the exponential-cost bootstrap
# estimates, stored under the labels '<name>_exp_meann' and '<name>_exp_sdd'.
# Previously the labels were assigned to `a` and `b` and immediately overwritten
# by the statistics, so every result except the last one was discarded.
a_list = list(range(8))
re = ["k", "g", "s", "alpha", "a", "s_ge", "beta", "delta"]
exp_summary = {}
for i,j in zip(a_list,re):
    a,b = np.nanmean(estimatesExp[i]), np.nanstd(estimatesExp[i])
    exp_summary[f'{j}'+"_exp"+"_meann"] = a
    exp_summary[f'{j}'+"_exp"+"_sdd"] = b
