## Control vs. Treatment

In [1]:
import os
import pandas as pd
import pingouin as pt
import warnings
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats

warnings.filterwarnings('ignore')

In [2]:
# Render floats with three decimals in every displayed frame.
pd.options.display.float_format = '{:.3f}'.format

### ```拆分成 control 與 treatment```

In [3]:
# Location of the merged, pre-processed data. The default is the original
# machine-specific path; set POLL_MERGE_DATA_PATH to run the notebook elsewhere.
MERGE_DATA_PATH = os.environ.get(
    'POLL_MERGE_DATA_PATH',
    r'C:\Users\User\Desktop\poll\data\processed_data\merge_data.csv',
)
merge = pd.read_csv(MERGE_DATA_PATH, index_col=0)

# Split rows by the `treatment` flag (0 = control arm, 1 = treatment arm).
# Explicit copies keep later column edits from touching `merge`.
control = merge.loc[merge['treatment'] == 0].copy()
treatment = merge.loc[merge['treatment'] == 1].copy()

In [4]:
# control wtp: keep the two WTP columns and add their sum as `wtp_sum`.
# `.copy()` makes this an independent frame, so adding a column is not a
# chained assignment on a slice of `control`.
control_wtp = control[['treatment', 'player.wtp_voting_cost', 'player.wtp_voting_cost_pq', 'sb_condition']].copy()
control_wtp['wtp_sum'] = control_wtp['player.wtp_voting_cost'] + control_wtp['player.wtp_voting_cost_pq']

# treatment wtp: single WTP column, renamed to `wtp`.
# `rename` returns a new frame, avoiding in-place mutation of a slice.
treatment_wtp = (
    treatment[['treatment', 'player.wtp_voting_cost', 'sb_condition']]
    .rename(columns={'player.wtp_voting_cost': 'wtp'})
)

### ```拆分成大大、小小、大小、小大```

In [5]:
# Split each arm's WTP frame by `sb_condition`
# (suffix _t = treatment arm, _c = control arm).
bb, ss, sb, bs = 'bb', 'ss', 'sb', 'bs'

bb_t = treatment_wtp[treatment_wtp['sb_condition'] == bb]
bb_c = control_wtp[control_wtp['sb_condition'] == bb]

ss_t = treatment_wtp[treatment_wtp['sb_condition'] == ss]
ss_c = control_wtp[control_wtp['sb_condition'] == ss]

sb_t = treatment_wtp[treatment_wtp['sb_condition'] == sb]
sb_c = control_wtp[control_wtp['sb_condition'] == sb]

bs_t = treatment_wtp[treatment_wtp['sb_condition'] == bs]
bs_c = control_wtp[control_wtp['sb_condition'] == bs]

In [6]:
# Show the number of rows in each condition, per arm.
print(
    'control group:',
    f"【大大】: {len(bb_c)}",
    f"【大小】: {len(bs_c)}",
    f"【小大】: {len(sb_c)}",
    f"【小小】: {len(ss_c)}",
    '---------------------',
    'treatment group:',
    f"【大大】: {len(bb_t)}",
    f"【大小】: {len(bs_t)}",
    f"【小大】: {len(sb_t)}",
    f"【小小】: {len(ss_t)}",
    sep='\n',
)

control group:
【大大】: 135
【大小】: 105
【小大】: 105
【小小】: 105
---------------------
treatment group:
【大大】: 135
【大小】: 105
【小大】: 105
【小小】: 105


### ```all```

In [7]:
# Unpaired t-test over all conditions pooled:
# control summed WTP (x) against treatment WTP (y).
pt.ttest(x=control_wtp['wtp_sum'], y=treatment_wtp['wtp'], paired=False)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,1.495,898,two-sided,0.135,"[-0.72, 5.36]",0.1,0.224,0.321


In [8]:
# Mean WTP per arm over all conditions (control uses the summed measure).
# NOTE(review): the 'treamtent' label spelling is kept so the text matches the recorded output.
print(
    'control 與 treatment 的 wtp 平均值',
    '---------------------------------',
    f"control: {control_wtp['wtp_sum'].mean()}",
    f"treamtent : {treatment_wtp['wtp'].mean()}",
    sep='\n',
)

control 與 treatment 的 wtp 平均值
---------------------------------
control: 29.788888888888888
treamtent : 27.47111111111111


### ```bb```

In [9]:
# Unpaired t-test within the 'bb' condition: control summed WTP vs. treatment WTP.
pt.ttest(x=bb_c['wtp_sum'], y=bb_t['wtp'], paired=False)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,1.095,268,two-sided,0.275,"[-2.82, 9.87]",0.133,0.236,0.194


In [10]:
# Mean WTP per arm within the 'bb' condition.
# NOTE(review): the 'treamtent' label spelling is kept so the text matches the recorded output.
print(
    '【大大】control 與 treatment 的 wtp 平均值',
    '---------------------------------',
    f"control: {bb_c['wtp_sum'].mean()}",
    f"treamtent: {bb_t['wtp'].mean()}",
    sep='\n',
)

【大大】control 與 treatment 的 wtp 平均值
---------------------------------
control: 39.77777777777778
treamtent: 36.25185185185185


### ```ss```

In [11]:
# Unpaired t-test within the 'ss' condition: control summed WTP vs. treatment WTP.
pt.ttest(x=ss_c['wtp_sum'], y=ss_t['wtp'], paired=False)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,1.275,208,two-sided,0.204,"[-1.86, 8.66]",0.176,0.322,0.245


In [12]:
# Mean WTP per arm within the 'ss' condition.
# NOTE(review): the 'treamtent' label spelling is kept so the text matches the recorded output.
print(
    '【小小】control 與 treatment 的 wtp 平均值',
    '---------------------------------',
    f"control: {ss_c['wtp_sum'].mean()}",
    f"treamtent: {ss_t['wtp'].mean()}",
    sep='\n',
)

【小小】control 與 treatment 的 wtp 平均值
---------------------------------
control: 20.552380952380954
treamtent: 17.152380952380952


### ```sb```

In [13]:
# Unpaired t-test within the 'sb' condition: control summed WTP vs. treatment WTP.
pt.ttest(x=sb_c['wtp_sum'], y=sb_t['wtp'], paired=False)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.789,208,two-sided,0.431,"[-8.7, 3.73]",0.109,0.201,0.123


In [14]:
# Mean WTP per arm within the 'sb' condition.
# NOTE(review): the 'treamtent' label spelling is kept so the text matches the recorded output.
print(
    '【小大】control 與 treatment 的 wtp 平均值',
    '---------------------------------',
    f"control: {sb_c['wtp_sum'].mean()}",
    f"treamtent: {sb_t['wtp'].mean()}",
    sep='\n',
)

【小大】control 與 treatment 的 wtp 平均值
---------------------------------
control: 28.133333333333333
treamtent: 30.61904761904762


### ```bs```

In [15]:
# Unpaired t-test within the 'bs' condition: control summed WTP vs. treatment WTP.
pt.ttest(x=bs_c['wtp_sum'], y=bs_t['wtp'], paired=False)

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,1.852,208,two-sided,0.065,"[-0.29, 9.26]",0.256,0.747,0.454


In [16]:
# Mean WTP per arm within the 'bs' condition.
# NOTE(review): the 'treamtent' label spelling is kept so the text matches the recorded output.
print(
    '【大小】control 與 treatment 的 wtp 平均值',
    '---------------------------------',
    f"control: {bs_c['wtp_sum'].mean()}",
    f"treamtent: {bs_t['wtp'].mean()}",
    sep='\n',
)

【大小】control 與 treatment 的 wtp 平均值
---------------------------------
control: 27.83809523809524
treamtent: 23.35238095238095


### ```檢視 treatment 和 is_large 是否有交互作用```

In [17]:
# Stack both arms into one long frame with a common `wtp` column:
# the control arm contributes its summed WTP, the treatment arm its single WTP.
control_int = (
    control_wtp
    .loc[:, ['wtp_sum', 'treatment', 'sb_condition']]
    .rename(columns={'wtp_sum': 'wtp'})
)

treatment_int = treatment_wtp

merge_int = pd.concat([control_int, treatment_int])

In [18]:
# 0/1 indicator column for each sb_condition value. Vectorised comparison
# replaces the per-element Python lambda; int64 matches what `apply` produced.
merge_int = merge_int.assign(
    **{c: merge_int['sb_condition'].eq(c).astype('int64') for c in ('sb', 'bb', 'bs', 'ss')}
)

# Interaction terms: condition dummy x treatment flag, computed column-wise
# instead of row-by-row with `apply(axis=1)`.
merge_int = merge_int.assign(
    **{f'{c}_treatment': merge_int[c] * merge_int['treatment'] for c in ('sb', 'bb', 'bs', 'ss')}
)

In [20]:
# Regressors: treatment flag, condition dummies and their interactions.
# The 'ss' dummy and its interaction are left out of the regressor list.
merge_int_X = merge_int.loc[:, ['treatment', 'sb', 'bs', 'bb', 'sb_treatment', 'bs_treatment', 'bb_treatment']]
# Response kept as a one-column frame.
merge_int_Y = merge_int.loc[:, ['wtp']]

- 有交叉項

In [21]:
# OLS of wtp on the interaction design, with an intercept column prepended.
# NOTE(review): the recorded Durbin-Watson is about 0.88; if rows are repeated
# observations per participant, cluster-robust standard errors may be needed — confirm.
merge_int_X = sm.add_constant(merge_int_X)
result = sm.OLS(endog=merge_int_Y, exog=merge_int_X).fit()
result.summary(
    yname='wtp',
    xname=['const', 'treatment', 'sb', 'bs', 'bb', 'sb_treatment', 'bs_treatment', 'bb_treatment'],
)

0,1,2,3
Dep. Variable:,wtp,R-squared:,0.1
Model:,OLS,Adj. R-squared:,0.093
Method:,Least Squares,F-statistic:,14.18
Date:,"Fri, 03 Sep 2021",Prob (F-statistic):,1.54e-17
Time:,17:46:21,Log-Likelihood:,-4061.3
No. Observations:,900,AIC:,8139.0
Df Residuals:,892,BIC:,8177.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,20.5524,2.162,9.505,0.000,16.309,24.796
treatment,-3.4000,3.058,-1.112,0.266,-9.401,2.601
sb,7.5810,3.058,2.479,0.013,1.580,13.582
bs,7.2857,3.058,2.383,0.017,1.284,13.287
bb,19.2254,2.883,6.669,0.000,13.567,24.883
sb_treatment,5.8857,4.324,1.361,0.174,-2.601,14.373
bs_treatment,-1.0857,4.324,-0.251,0.802,-9.573,7.401
bb_treatment,-0.1259,4.077,-0.031,0.975,-8.128,7.876

0,1,2,3
Omnibus:,27.121,Durbin-Watson:,0.88
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27.76
Skew:,0.405,Prob(JB):,9.38e-07
Kurtosis:,2.712,Cond. No.,13.0


- 沒交叉項

In [22]:
# Main-effects-only design: treatment flag plus condition dummies ('ss' left out).
merge_X = merge_int.loc[:, ['treatment', 'sb', 'bs', 'bb']]
# Response kept as a one-column frame.
merge_Y = merge_int.loc[:, ['wtp']]

In [23]:
# OLS of wtp on the main-effects design, with an intercept column prepended.
merge_X = sm.add_constant(merge_X)
result = sm.OLS(endog=merge_Y, exog=merge_X).fit()
result.summary(
    yname='wtp',
    xname=['const', 'treatment', 'sb', 'bs', 'bb'],
)

0,1,2,3
Dep. Variable:,wtp,R-squared:,0.097
Model:,OLS,Adj. R-squared:,0.093
Method:,Least Squares,F-statistic:,23.99
Date:,"Fri, 03 Sep 2021",Prob (F-statistic):,7.14e-19
Time:,17:46:21,Log-Likelihood:,-4063.0
No. Observations:,900,AIC:,8136.0
Df Residuals:,895,BIC:,8160.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,20.0113,1.698,11.784,0.000,16.678,23.344
treatment,-2.3178,1.477,-1.569,0.117,-5.217,0.582
sb,10.5238,2.163,4.866,0.000,6.280,14.768
bs,6.7429,2.163,3.118,0.002,2.499,10.987
bb,19.1624,2.039,9.399,0.000,15.161,23.164

0,1,2,3
Omnibus:,27.862,Durbin-Watson:,0.882
Prob(Omnibus):,0.0,Jarque-Bera (JB):,28.82
Skew:,0.416,Prob(JB):,5.52e-07
Kurtosis:,2.726,Cond. No.,5.56


### ```將 control 的 wtp 改為取大或是取小```
- a + b -> max(a, b) 

In [38]:
# Control-arm WTP redefined as the larger of the two elicited values.
# The row-wise builtin `max` is replaced by a vectorised column maximum;
# `skipna=False` lets a missing value propagate (as it does in the a + b
# version) instead of depending on argument order.
# `.assign` builds a new frame, so nothing is written into a slice of `control_wtp`.
control_max = control_wtp[['treatment', 'sb_condition']].assign(
    wtp=control_wtp[['player.wtp_voting_cost', 'player.wtp_voting_cost_pq']].max(axis=1, skipna=False)
)[['wtp', 'treatment', 'sb_condition']]

# Stack with the treatment arm (treatment rows first, as before).
merge_max = pd.concat([treatment_wtp, control_max])

In [43]:
# 0/1 indicator column for each sb_condition value. Vectorised comparison
# replaces the per-element Python lambda; int64 matches what `apply` produced.
merge_max = merge_max.assign(
    **{c: merge_max['sb_condition'].eq(c).astype('int64') for c in ('sb', 'bb', 'bs', 'ss')}
)

# Interaction terms: condition dummy x treatment flag, computed column-wise
# instead of row-by-row with `apply(axis=1)`.
merge_max = merge_max.assign(
    **{f'{c}_treatment': merge_max[c] * merge_max['treatment'] for c in ('sb', 'bb', 'bs', 'ss')}
)

In [45]:
# Regressors: treatment flag, condition dummies and their interactions.
# The 'ss' dummy and its interaction are left out of the regressor list.
merge_max_X = merge_max.loc[:, ['treatment', 'sb', 'bs', 'bb', 'sb_treatment', 'bs_treatment', 'bb_treatment']]
# Response kept as a one-column frame.
merge_max_Y = merge_max.loc[:, ['wtp']]

In [46]:
# OLS of wtp (control = max of the two values) on the interaction design, with intercept.
merge_max_X = sm.add_constant(merge_max_X)
result = sm.OLS(endog=merge_max_Y, exog=merge_max_X).fit()
result.summary(
    yname='wtp',
    xname=['const', 'treatment', 'sb', 'bs', 'bb', 'sb_treatment', 'bs_treatment', 'bb_treatment'],
)

0,1,2,3
Dep. Variable:,wtp,R-squared:,0.124
Model:,OLS,Adj. R-squared:,0.117
Method:,Least Squares,F-statistic:,17.96
Date:,"Fri, 03 Sep 2021",Prob (F-statistic):,2.08e-22
Time:,17:57:48,Log-Likelihood:,-3874.7
No. Observations:,900,AIC:,7765.0
Df Residuals:,892,BIC:,7804.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,15.0286,1.757,8.552,0.000,11.580,18.478
treatment,2.1238,2.485,0.855,0.393,-2.754,7.001
sb,4.2571,2.485,1.713,0.087,-0.620,9.135
bs,8.5810,2.485,3.453,0.001,3.703,13.459
bb,8.0085,2.343,3.418,0.001,3.410,12.607
sb_treatment,9.2095,3.515,2.620,0.009,2.312,16.107
bs_treatment,-2.3810,3.515,-0.677,0.498,-9.279,4.517
bb_treatment,11.0910,3.314,3.347,0.001,4.588,17.594

0,1,2,3
Omnibus:,13.747,Durbin-Watson:,0.884
Prob(Omnibus):,0.001,Jarque-Bera (JB):,14.025
Skew:,0.291,Prob(JB):,0.000901
Kurtosis:,2.815,Cond. No.,13.0


- a + b -> min(a, b)

In [47]:
# Control-arm WTP redefined as the smaller of the two elicited values.
# The row-wise builtin `min` is replaced by a vectorised column minimum;
# `skipna=False` lets a missing value propagate (as it does in the a + b
# version) instead of depending on argument order.
# `.assign` builds a new frame, so nothing is written into a slice of `control_wtp`.
control_min = control_wtp[['treatment', 'sb_condition']].assign(
    wtp=control_wtp[['player.wtp_voting_cost', 'player.wtp_voting_cost_pq']].min(axis=1, skipna=False)
)[['wtp', 'treatment', 'sb_condition']]

# Stack with the treatment arm (treatment rows first, as before).
merge_min = pd.concat([treatment_wtp, control_min])

In [49]:
# 0/1 indicator column for each sb_condition value. Vectorised comparison
# replaces the per-element Python lambda; int64 matches what `apply` produced.
merge_min = merge_min.assign(
    **{c: merge_min['sb_condition'].eq(c).astype('int64') for c in ('sb', 'bb', 'bs', 'ss')}
)

# Interaction terms: condition dummy x treatment flag, computed column-wise
# instead of row-by-row with `apply(axis=1)`.
merge_min = merge_min.assign(
    **{f'{c}_treatment': merge_min[c] * merge_min['treatment'] for c in ('sb', 'bb', 'bs', 'ss')}
)

In [50]:
# Regressors: treatment flag, condition dummies and their interactions.
# The 'ss' dummy and its interaction are left out of the regressor list.
merge_min_X = merge_min.loc[:, ['treatment', 'sb', 'bs', 'bb', 'sb_treatment', 'bs_treatment', 'bb_treatment']]
# Response kept as a one-column frame.
merge_min_Y = merge_min.loc[:, ['wtp']]

In [51]:
# OLS of wtp (control = min of the two values) on the interaction design, with intercept.
merge_min_X = sm.add_constant(merge_min_X)
result = sm.OLS(endog=merge_min_Y, exog=merge_min_X).fit()
result.summary(
    yname='wtp',
    xname=['const', 'treatment', 'sb', 'bs', 'bb', 'sb_treatment', 'bs_treatment', 'bb_treatment'],
)

0,1,2,3
Dep. Variable:,wtp,R-squared:,0.298
Model:,OLS,Adj. R-squared:,0.293
Method:,Least Squares,F-statistic:,54.2
Date:,"Fri, 03 Sep 2021",Prob (F-statistic):,1.42e-64
Time:,18:00:21,Log-Likelihood:,-3823.9
No. Observations:,900,AIC:,7664.0
Df Residuals:,892,BIC:,7702.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.5238,1.661,3.326,0.001,2.264,8.783
treatment,11.6286,2.349,4.951,0.000,7.019,16.238
sb,3.3238,2.349,1.415,0.157,-1.286,7.934
bs,-1.2952,2.349,-0.551,0.581,-5.905,3.315
bb,11.2169,2.214,5.065,0.000,6.871,15.563
sb_treatment,10.1429,3.322,3.053,0.002,3.624,16.662
bs_treatment,7.4952,3.322,2.256,0.024,0.976,14.015
bb_treatment,7.8825,3.132,2.517,0.012,1.736,14.029

0,1,2,3
Omnibus:,42.123,Durbin-Watson:,0.974
Prob(Omnibus):,0.0,Jarque-Bera (JB):,47.36
Skew:,0.522,Prob(JB):,5.2e-11
Kurtosis:,3.416,Cond. No.,13.0
