## 交互作用探討
- 探討 is_pq, is_large 對於 wtp 之影響
- 使用 control

In [1]:
import os
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

### ```選取control資料```

In [2]:
all_data = pd.read_csv(r'C:\Users\User\Desktop\poll\data\processed_data\merge_data.csv', index_col=0)
data = all_data.loc[(all_data['treatment'] == 0)]

### ```製作 wtp 資料集```

In [3]:
# select non_pq data
non_pq_data = data[['player.id_in_group', 'player.wtp_voting_cost', 'player.is_large_team', 'session']]
non_pq_data.loc[:,'is_pq'] = 0
non_pq_data.columns = ['id','wtp', 'is_large', 'session', 'is_pq']

# select pq data
pq_data = data[['player.id_in_group','player.wtp_voting_cost_pq', 'player.is_large_team_pq', 'session']]
pq_data.loc[:, 'is_pq'] = 1
pq_data.columns = ['id','wtp', 'is_large', 'session', 'is_pq']

In [4]:
# concat two datasets
wtp_data = pd.concat([non_pq_data, pq_data])

# add interaction 
wtp_data['is_large * is_pq'] = wtp_data.apply(lambda x : x['is_large'] * x['is_pq'], axis = 1)

In [5]:
# split X and Y
wtp_data_Y = wtp_data['wtp']
wtp_Y = wtp_data_Y.to_numpy()

wtp_data_X = wtp_data.drop(['wtp','id', 'session'], axis = 1)
wtp_X = wtp_data_X.to_numpy()

 ### ```all```

In [6]:
# run regression
wtp_X = sm.add_constant(wtp_X)
result = sm.OLS(wtp_Y, wtp_X).fit()
result.summary(xname=['const','is_large', 'is_pq', 'is_large * is_pq'], yname='wtp')

0,1,2,3
Dep. Variable:,wtp,R-squared:,0.172
Model:,OLS,Adj. R-squared:,0.169
Method:,Least Squares,F-statistic:,61.93
Date:,"Thu, 02 Sep 2021",Prob (F-statistic):,2.17e-36
Time:,20:10:10,Log-Likelihood:,-3619.0
No. Observations:,900,AIC:,7246.0
Df Residuals:,896,BIC:,7265.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,13.3286,0.933,14.283,0.000,11.497,15.160
is_large,9.1131,1.278,7.132,0.000,6.605,11.621
is_pq,-7.8810,1.320,-5.972,0.000,-10.471,-5.291
is_large * is_pq,2.4226,1.807,1.341,0.180,-1.124,5.969

0,1,2,3
Omnibus:,48.286,Durbin-Watson:,0.997
Prob(Omnibus):,0.0,Jarque-Bera (JB):,40.456
Skew:,0.442,Prob(JB):,1.64e-09
Kurtosis:,2.454,Cond. No.,7.08


### ```一個一個跑回歸-有交乘項```
- 檢視 is_large, is_pq 正負號

In [7]:
sessionList = wtp_data['session'].unique()
sessionList

array(['0902_C'], dtype=object)

In [8]:
result_interaction = []

for session in sessionList:
    
    for playerID in range(1, 16):

        subset = wtp_data.loc[(wtp_data['id'] == playerID) & (wtp_data['session'] == session)]
        # split X and Y
        wtp_data_Y = subset['wtp']
        wtp_Y = wtp_data_Y.to_numpy()

        wtp_data_X = subset.drop(['wtp','id', 'session'], axis = 1)
        wtp_X = wtp_data_X.to_numpy()

        # run regression
        wtp_X = sm.add_constant(wtp_X)
        result = sm.OLS(wtp_Y, wtp_X).fit()

        # 0: const, 1: is_large, 2: is_pq, 3: is_large*is_pq
        is_large = '+' if result.params[1] > 0 else '-'
        is_pq = '+' if result.params[2] > 0 else '-'
        interaction = '+' if result.params[3] > 0 else '-'
        
        # p-value
        p_is_large = result.pvalues[1]
        p_is_pq = result.pvalues[2]
        p_is_interaction = result.pvalues[3]
        
        sig = []

        for item in [p_is_large, p_is_pq, p_is_interaction]:
            if (item < 0.001):
                sig.append('***')
            elif (item < 0.01):
                sig.append('**')
            elif (item < 0.05):
                sig.append('*')
            else:
                sig.append('')

        result_interaction.append({'session': session, 'ID':playerID, 'is_large': is_large, 'sig_is_large': sig[0], 
                                   'is_pq':is_pq, 'sig_is_pq':sig[1],
                                   'interaction':interaction, 'sig_interaction':sig[2]})

In [9]:
result_interaction = pd.DataFrame(result_interaction)

In [10]:
# 檢視係數正負號比例分配
result_interaction.groupby(['is_large', 'is_pq']).size()

is_large  is_pq
+         +        3
          -        7
-         -        5
dtype: int64

### ```一個個跑回歸-沒交乘項```

In [11]:
result_no_interaction = []

for session in sessionList:
    for playerID in range(1, 16):

        subset = wtp_data.loc[(wtp_data['id'] == playerID) & (wtp_data['session'] == session)]
        # split X and Y
        wtp_data_Y = subset['wtp']
        wtp_Y = wtp_data_Y.to_numpy()

        wtp_data_X = subset.drop(['wtp','id', 'session'], axis = 1)
        wtp_X = wtp_data_X.to_numpy()

        # run regression
        wtp_X = sm.add_constant(wtp_X)
        result = sm.OLS(wtp_Y, wtp_X).fit()

        # 0: const, 1: is_large, 2: is_pq
        is_large = '+' if result.params[1] > 0 else '-'
        is_pq = '+' if result.params[2] > 0 else '-'

        p_is_large = result.pvalues[1]
        p_is_pq = result.pvalues[2]
        sig = []

        for item in [p_is_large, p_is_pq]:
            if (item < 0.001):
                sig.append('***')
            elif (item < 0.01):
                sig.append('**')
            elif (item < 0.05):
                sig.append('*')
            else:
                sig.append('')

        result_no_interaction.append({'session': session, 'ID':playerID, 
                                      'is_large': is_large, 'sig_is_large': sig[0], 
                                      'is_pq':is_pq, 'sig_is_pq':sig[1]})

In [12]:
result_no_interaction = pd.DataFrame(result_no_interaction)

In [13]:
# 檢視結果特別的人
result_no_interaction.loc[(result_no_interaction['is_large'] == '+') & (result_no_interaction['is_pq'] == '+')]

Unnamed: 0,session,ID,is_large,sig_is_large,is_pq,sig_is_pq
1,0902_C,2,+,***,+,
6,0902_C,7,+,,+,
10,0902_C,11,+,***,+,
