## 交互作用探討
- 探討 is_pq, is_large 對於 wtp 之影響
- 使用 control

In [1]:
import os
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

### ```選取control資料```

In [2]:
all_data = pd.read_csv(r'C:\Users\User\Desktop\poll\data\processed_data\merge_data.csv', index_col=0)
data = all_data.loc[(all_data['treatment'] == 0)]

### ```製作 wtp 資料集```

In [3]:
# select non_pq data
non_pq_data = data[['player.id_in_group', 'player.wtp_voting_cost', 'player.is_large_team', 'session']]
non_pq_data.loc[:,'is_pq'] = 0
non_pq_data.columns = ['id','wtp', 'is_large', 'session', 'is_pq']

# select pq data
pq_data = data[['player.id_in_group','player.wtp_voting_cost_pq', 'player.is_large_team_pq', 'session']]
pq_data.loc[:, 'is_pq'] = 1
pq_data.columns = ['id','wtp', 'is_large', 'session', 'is_pq']

In [4]:
# concat two datasets
wtp_data = pd.concat([non_pq_data, pq_data])

# add interaction 
wtp_data['is_large * is_pq'] = wtp_data.apply(lambda x : x['is_large'] * x['is_pq'], axis = 1)

In [5]:
# split X and Y
wtp_data_Y = wtp_data['wtp']
wtp_Y = wtp_data_Y.to_numpy()

wtp_data_X = wtp_data.drop(['wtp','id', 'session'], axis = 1)
wtp_X = wtp_data_X.to_numpy()

 ### ```all```

In [6]:
# run regression
wtp_X = sm.add_constant(wtp_X)
result = sm.OLS(wtp_Y, wtp_X).fit()
result.summary(xname=['const','is_large', 'is_pq', 'is_large * is_pq'], yname='wtp')

0,1,2,3
Dep. Variable:,wtp,R-squared:,0.187
Model:,OLS,Adj. R-squared:,0.186
Method:,Least Squares,F-statistic:,275.3
Date:,"Mon, 30 Aug 2021",Prob (F-statistic):,7.320000000000001e-161
Time:,21:17:52,Log-Likelihood:,-14083.0
No. Observations:,3600,AIC:,28170.0
Df Residuals:,3596,BIC:,28200.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,13.9190,0.418,33.329,0.000,13.100,14.738
is_large,6.2372,0.572,10.907,0.000,5.116,7.358
is_pq,-10.0726,0.591,-17.055,0.000,-11.231,-8.915
is_large * is_pq,1.8653,0.809,2.307,0.021,0.280,3.451

0,1,2,3
Omnibus:,270.311,Durbin-Watson:,0.993
Prob(Omnibus):,0.0,Jarque-Bera (JB):,334.786
Skew:,0.747,Prob(JB):,2e-73
Kurtosis:,3.01,Cond. No.,7.08


### ```一個一個跑回歸-有交乘項```
- 檢視 is_large, is_pq 正負號

In [7]:
sessionList = wtp_data['session'].unique()
sessionList

array(['0820_control', '0826_m_control', '0827_a_control',
       '0827_m_control'], dtype=object)

In [8]:
result_interaction = []

for session in sessionList:
    
    for playerID in range(1, 16):

        subset = wtp_data.loc[(wtp_data['id'] == playerID) & (wtp_data['session'] == session)]
        # split X and Y
        wtp_data_Y = subset['wtp']
        wtp_Y = wtp_data_Y.to_numpy()

        wtp_data_X = subset.drop(['wtp','id', 'session'], axis = 1)
        wtp_X = wtp_data_X.to_numpy()

        # run regression
        wtp_X = sm.add_constant(wtp_X)
        result = sm.OLS(wtp_Y, wtp_X).fit()

        # 0: const, 1: is_large, 2: is_pq, 3: is_large*is_pq
        is_large = '+' if result.params[1] > 0 else '-'
        is_pq = '+' if result.params[2] > 0 else '-'
        interaction = '+' if result.params[3] > 0 else '-'
        
        # p-value
        p_is_large = result.pvalues[1]
        p_is_pq = result.pvalues[2]
        p_is_interaction = result.pvalues[3]
        
        sig = []

        for item in [p_is_large, p_is_pq, p_is_interaction]:
            if (item < 0.001):
                sig.append('***')
            elif (item < 0.01):
                sig.append('**')
            elif (item < 0.05):
                sig.append('*')
            else:
                sig.append('')

        result_interaction.append({'session': session, 'ID':playerID, 'is_large': is_large, 'sig_is_large': sig[0], 
                                   'is_pq':is_pq, 'sig_is_pq':sig[1],
                                   'interaction':interaction, 'sig_interaction':sig[2]})

In [9]:
result_interaction = pd.DataFrame(result_interaction)

In [10]:
# 檢視係數正負號比例分配
result_interaction.groupby(['is_large', 'is_pq']).size()

is_large  is_pq
+         +         7
          -        42
-         -        11
dtype: int64

### ```一個個跑回歸-沒交乘項```

In [11]:
result_no_interaction = []

for session in sessionList:
    for playerID in range(1, 16):

        subset = wtp_data.loc[(wtp_data['id'] == playerID) & (wtp_data['session'] == session)]
        # split X and Y
        wtp_data_Y = subset['wtp']
        wtp_Y = wtp_data_Y.to_numpy()

        wtp_data_X = subset.drop(['wtp','id', 'session'], axis = 1)
        wtp_X = wtp_data_X.to_numpy()

        # run regression
        wtp_X = sm.add_constant(wtp_X)
        result = sm.OLS(wtp_Y, wtp_X).fit()

        # 0: const, 1: is_large, 2: is_pq
        is_large = '+' if result.params[1] > 0 else '-'
        is_pq = '+' if result.params[2] > 0 else '-'

        p_is_large = result.pvalues[1]
        p_is_pq = result.pvalues[2]
        sig = []

        for item in [p_is_large, p_is_pq]:
            if (item < 0.001):
                sig.append('***')
            elif (item < 0.01):
                sig.append('**')
            elif (item < 0.05):
                sig.append('*')
            else:
                sig.append('')

        result_no_interaction.append({'session': session, 'ID':playerID, 
                                      'is_large': is_large, 'sig_is_large': sig[0], 
                                      'is_pq':is_pq, 'sig_is_pq':sig[1]})

In [12]:
result_no_interaction = pd.DataFrame(result_no_interaction)

In [13]:
# 檢視結果特別的人
result_no_interaction.loc[(result_no_interaction['is_large'] == '+') & (result_no_interaction['is_pq'] == '+')]

Unnamed: 0,session,ID,is_large,sig_is_large,is_pq,sig_is_pq
1,0820_control,2,+,**,+,*
3,0820_control,4,+,,+,
12,0820_control,13,+,***,+,
19,0826_m_control,5,+,***,+,
34,0827_a_control,5,+,**,+,
41,0827_a_control,12,+,***,+,
48,0827_m_control,4,+,,+,
