In [1]:
import pandas as pd
from scipy.stats import ttest_ind, chi2_contingency
import statsmodels.api as sm

In [3]:
df = pd.read_csv('../data/v2_BabyStudy2_16jan21.csv', sep=';')

In [4]:
gender_column = ['gender']
brosis_column = ['siblings']
prosocial_columns = [
    'PV_protest_new_yn',
    'PV_tattling_new_yn',
    'PV_prosocial_new_yn',
]
needed_columns = gender_column + brosis_column + prosocial_columns

In [5]:
df_select = df[needed_columns].copy()
df_select['constant'] = 1
df_select['siblings_yn'] = 0
df_select.loc[lambda x: x['siblings']>0, 'siblings_yn'] = 1

In [6]:
for c in df_select.columns:
    df_select = df_select.loc[lambda x: ~x[c].isnull()]
    df_select = df_select.loc[lambda x: x[c]!='']
    df_select = df_select.loc[lambda x: x[c]!=' ']
    df_select[c] = df_select[c].astype(int)

### check 0 ==> general checks

In [7]:
df_select[:5]

Unnamed: 0,gender,siblings,PV_protest_new_yn,PV_tattling_new_yn,PV_prosocial_new_yn,constant,siblings_yn
1,1,3,0,0,0,1,1
2,2,1,0,0,0,1,1
3,1,2,0,0,0,1,1
4,2,1,0,0,0,1,1
5,1,1,0,0,0,1,1


### check 1 ==> gender x pro-social ==> indepedent sample T test

In [8]:
group1_male = df_select.loc[lambda x: x['gender']==1].copy()
group2_female = df_select.loc[lambda x: x['gender']==2].copy()

#### - t test

In [9]:
for p in prosocial_columns:
    print(f'-> {p}:')
    result = ttest_ind(group1_male[p], group2_female[p])
    print(f'==> t-statistic {round(result[1], 4)}')
    print(f'==> P-value {round(result[1], 4)}\n')

-> PV_protest_new_yn:
==> t-statistic 0.8085
==> P-value 0.8085

-> PV_tattling_new_yn:
==> t-statistic 0.2336
==> P-value 0.2336

-> PV_prosocial_new_yn:
==> t-statistic 0.8965
==> P-value 0.8965



#### - chi square test

In [10]:
for p in prosocial_columns:
    print(f'-> {p}:')
    crosstable = pd.crosstab(df_select['gender'], df[p]) 
    c, p, dof, expected = chi2_contingency(crosstable)
    print(f'==> P-value {round(p, 4)}\n')

-> PV_protest_new_yn:
==> P-value 0.9219

-> PV_tattling_new_yn:
==> P-value 0.3682

-> PV_prosocial_new_yn:
==> P-value 0.7956



### check 2 ==> brosis x pro-social ==> logistic regression

In [11]:
dfcheck = df_select.groupby(['siblings', 'PV_prosocial_new_yn'], as_index=False)[['gender']].count()

In [12]:
for s in dfcheck['siblings'].unique():
    dfcheck.loc[lambda x: x['siblings']==s, 'share'] = dfcheck.loc[lambda x: x['siblings']==s]['gender'] / dfcheck.loc[lambda x: x['siblings']==s]['gender'].sum()
# print(dfcheck)

In [13]:
for p in prosocial_columns:
    print(f'==> {p}: ==> \n')

    X_train = df_select[brosis_column + ['constant']]
    y_train = df_select[[p]] 
    model = sm.Logit(y_train, X_train).fit() 

    print(model.summary())
    print('\n')

==> PV_protest_new_yn: ==> 

Optimization terminated successfully.
         Current function value: 0.506217
         Iterations 5
                           Logit Regression Results                           
Dep. Variable:      PV_protest_new_yn   No. Observations:                   53
Model:                          Logit   Df Residuals:                       51
Method:                           MLE   Df Model:                            1
Date:                Fri, 19 Mar 2021   Pseudo R-squ.:                0.008757
Time:                        15:28:31   Log-Likelihood:                -26.829
converged:                       True   LL-Null:                       -27.066
Covariance Type:            nonrobust   LLR p-value:                    0.4911
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
siblings       0.3041      0.440      0.692      0.489      -0.557       1.166


### check 3 ==> moderation brosis -> gender x pro-social ==> PROCESS

In [14]:
from pyprocessmacro import Process

In [15]:
for p in prosocial_columns:
    print(f'==> {p}: ==>\n')
    print(' ')
    model = Process(data=df_select,
                    model=1,
                    x="gender",
                    y=p,
                    m=["siblings_yn"])

    print(model.summary())
    print('\n')

==> PV_protest_new_yn: ==>

 
Process successfully initialized.
Based on the Process Macro by Andrew F. Hayes, Ph.D. (www.afhayes.com)


****************************** SPECIFICATION ****************************

Model = 1

Variables:
    Cons = Cons
    x = gender
    y = PV_protest_new_yn
    m = siblings_yn

Sample size:
53

***************************** OUTCOME MODELS ****************************

Outcome = PV_protest_new_yn 
OLS Regression Summary

     R²  Adj. R²    MSE      F  df1  df2  p-value
 0.0067  -0.0760 0.1767 0.1106    3   49   0.9535

Coefficients

                     coeff     se       t      p    LLCI   ULCI
Cons                0.5000 0.5148  0.9712 0.3362 -0.5091 1.5091
gender             -0.2500 0.4459 -0.5607 0.5775 -1.1239 0.6239
siblings_yn        -0.2833 0.5508 -0.5144 0.6093 -1.3628 0.7961
gender*siblings_yn  0.2417 0.4637  0.5212 0.6046 -0.6671 1.1504

-------------------------------------------------------------------------


********************** CONDITIO

  and should_run_async(code)


### check 4 (extra) ==> moderation gender -> brosis x pro-social ==> PROCESS

In [16]:
for p in prosocial_columns:
    print(f'==> {p}: ==>\n')
    print(' ')
    model = Process(data=df_select,
                    model=1,
                    x="siblings_yn",
                    y=p,
                    m=["gender"])

    print(model.summary())
    print('\n')

==> PV_protest_new_yn: ==>

 
Process successfully initialized.
Based on the Process Macro by Andrew F. Hayes, Ph.D. (www.afhayes.com)


****************************** SPECIFICATION ****************************

Model = 1

Variables:
    Cons = Cons
    x = siblings_yn
    y = PV_protest_new_yn
    m = gender

Sample size:
53

***************************** OUTCOME MODELS ****************************

Outcome = PV_protest_new_yn 
OLS Regression Summary

     R²  Adj. R²    MSE      F  df1  df2  p-value
 0.0067  -0.0760 0.1767 0.1106    3   49   0.9535

Coefficients

                     coeff     se       t      p    LLCI   ULCI
Cons                0.5000 0.5148  0.9712 0.3362 -0.5091 1.5091
siblings_yn        -0.2833 0.5508 -0.5144 0.6093 -1.3628 0.7961
gender             -0.2500 0.4459 -0.5607 0.5775 -1.1239 0.6239
siblings_yn*gender  0.2417 0.4637  0.5212 0.6046 -0.6671 1.1504

-------------------------------------------------------------------------


********************** CONDITIO

  and should_run_async(code)
