In [None]:
import pandas as pd
from scipy.stats import ttest_ind, chi2_contingency
import statsmodels.api as sm

In [None]:
# Load the raw study export; fields are parsed with ';' as the separator.
df = pd.read_csv(
    filepath_or_buffer='../data/v2_BabyStudy2_16jan21.csv',
    sep=';',
)

In [None]:
# Column groups used throughout the analysis. Each group is kept as a list so
# the groups can be combined and used directly as DataFrame column selectors.
gender_column = ['gender']
brosis_column = ['siblings']
prosocial_columns = [
    'PV_protest_new_yn',
    'PV_tattling_new_yn',
    'PV_prosocial_new_yn',
]

# Every column the analysis reads, in order: gender, siblings, outcomes.
needed_columns = [*gender_column, *brosis_column, *prosocial_columns]

In [None]:
# Work on a copy restricted to the analysis columns and add two helpers:
#   - 'constant': an all-ones column
#   - 'siblings_yn': 1 where 'siblings' is greater than zero, otherwise 0
df_select = df.loc[:, needed_columns].copy().assign(
    constant=1,
    siblings_yn=lambda frame: (frame['siblings'] > 0).astype('int64'),
)

In [None]:
# Keep only complete rows: a row is dropped when ANY column is null, an empty
# string, or a single space. The surviving rows are then cast to integers.
#
# The mask is built once for the whole frame and the filtered result is
# explicitly copied, so the later cast never writes into a filtered slice
# (which is what can trigger pandas' SettingWithCopyWarning).
is_missing = df_select.isnull() | df_select.isin(['', ' '])
df_select = df_select.loc[~is_missing.any(axis=1)].copy()
df_select = df_select.astype(int)

### check 0 ==> general checks

In [None]:
# Quick visual sanity check: show the first five cleaned rows.
df_select.head(5)

### check 1 ==> gender x pro-social ==> independent-samples t-test

In [None]:
# Split the cleaned data into two independent samples by the 'gender' code
# (1 -> group1_male, 2 -> group2_female). Copies keep the groups detached
# from df_select.
group1_male = df_select[df_select['gender'].eq(1)].copy()
group2_female = df_select[df_select['gender'].eq(2)].copy()

#### - t test

In [None]:
# Compare every pro-social outcome between the male and female groups with an
# independent two-sample t-test and report both the statistic and the p-value.
for p in prosocial_columns:
    print(f'-> {p}:')
    result = ttest_ind(group1_male[p], group2_female[p])
    # The result carries (statistic, pvalue); read each by name so the
    # t-statistic line reports the statistic rather than the p-value again.
    print(f'==> t-statistic {round(result.statistic, 4)}')
    print(f'==> P-value {round(result.pvalue, 4)}\n')

#### - chi square test

In [None]:
# Chi-square test of independence between gender and each pro-social outcome.
for p in prosocial_columns:
    print(f'-> {p}:')
    # Both variables come from the cleaned frame so the contingency table is
    # built from the same rows and the same integer-cast values.
    crosstable = pd.crosstab(df_select['gender'], df_select[p])
    # Unpack into dedicated names; reusing `p` here would overwrite the loop
    # variable that holds the outcome column name.
    chi2_stat, p_value, dof, expected = chi2_contingency(crosstable)
    print(f'==> P-value {round(p_value, 4)}\n')

### check 2 ==> brosis x pro-social ==> logistic regression

In [None]:
# Count rows for every (siblings, PV_prosocial_new_yn) combination. After
# count(), the 'gender' column holds the number of non-null rows per cell.
dfcheck = (
    df_select
    .groupby(['siblings', 'PV_prosocial_new_yn'])[['gender']]
    .count()
    .reset_index()
)

In [None]:
# For each 'siblings' value, express every row's count (stored in 'gender')
# as a share of the total count for that 'siblings' value. transform('sum')
# broadcasts each group's total back onto its rows, so no per-group loop or
# repeated boolean masking is needed.
dfcheck['share'] = (
    dfcheck['gender'] / dfcheck.groupby('siblings')['gender'].transform('sum')
)

In [None]:
# Logistic regression of each pro-social outcome on the 'siblings' column.
# The 'constant' column is passed alongside it as a second regressor.
for p in prosocial_columns:
    print(f'==> {p}: ==> \n')

    X_train = df_select.loc[:, [*brosis_column, 'constant']]
    y_train = df_select.loc[:, [p]]
    model = sm.Logit(endog=y_train, exog=X_train).fit()

    print(model.summary())
    print('\n')

### check 3 ==> moderation brosis -> gender x pro-social ==> PROCESS

In [None]:
from pyprocessmacro import Process

In [None]:
# PROCESS model 1 for each pro-social outcome, with 'gender' as x and
# 'siblings_yn' supplied through the `m` argument.
for p in prosocial_columns:
    print(f'==> {p}: ==>\n')
    print(' ')
    model = Process(
        data=df_select, model=1, x="gender", y=p, m=["siblings_yn"]
    )

    print(model.summary())
    print('\n')

### check 4 (extra) ==> moderation gender -> brosis x pro-social ==> PROCESS

In [None]:
from pyprocessmacro import Process

In [None]:
# PROCESS model 1 for each pro-social outcome with the roles swapped:
# 'siblings_yn' is x and 'gender' is supplied through the `m` argument.
for p in prosocial_columns:
    print(f'==> {p}: ==>\n')
    print(' ')
    model = Process(
        data=df_select, model=1, x="siblings_yn", y=p, m=["gender"]
    )

    print(model.summary())
    print('\n')