# Descriptive statistics and group comparisons of the sample

In [67]:
import pandas as pd 
from pathlib import Path
import numpy as np

# Both input files live under ../input relative to the current working directory.
current_dir = Path.cwd()
input_dir = current_dir.parent / 'input'
data_file = str(input_dir / 'Master_BHS.csv')
dmdmean_file = str(input_dir / 'preprocessed' / 'Pre' / 'mean_modes.csv')

part_info = pd.read_csv(data_file)
dmd_mean = pd.read_csv(dmdmean_file, index_col = 0)

# Keep only the rows whose `vpn` also occurs in the `part` column of the
# mean-modes table (presumably participant ids -- TODO confirm).
vpn_to_select = dmd_mean.part.unique()

# Treat cells that are empty or whitespace-only as missing. The pattern is
# anchored on purpose: with a non-string replacement value pandas replaces the
# whole cell when the regex matches anywhere, so an unanchored r'\s+' would
# also wipe every string that merely contains a space.
part_info = part_info[part_info.vpn.isin(vpn_to_select)].replace(r'^\s*$', np.nan, regex=True)

# Row filter and column selection in a single .loc call, followed by an
# explicit copy, so the dtype conversion below writes to an independent frame
# rather than to a slice of the original (avoids SettingWithCopyWarning).
part_info = part_info.loc[
    part_info.gruppe_alter > 1,
    ['mvclinks','mvcrechts','gruppe_exp','taktileexpscore','alter'],
].copy()
part_info['taktileexpscore'] = part_info['taktileexpscore'].astype('float')


In [62]:
# Summary statistics per `gruppe_exp` level, transposed so that every level
# becomes a column and each (variable, statistic) pair becomes a row.
part_info.groupby('gruppe_exp').describe().transpose()

Unnamed: 0,gruppe_exp,1,2
mvclinks,count,22.0,21.0
mvclinks,mean,48.887673,52.511195
mvclinks,std,16.66557,21.484999
mvclinks,min,29.9529,20.8555
mvclinks,25%,35.767475,40.465
mvclinks,50%,42.6273,46.4549
mvclinks,75%,63.524525,64.9837
mvclinks,max,82.3158,109.574
mvcrechts,count,22.0,21.0
mvcrechts,mean,52.220041,64.185408


In [63]:
from scipy.stats import shapiro

def test_normality(df, test, output = False):
    stats = []
    print(test) 
    for v in ['mvclinks','mvcrechts', 'taktileexpscore','alter']:
        print('%s:' %v)
        for g in  df.gruppe_exp.unique():
            data = df[(df.gruppe_exp == g)][v]
            stat , p = test(data)
            print('Group: %s / variable: %s / Statistics=%.3f, p=%.3f' % (g,v,stat, p)) 
            stats.append((stat,p))
        print('\n')
    if output == True:
        return(stats) 

# Print the Shapiro-Wilk result for each variable within each group.
test_normality(df=part_info, test=shapiro)

<function shapiro at 0x7f717f6cda60>
mvclinks:
Group: 2 / variable: mvclinks / Statistics=0.906, p=0.046
Group: 1 / variable: mvclinks / Statistics=0.882, p=0.013


mvcrechts:
Group: 2 / variable: mvcrechts / Statistics=0.902, p=0.038
Group: 1 / variable: mvcrechts / Statistics=0.830, p=0.002


taktileexpscore:
Group: 2 / variable: taktileexpscore / Statistics=0.928, p=0.125
Group: 1 / variable: taktileexpscore / Statistics=nan, p=1.000


alter:
Group: 2 / variable: alter / Statistics=0.903, p=0.041
Group: 1 / variable: alter / Statistics=0.904, p=0.035




In [64]:
from scipy.stats import mannwhitneyu, ttest_ind
from statsmodels.stats.multitest import fdrcorrection



def calc_stat(df, test, vars, output = False):
    """Compare `gruppe_exp` group 1 against group 2 on each listed column.

    For every column name in ``vars`` the non-missing values of the rows with
    ``gruppe_exp == 1`` and ``gruppe_exp == 2`` are passed, in that order, to
    ``test``, which must return a ``(statistic, p_value)`` pair.

    Returns a dict mapping column name to ``(statistic, p_value)`` when
    ``output`` equals ``True``; otherwise returns ``None``.
    """
    def _compare(column):
        # Two-sample comparison for a single column, missing values removed.
        first_group = df.loc[df.gruppe_exp == 1, column].dropna()
        second_group = df.loc[df.gruppe_exp == 2, column].dropna()
        statistic, p_value = test(first_group, second_group)
        return (statistic, p_value)

    results = {column: _compare(column) for column in vars}
    return results if output == True else None

# Mann-Whitney U test for three variables, independent-samples t-test for
# `taktileexpscore`; both are collected into one table.
# NOTE(review): `fdrcorrection` is imported in this cell but never applied in
# the visible code, so the p-values below are uncorrected -- confirm intent.
result_no_par = calc_stat(
    part_info,
    mannwhitneyu,
    vars=['mvclinks', 'mvcrechts', 'alter'],
    output=True,
)
result_par = calc_stat(
    part_info,
    ttest_ind,
    vars=['taktileexpscore'],
    output=True,
)

# `result_par` holds only the 'taktileexpscore' entry; fold it into the
# combined dictionary.
result_no_par.update(result_par)

# One row per variable, one column each for the test statistic and p-value.
res = pd.DataFrame(result_no_par).T.rename(columns={0: 'Statistical Value', 1: 'p'})
print(res)

                 Statistical Value             p
mvclinks                204.000000  2.598342e-01
mvcrechts               147.000000  2.124159e-02
alter                   211.000000  3.173155e-01
taktileexpscore         -10.987525  1.192524e-13
