# Analysis Age & Sex

1 July 2023 Guido Cattani

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import shapiro as shapiro

In [2]:
def select_bh5(df):
    """Return the BAHA5P rows of *df* without the 'Device_implant' column.

    Rows are kept when 'Device_implant' equals 'BAHA5P'. The input frame
    itself is left unchanged; a new DataFrame is returned.
    """
    keep = df['Device_implant'] == 'BAHA5P'
    # The device column is constant after filtering, so it is removed.
    return df.loc[keep].drop(columns='Device_implant')

In [3]:
def read_subjects(f_in='/media/guido/LACIE/Cingle_Guido/Master/Study_key/age&sex_implant.csv'):
    """Read the subjects CSV and return only the complete rows.

    Parameters
    ----------
    f_in : str or pathlib.Path, optional
        Location of the CSV file. Defaults to the study-key file that was
        previously hard-coded in the function body.

    Returns
    -------
    pandas.DataFrame
        The file content with every row that has at least one missing
        value removed.
    """
    df = pd.read_csv(Path(f_in))
    # Missing markers are replaced by pd.NA first, then any row containing a
    # missing value is dropped.
    return df.fillna(pd.NA).dropna()

In [4]:
def elasped_years(date, birthday):
    """Return the years elapsed between *birthday* and *date*, to 1 decimal.

    Parameters
    ----------
    date, birthday : pandas.Series of date strings (day first)
        Both are parsed with ``pd.to_datetime(..., dayfirst=True)``.

    Returns
    -------
    pandas.Series
        ``date - birthday`` expressed in years, rounded to one decimal.
    """
    date1 = pd.to_datetime(date, dayfirst=True)
    birthday1 = pd.to_datetime(birthday, dayfirst=True)
    diff = date1 - birthday1
    # Divide by a fixed-length mean Gregorian year (365.2425 days) instead of
    # np.timedelta64(1, 'Y'): the 'Y' unit has no unambiguous duration and
    # recent pandas versions refuse it in timedelta arithmetic.
    one_year = pd.Timedelta(days=365.2425)
    diff_years = (diff / one_year).round(1)
    return diff_years

In [5]:
def descriptive_stat(srs):
    """Summarise a numeric Series in a one-column table.

    The returned DataFrame has the single column 'age_at_fitting' and the
    rows P10, P50, P90, Mean, St. dev., followed by the rows produced by
    ``shapiro_test_norm(srs)``.

    Parameters
    ----------
    srs : pandas.Series
        The values to describe.

    Returns
    -------
    pandas.DataFrame
    """
    col = 'age_at_fitting'

    # calculate quantiles
    quantiles = srs.quantile([0.1, 0.5, 0.9]).round(1)
    quantiles.index = ['P10', 'P50', 'P90']
    # to_frame(name=...) labels the column directly. Building the frame with
    # DataFrame(series, columns=[...]) selects by label instead, so a Series
    # carrying any other name would lose its values to NaN.
    quantiles = quantiles.to_frame(name=col)

    # calculate mean and standard deviation (one labelled row each)
    mean_age = pd.DataFrame({col: [round(srs.mean(), 1)]}, index=['Mean'])
    std_age = pd.DataFrame({col: [round(srs.std(), 1)]}, index=['St. dev.'])

    # perform Shapiro test
    sht = shapiro_test_norm(srs)

    # combine results into a single DataFrame
    res = pd.concat([quantiles, mean_age, std_age, sht])

    return res.round(1)

In [6]:
def shapiro_test_norm(srs, alpha=0.05):
    """Check normality of *srs* with the Shapiro-Wilk test.

    Parameters
    ----------
    srs : pandas.Series
        Sample values.
    alpha : float, optional
        Significance level; the sample is reported as normally distributed
        unless the p-value is below it. Defaults to 0.05.

    Returns
    -------
    pandas.DataFrame
        One column 'age_at_fitting' with the rows 'Shapiro test statistic',
        'p-value' (both rounded to 3 decimals) and 'normally distributed'.
    """
    arr = srs.to_numpy()
    statistic, p_value = shapiro(arr)  # scipy.stats Shapiro-Wilk test

    # Decide on the unrounded p-value: rounding first turns e.g. 0.0496 into
    # 0.05 and would report a significant deviation as "normal".
    # `not (p < alpha)` keeps a NaN p-value on the True side, as before.
    is_normal_distr = not (p_value < alpha)

    shapiro_test = pd.DataFrame({
        'Shapiro test statistic': np.round(statistic, 3),
        'p-value': np.round(p_value, 3),
        'normally distributed': is_normal_distr
    }, index=['age_at_fitting']).transpose()

    return shapiro_test

In [7]:
# load the subject table (read_subjects drops rows with missing values);
# the bare len() echoes the number of remaining rows as the cell output
data = read_subjects()
len(data)

23

In [8]:
# calculate age at fitting: years between 'DoB' and 'Datum_Bcdirect_implant',
# stored in the new column 'age_at_fitting'
data['age_at_fitting'] = elasped_years(data['Datum_Bcdirect_implant'], data['DoB'])

In [9]:
# select data BAHA5 group (exclusion BP110 subjects)
# select_bh5 keeps the rows with Device_implant == 'BAHA5P' and removes that
# column; the bare len() echoes the group size as the cell output
data_bh5 = select_bh5(data)
len(data_bh5)

20

In [10]:
# save data subjects BAHA5-group to file
# target CSV for the BAHA5 table
p = Path('/media/guido/LACIE/Cingle_Guido/Master/Study_key/age_fitting_implant.csv')
# use Study_ID as row index so it becomes the first column of the CSV
data_bh5.set_index('Study_ID', inplace = True)
data_bh5.to_csv(p)
# echo the table as the cell output
data_bh5

Unnamed: 0_level_0,DoB,sex,deaf_ear,Datum_Bcdirect_implant,age_at_fitting
Study_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
34,18-7-1973,2,2,3-11-2016,43.3
35,10-3-1954,1,1,26-1-2017,62.9
43,2-11-1953,1,1,23-9-2017,63.9
47,4-10-1962,2,1,13-12-2017,55.2
50,3-5-1971,1,2,2-8-2018,47.3
53,16-12-1962,2,1,5-10-2017,54.8
54,23-10-1949,1,1,18-1-2018,68.2
56,26-8-1948,1,2,20-12-2017,69.3
59,24-2-1966,1,1,4-7-2018,52.4
66,14-4-1959,2,2,7-11-2018,59.6


In [11]:
# descriptive stat age
# descriptive_stat returns quantiles, mean, standard deviation and the
# Shapiro-Wilk result in one table; the bare `qa` echoes it as cell output
age = data_bh5['age_at_fitting']
qa = descriptive_stat(age)
qa

Unnamed: 0,age_at_fitting
P10,47.6
P50,55.0
P90,68.3
Mean,57.5
St. dev.,8.6
Shapiro test statistic,0.969
p-value,0.724
normally distributed,True


In [12]:
# name of the file for the age statistics
f_ar = "age_fitting.csv"

# save the descriptive statistics table (qa) to file
d_ar = "/media/guido/LACIE/Cingle_Guido/Master/Implant/Analysis_Results/"
# join with pathlib so the result does not depend on a trailing slash in d_ar
p_ar = Path(d_ar) / f_ar
qa.to_csv(p_ar)

In [13]:
# count males vs females
sex = data_bh5['sex'].astype("category")
# relabel the category codes (1 -> Males, 2 -> Females); categories that are
# not in the mapping are passed through unchanged
sex = sex.cat.rename_categories({1: 'Males', 2: 'Females'})
cnt_sex = sex.value_counts()
# name the count column explicitly: renaming a column called 'sex' only works
# on pandas versions where value_counts keeps the series name (newer versions
# call it 'count'), so the old rename could silently do nothing
sx = cnt_sex.to_frame(name='Counts Sex')
sx

Unnamed: 0,Counts Sex
Females,13
Males,7


In [14]:
# count deaf side
deaf = data_bh5['deaf_ear'].astype("category")
# relabel the category codes (1 -> Left, 2 -> Right); categories that are not
# in the mapping are passed through unchanged
deaf = deaf.cat.rename_categories({1: 'Left', 2: 'Right'})
cnt_deaf = deaf.value_counts()
# name the count column explicitly: renaming a column called 'deaf_ear' only
# works on pandas versions where value_counts keeps the series name (newer
# versions call it 'count'), so the old rename could silently do nothing
deaf_side = cnt_deaf.to_frame(name='Counts Deaf Ear Side')
deaf_side

Unnamed: 0,Counts Deaf Ear Side
Left,12
Right,8


In [15]:
# name file counts sex results
f_ar = "sex.csv"

# save the sex counts table (sx) to file
d_ar = "/media/guido/LACIE/Cingle_Guido/Master/Implant/Analysis_Results/"
# join with pathlib so the result does not depend on a trailing slash in d_ar
p_ar = Path(d_ar) / f_ar
sx.to_csv(p_ar)