In [1]:
# Import modules and set options
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

sns.set(context='notebook', style='ticks')

In [2]:
lsl_dr = (pd.read_csv('../data/clean/lsl_dr.csv', index_col=0, low_memory=False)
                  .rename({'onset_1':'identify_mo'}, axis=1))

In [7]:
lsl_dr.study_id

Unnamed: 0,redcap_event_name,academic_year_rv,hl,male,_race,prim_lang,sib,_mother_ed,father_ed,par1_ed,...,gf3_sis_ss,gf3_siw_ss,gf_version,ppvt_f,ppvt_ss,rowpvt_ss,school,score,test_name,test_type
0,initial_assessment_arm_1,2002.0,0.0,0.0,0.0,0.0,1.0,6.0,6.0,,...,,,,,,,101,58.0,,EOWPVT
1,initial_assessment_arm_1,2002.0,0.0,0.0,0.0,0.0,1.0,6.0,6.0,,...,,,,,,,101,51.0,PLS,receptive
2,initial_assessment_arm_1,2002.0,0.0,0.0,0.0,0.0,1.0,6.0,6.0,,...,,,,,,,101,60.0,PLS,expressive
3,initial_assessment_arm_1,2009.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,,...,,,2.0,,,,628,74.0,,Goldman
4,initial_assessment_arm_1,2009.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,,...,,,,,,,628,96.0,CELF-4,receptive


In [9]:
lsl_dr_unique = lsl_dr.drop_duplicates(subset='study_id').copy()

## Demographics

In [10]:
lsl_dr_unique.male.replace({1: 'Male', 0: 'Female', np.nan: 'Missing'}).value_counts()

Male       2531
Female     2256
Missing      56
Name: male, dtype: int64

In [13]:
lsl_dr_unique.race.replace({0:'White', 1:'Black', 2:'Hispanic', 3:'Asian', 4:'Other', np.nan:'Missing'}).value_counts()

White       2625
Hispanic     694
Black        520
Other        485
Asian        378
Missing      141
Name: race, dtype: int64

In [19]:
(lsl_dr_unique.premature_age.replace({9:None})==8).replace({True: '>=36 weeks', False: '<36 weeks', 
                                                            np.nan: 'Unknown'}).value_counts()

>=36 weeks    3362
<36 weeks     1481
Name: premature_age, dtype: int64

In [25]:
lsl_dr_unique.sib.replace({0:'1', 1:'2', 2:'3', 3:'4+', np.nan:'Missing'}).value_counts().sort_index()

1          1225
2          1835
3           861
4+          501
Missing     421
Name: sib, dtype: int64

In [28]:
ed_lookup = {0:"8th grade or less",
1:"Some high school",
2:"High school diploma/GED",
3:"Some college",
4:"Bachelor's degree",
5:"Post graduate degree",
6:"Unknown",
np.nan:"Unknown"}

lsl_dr_unique.mother_ed.replace(ed_lookup).value_counts()

Unknown                    1769
Some high school           1179
High school diploma/GED    1089
Some college                599
8th grade or less           207
Name: mother_ed, dtype: int64

In [29]:
lsl_dr_unique.father_ed.replace(ed_lookup).value_counts()

Unknown                    2050
Bachelor's degree           909
Post graduate degree        589
High school diploma/GED     547
Some college                542
Some high school            152
8th grade or less            54
Name: father_ed, dtype: int64

In [32]:
family_lookup = {0:"Ideal Participation",
1:"Good Participation",
2:"Average Participation",
3:"Below Average",
4:"Limited Participation",
np.nan:"Missing"}

lsl_dr_unique.family_inv.replace(family_lookup).value_counts()

Ideal Participation      1403
Good Participation       1130
Missing                   981
Average Participation     936
Below Average             308
Limited Participation      85
Name: family_inv, dtype: int64

In [35]:
lsl_dr_unique[["age_diag", "age_amp", "age_int", "age"]].describe()

Unnamed: 0,age_diag,age_amp,age_int,age
count,3684.0,3434.0,3386.0,4745.0
mean,11.389617,16.740026,17.981099,31.810537
std,17.260694,18.488904,19.393178,26.691879
min,0.0,0.0,0.0,0.0
25%,1.0,4.0,4.0,11.0
50%,3.0,9.0,12.0,28.0
75%,18.0,24.0,25.0,43.0
max,154.0,173.0,200.0,227.0


## Service

In [38]:
lsl_dr_unique.otherserv.replace({0: "OPTION + outside", 1: "OPTION only", 2: "Missing"}).value_counts()

OPTION only         1791
OPTION + outside     787
Missing              707
Name: otherserv, dtype: int64

In [41]:
lsl_dr['age_test_year'] = (lsl_dr.age_test/12).round(0)

In [42]:
lsl_dr.groupby(['domain', 'age_test_year']).score.describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
domain,age_test_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Articulation,2.0,64.0,85.781250,12.480421,54.0,76.75,86.5,95.00,120.0
Articulation,3.0,789.0,84.704689,17.091356,40.0,72.00,84.0,97.00,126.0
Articulation,4.0,1673.0,83.874477,19.596065,0.0,70.00,85.0,99.00,125.0
Articulation,5.0,1343.0,82.457185,20.933599,0.0,67.00,85.0,99.00,120.0
Articulation,6.0,998.0,80.725451,20.941931,39.0,68.00,84.0,97.00,114.0
Articulation,7.0,518.0,79.980695,21.859663,3.0,65.25,84.0,98.00,115.0
Articulation,8.0,398.0,77.673367,22.548215,0.0,60.00,81.0,98.75,112.0
Articulation,9.0,226.0,82.252212,19.963814,39.0,70.00,86.0,100.00,128.0
Articulation,10.0,187.0,80.668449,21.220988,39.0,66.50,85.0,100.00,109.0
Articulation,11.0,122.0,79.983607,19.923815,39.0,67.50,83.0,99.00,105.0


## Hearing loss

In [43]:
print('N:', ((lsl_dr_unique.degree_hl_ad.notnull()) & (lsl_dr_unique.degree_hl_as.notnull())).sum())

print("\nNormal")

normal = (lsl_dr_unique.degree_hl_ad == 0) & (lsl_dr_unique.degree_hl_as == 0)

print('N:', normal.sum())
print('%:', round(normal.mean(), 2)*100)

print("\nBilateral not profound")

bilat_not_profound = (lsl_dr_unique.degree_hl_ad.isin([1, 2, 3,4, 5]) &
lsl_dr_unique.degree_hl_as.isin([1, 2, 3, 3, 4 ,5]))

print('N:', bilat_not_profound.sum())
print('%:', round(bilat_not_profound.mean(), 2)*100)

bilat_profound = (lsl_dr_unique.degree_hl_ad==6) & (lsl_dr_unique.degree_hl_as==6)

print("\nBilateral profound")

print('N:', bilat_profound.sum())
print('%:', round(bilat_profound.mean(), 2)*100)

unilat_profound = (((lsl_dr_unique.degree_hl_ad.isin([1, 2, 3,4, 5])
                    & (lsl_dr_unique.degree_hl_as==6))) |
                ((lsl_dr_unique.degree_hl_as.isin([1, 2, 3, 3, 4 ,5])) 
                 & (lsl_dr_unique.degree_hl_ad==6)))

print("\nUnilateral profound")

print('N:', unilat_profound.sum())
print('%:', round(unilat_profound.mean(), 2)*100)


unilat_not_profound = (((lsl_dr_unique.degree_hl_ad.isin([1, 2, 3,4, 5])
                    & (lsl_dr_unique.degree_hl_as==0))) |
                ((lsl_dr_unique.degree_hl_as.isin([1, 2, 3, 3, 4 ,5])) 
                 & (lsl_dr_unique.degree_hl_ad==0)))

print("\nUnilateral not profound")

print('N:', unilat_not_profound.sum())
print('%:', round(unilat_not_profound.mean(), 2)*100)

assymetrical = (((lsl_dr_unique.degree_hl_ad == 1) & (lsl_dr_unique.degree_hl_as == 2)) |
((lsl_dr_unique.degree_hl_ad == 2) & (lsl_dr_unique.degree_hl_as == 1)) |
((lsl_dr_unique.degree_hl_ad == 1) & (lsl_dr_unique.degree_hl_as == 3)) |
((lsl_dr_unique.degree_hl_ad == 3) & (lsl_dr_unique.degree_hl_as == 1)) |
((lsl_dr_unique.degree_hl_ad == 1) & (lsl_dr_unique.degree_hl_as == 4)) |
((lsl_dr_unique.degree_hl_ad == 4) & (lsl_dr_unique.degree_hl_as == 1)) |
((lsl_dr_unique.degree_hl_ad == 1) & (lsl_dr_unique.degree_hl_as == 5)) |
((lsl_dr_unique.degree_hl_ad == 5) & (lsl_dr_unique.degree_hl_as == 1)) |
((lsl_dr_unique.degree_hl_ad == 1) & (lsl_dr_unique.degree_hl_as == 6)) |
((lsl_dr_unique.degree_hl_ad == 6) & (lsl_dr_unique.degree_hl_as == 1)) |
((lsl_dr_unique.degree_hl_ad == 2) & (lsl_dr_unique.degree_hl_as == 3)) |
((lsl_dr_unique.degree_hl_ad == 3) & (lsl_dr_unique.degree_hl_as == 2)) |
((lsl_dr_unique.degree_hl_ad == 4) & (lsl_dr_unique.degree_hl_as == 2)) |
((lsl_dr_unique.degree_hl_ad == 2) & (lsl_dr_unique.degree_hl_as == 4)) |
((lsl_dr_unique.degree_hl_ad == 5) & (lsl_dr_unique.degree_hl_as == 2)) |
((lsl_dr_unique.degree_hl_ad == 2) & (lsl_dr_unique.degree_hl_as == 5)) |
((lsl_dr_unique.degree_hl_ad == 6) & (lsl_dr_unique.degree_hl_as == 2)) |
((lsl_dr_unique.degree_hl_ad == 2) & (lsl_dr_unique.degree_hl_as == 6)) |
((lsl_dr_unique.degree_hl_ad == 3) & (lsl_dr_unique.degree_hl_as == 4)) |
((lsl_dr_unique.degree_hl_ad == 4) & (lsl_dr_unique.degree_hl_as == 3)) |
((lsl_dr_unique.degree_hl_ad == 3) & (lsl_dr_unique.degree_hl_as == 5)) |
((lsl_dr_unique.degree_hl_ad == 5) & (lsl_dr_unique.degree_hl_as == 3)) |
((lsl_dr_unique.degree_hl_ad == 3) & (lsl_dr_unique.degree_hl_as == 6)) |
((lsl_dr_unique.degree_hl_ad == 6) & (lsl_dr_unique.degree_hl_as == 3)) |
((lsl_dr_unique.degree_hl_ad == 4) & (lsl_dr_unique.degree_hl_as == 5)) |
((lsl_dr_unique.degree_hl_ad == 5) & (lsl_dr_unique.degree_hl_as == 4)) |
((lsl_dr_unique.degree_hl_ad == 4) & (lsl_dr_unique.degree_hl_as == 6)) |
((lsl_dr_unique.degree_hl_ad == 6) & (lsl_dr_unique.degree_hl_as == 4)) |
((lsl_dr_unique.degree_hl_ad == 5) & (lsl_dr_unique.degree_hl_as == 6)) |
((lsl_dr_unique.degree_hl_ad == 6) & (lsl_dr_unique.degree_hl_as == 5)))

print("\nAssymetric")

print('N:', assymetrical.sum())
print('%:', round(assymetrical.mean(), 2)*100)

print('\nMissing')

missing = ((lsl_dr_unique.degree_hl_ad.isnull()) | (lsl_dr_unique.degree_hl_as.isnull()))

print('N:', missing.sum())
print('%:', round(missing.mean(), 2)*100)

N: 4236

Normal
N: 95
%: 2.0

Bilateral not profound
N: 1926
%: 40.0

Bilateral profound
N: 1396
%: 28.999999999999996

Unilateral profound
N: 456
%: 9.0

Unilateral not profound
N: 289
%: 6.0

Assymetric
N: 1086
%: 22.0

Missing
N: 607
%: 13.0


In [46]:
hl = lsl_dr_unique[['study_id', 'type_hl_ad', 'type_hl_as']].copy()

hl['bilateral_snhl'] = 0
hl.loc[(hl.type_hl_ad == 0) & (hl.type_hl_as == 0), 'bilateral_snhl'] = 1

hl['unilateral_snhl'] = 0
hl.loc[(hl.type_hl_ad == 0) & (hl.type_hl_as == 4), 'unilateral_snhl'] = 1
hl.loc[(hl.type_hl_ad == 4) & (hl.type_hl_as == 0), 'unilateral_snhl'] = 1

hl['bilateral_an'] = 0
hl.loc[(hl.type_hl_ad == 3) & (hl.type_hl_as == 3), 'bilateral_an'] = 1

hl['unilateral_an'] = 0
hl.loc[(hl.type_hl_ad == 0) & (hl.type_hl_as == 3), 'unilateral_an'] = 1
hl.loc[(hl.type_hl_ad == 3) & (hl.type_hl_as == 0), 'unilateral_an'] = 1

hl['normal'] = 0
hl.loc[(hl.type_hl_ad == 4) & (hl.type_hl_as == 4), 'normal'] = 1

hl['unknown'] = 0
hl.loc[(hl.type_hl_ad == 5) & (hl.type_hl_as == 5), 'unknown'] = 1

n = (hl.type_hl_ad.notnull() & hl.type_hl_as.notnull()).sum()
print('Total observations')
print(n)

print('\nBilateral SNHL:')
print('N:', sum(hl['bilateral_snhl'] == 1))
print('%:', round(sum(hl['bilateral_snhl'] == 1)/n, 2)*100)

print('\nUnilateral SNHL')
print('N:', sum(hl['unilateral_snhl'] == 1))
print('%:', round(sum(hl['unilateral_snhl'] == 1)/n, 2)*100)

print('\nBilateral Auditory Neuropathy')
print('N:', sum(hl['bilateral_an'] == 1))
print('%:', round(sum(hl['bilateral_an'] == 1)/n, 2)*100)

print('\nUnilateral Auditory Neuropathy')
print('N:', sum(hl['unilateral_an'] == 1))
print('%:', round(sum(hl['unilateral_an'] == 1)/n, 2)*100)

print('\nNormal')
print('N:', sum(hl['normal'] == 1))
print('%:', round(sum(hl['normal'] == 1)/n, 2)*100)

print('\nUnknown')
print('N:', sum(hl['unknown'] == 1))
print('%:', round(sum(hl['unknown'] == 1)/n, 2)*100)

print('\nMissing')
missing = (hl.type_hl_ad.isnull() | hl.type_hl_as.isnull())
print('N:', missing.sum())
print('%:', round(missing.mean(), 2)*100)

Total observations
4168

Bilateral SNHL:
N: 3169
%: 76.0

Unilateral SNHL
N: 222
%: 5.0

Bilateral Auditory Neuropathy
N: 132
%: 3.0

Unilateral Auditory Neuropathy
N: 17
%: 0.0

Normal
N: 97
%: 2.0

Unknown
N: 0
%: 0.0

Missing
N: 675
%: 14.000000000000002
