In [1]:
# Import modules and set options
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

sns.set(context='notebook', style='ticks')

In [2]:
lsl_dr = (pd.read_csv('../data/clean/lsl_dr.csv', index_col=0, low_memory=False)
                  .rename({'onset_1':'identify_mo'}, axis=1))

In [3]:
unique_students = lsl_dr.drop_duplicates(subset='study_id').copy()

In [4]:
mask = (unique_students.age_test>=48) & (unique_students.age_test<60)
data_4yo = unique_students[mask]

## Demographics

In [5]:
data_4yo.male.replace({1: 'Male', 0: 'Female', np.nan: 'Missing'}).value_counts()

Male       437
Female     356
Missing     13
Name: male, dtype: int64

In [6]:
data_4yo.race.replace({0:'White', 1:'Black', 2:'Hispanic', 3:'Asian', 4:'Other', np.nan:'Missing'}).value_counts()

White       433
Hispanic    125
Black        81
Other        75
Asian        62
Missing      30
Name: race, dtype: int64

In [7]:
(data_4yo.premature_age.replace({9:None})==8).replace({True: '>=36 weeks', False: '<36 weeks', 
                                                            np.nan: 'Unknown'}).value_counts()

>=36 weeks    528
<36 weeks     278
Name: premature_age, dtype: int64

In [8]:
data_4yo.sib.replace({0:'1', 1:'2', 2:'3', 3:'4+', np.nan:'Missing'}).value_counts().sort_index()

1          183
2          319
3          140
4+          92
Missing     72
Name: sib, dtype: int64

In [9]:
ed_lookup = {0:"8th grade or less",
1:"Some high school",
2:"High school diploma/GED",
3:"Some college",
4:"Bachelor's degree",
5:"Post graduate degree",
6:"Unknown",
np.nan:"Unknown"}

data_4yo.mother_ed.replace(ed_lookup).value_counts()

Unknown                    300
Some high school           200
High school diploma/GED    167
Some college               105
8th grade or less           34
Name: mother_ed, dtype: int64

In [10]:
data_4yo.father_ed.replace(ed_lookup).value_counts()

Unknown                    350
Bachelor's degree          137
Post graduate degree       103
High school diploma/GED     94
Some college                92
Some high school            19
8th grade or less           11
Name: father_ed, dtype: int64

In [11]:
family_lookup = {0:"Ideal Participation",
1:"Good Participation",
2:"Average Participation",
3:"Below Average",
4:"Limited Participation",
np.nan:"Missing"}

data_4yo.family_inv.replace(family_lookup).value_counts()

Good Participation       191
Missing                  189
Ideal Participation      189
Average Participation    162
Below Average             61
Limited Participation     14
Name: family_inv, dtype: int64

In [12]:
data_4yo[["age_diag", "age_amp", "age_int", "age"]].describe()

Unnamed: 0,age_diag,age_amp,age_int,age
count,553.0,545.0,501.0,780.0
mean,17.041591,23.230275,24.125749,38.934615
std,17.348101,17.321429,17.130504,16.038451
min,0.0,0.0,0.0,1.0
25%,1.0,6.0,7.0,30.0
50%,11.0,22.0,24.0,43.0
75%,31.0,37.0,37.0,51.0
max,59.0,65.0,60.0,88.0


In [13]:
data_4yo.otherserv.replace({0: "OPTION + outside", 1: "OPTION only", 2: "Missing"}).value_counts()

OPTION only         320
OPTION + outside    121
Missing             102
Name: otherserv, dtype: int64

## Hearing loss

In [14]:
data_4yo.degree_hl_ad.value_counts()

6.0    268
3.0    106
4.0     96
5.0     91
2.0     56
0.0     35
1.0     12
Name: degree_hl_ad, dtype: int64

In [15]:
data_4yo.degree_hl_as.value_counts()

6.0    270
3.0    115
4.0     94
5.0     81
2.0     53
0.0     31
1.0     19
Name: degree_hl_as, dtype: int64

In [16]:
hl_data = data_4yo[['bilateral_snhl',
                     'bilateral_ansd',
                     'bilateral_mixed',
                     'bilateral_cond',
                     'bilateral_normal',
                     'bilateral_unk',
                     'unilateral_snhl',
                     'unilateral_ansd',
                     'unilateral_mixed',
                     'unilateral_cond',
                     'unilateral_unk',
                     'assymetrical']]

In [17]:
hl_data.sum().astype(int).sort_values(ascending=False)

bilateral_snhl      515
bilateral_cond       26
unilateral_snhl      22
bilateral_mixed      22
bilateral_ansd       19
assymetrical         17
bilateral_normal     17
unilateral_cond      13
unilateral_mixed      1
unilateral_ansd       1
unilateral_unk        0
bilateral_unk         0
dtype: int64

In [18]:
hl_data.mean().round(2).sort_values(ascending=False)

bilateral_snhl      0.64
unilateral_snhl     0.03
bilateral_cond      0.03
bilateral_mixed     0.03
assymetrical        0.02
unilateral_cond     0.02
bilateral_normal    0.02
bilateral_ansd      0.02
unilateral_unk      0.00
unilateral_mixed    0.00
unilateral_ansd     0.00
bilateral_unk       0.00
dtype: float64

In [19]:
tech_data = data_4yo[['bilateral_ha',
                     'bilateral_ci',
                     'bimodal',
                     'bilateral_other',
                     'unilateral_ha',
                     'unilateral_ci',
                     'unilateral_other']]

In [20]:
tech_data.sum().astype(int).sort_values(ascending=False)

bilateral_ci        431
bilateral_ha        225
bimodal              64
unilateral_ci        30
unilateral_ha        23
bilateral_other       3
unilateral_other      2
dtype: int64

In [21]:
tech_data.mean().round(2).sort_values(ascending=False)

bilateral_ci        0.53
bilateral_ha        0.28
bimodal             0.08
unilateral_ci       0.04
unilateral_ha       0.03
unilateral_other    0.00
bilateral_other     0.00
dtype: float64