In [1]:
# Import modules and set options
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

sns.set(context='notebook', style='ticks')

In [2]:
lsl_dr = (pd.read_csv('../data/clean/lsl_dr.csv', index_col=0, low_memory=False)
                  .rename({'onset_1':'identify_mo'}, axis=1))

In [3]:
lsl_dr.shape

(59486, 232)

Exclusions

In [7]:
lsl_dr.drop_duplicates(subset='study_id').autism.sum()

49

In [4]:
inclusion_mask = (~lsl_dr.non_english.astype(bool) 
                  & (lsl_dr.hl==0)
                  & ((lsl_dr.autism==1)))

In [9]:
inclusions = lsl_dr[inclusion_mask].drop_duplicates(subset='study_id')
inclusions.shape

(33, 232)

In [38]:
inclusions.to_csv('../data/clean/autism.csv')

## Demographics

In [10]:
inclusions.male.replace({1: 'Male', 0: 'Female', np.nan: 'Missing'}).value_counts()

Male      24
Female     9
Name: male, dtype: int64

In [11]:
inclusions.race.replace({0:'White', 1:'Black', 2:'Hispanic', 3:'Asian', 4:'Other', np.nan:'Missing'}).value_counts()

White       21
Other        6
Black        3
Asian        2
Hispanic     1
Name: race, dtype: int64

In [12]:
(inclusions.premature_age.replace({9:None})==8).replace({True: '>=36 weeks', False: '<36 weeks', 
                                                            np.nan: 'Unknown'}).value_counts()

>=36 weeks    23
<36 weeks     10
Name: premature_age, dtype: int64

In [13]:
inclusions.sib.replace({0:'1', 1:'2', 2:'3', 3:'4+', np.nan:'Missing'}).value_counts().sort_index()

1           9
2          14
3           5
4+          1
Missing     4
Name: sib, dtype: int64

In [14]:
ed_lookup = {0:"8th grade or less",
1:"Some high school",
2:"High school diploma/GED",
3:"Some college",
4:"Bachelor's degree",
5:"Post graduate degree",
6:"Unknown",
np.nan:"Unknown"}

inclusions.mother_ed.replace(ed_lookup).value_counts()

High school diploma/GED    13
Unknown                    10
Some college                6
Some high school            4
Name: mother_ed, dtype: int64

In [15]:
inclusions.father_ed.replace(ed_lookup).value_counts()

Unknown                    13
Bachelor's degree          11
Post graduate degree        5
Some college                3
High school diploma/GED     1
Name: father_ed, dtype: int64

In [16]:
family_lookup = {0:"Ideal Participation",
1:"Good Participation",
2:"Average Participation",
3:"Below Average",
4:"Limited Participation",
np.nan:"Missing"}

inclusions.family_inv.replace(family_lookup).value_counts()

Average Participation    12
Ideal Participation      11
Missing                   5
Good Participation        3
Below Average             2
Name: family_inv, dtype: int64

In [17]:
inclusions[["age_diag", "age_amp", "age_int", "age"]].describe()

Unnamed: 0,age_diag,age_amp,age_int,age
count,31.0,30.0,25.0,33.0
mean,17.0,20.8,21.08,34.060606
std,17.716283,17.870029,17.83704,19.805207
min,0.0,1.0,1.0,1.0
25%,2.5,5.25,7.0,20.0
50%,18.0,19.5,20.0,33.0
75%,22.0,27.5,26.0,46.0
max,83.0,83.0,83.0,83.0


In [18]:
inclusions.otherserv.replace({0: "OPTION + outside", 1: "OPTION only", 2: "Missing"}).value_counts()

OPTION + outside    22
OPTION only          7
Name: otherserv, dtype: int64

## Hearing loss

In [19]:
inclusions.degree_hl_ad.isnull().sum()

2

In [20]:
inclusions.degree_hl_as.isnull().sum()

2

In [21]:
inclusions.degree_hl_ad.value_counts()

6.0    16
3.0     6
4.0     4
5.0     3
1.0     1
2.0     1
Name: degree_hl_ad, dtype: int64

In [22]:
inclusions.degree_hl_as.value_counts()

6.0    14
3.0     5
2.0     5
5.0     4
0.0     2
4.0     1
Name: degree_hl_as, dtype: int64

In [23]:
hl_data = inclusions.set_index('study_id')[['bilateral_snhl',
                     'bilateral_ansd',
                     'bilateral_mixed',
                     'bilateral_cond',
                     'bilateral_normal',
                     'bilateral_unk',
                     'unilateral_snhl',
                     'unilateral_ansd',
                     'unilateral_mixed',
                     'unilateral_cond',
                     'unilateral_unk',
                     'assymetrical']]

One or both parents with hearing loss

In [28]:
inclusions.one_or_both_parent_hl.value_counts()

0.0    27
1.0     2
Name: one_or_both_parent_hl, dtype: int64

In [25]:
inclusions.one_or_both_parent_hl.isnull().sum()

4

In [29]:
hl_data.sum().astype(int).sort_values(ascending=False)

bilateral_snhl      27
assymetrical         1
unilateral_mixed     1
unilateral_snhl      1
bilateral_mixed      1
unilateral_unk       0
unilateral_cond      0
unilateral_ansd      0
bilateral_unk        0
bilateral_normal     0
bilateral_cond       0
bilateral_ansd       0
dtype: int64

In [30]:
hl_data.mean().round(2).sort_values(ascending=False)

bilateral_snhl      0.82
assymetrical        0.03
unilateral_mixed    0.03
unilateral_snhl     0.03
bilateral_mixed     0.03
unilateral_unk      0.00
unilateral_cond     0.00
unilateral_ansd     0.00
bilateral_unk       0.00
bilateral_normal    0.00
bilateral_cond      0.00
bilateral_ansd      0.00
dtype: float64

In [31]:
tech_data = inclusions.set_index('study_id')[['bilateral_ha',
                     'bilateral_ci',
                     'bimodal',
                     'bilateral_other',
                     'unilateral_ha',
                     'unilateral_ci',
                     'unilateral_other']]

In [32]:
tech_data.sum().astype(int).sort_values(ascending=False)

bilateral_ci        22
bilateral_ha         6
unilateral_ci        4
bimodal              1
unilateral_other     0
unilateral_ha        0
bilateral_other      0
dtype: int64

In [33]:
tech_data.mean().round(2).sort_values(ascending=False)

bilateral_ci        0.67
bilateral_ha        0.18
unilateral_ci       0.12
bimodal             0.03
unilateral_other    0.00
unilateral_ha       0.00
bilateral_other     0.00
dtype: float64

Individuals with no technology type

In [34]:
tech_data[tech_data.sum(1)==0].index.values

array([], dtype=object)

## Summary of scores by domain

In [35]:
inclusions.head()

Unnamed: 0,redcap_event_name,academic_year_rv,hl,male,_race,prim_lang,sib,_mother_ed,father_ed,par1_ed,...,gf3_sis_ss,gf3_siw_ss,gf_version,ppvt_f,ppvt_ss,rowpvt_ss,school,score,test_name,test_type
178,initial_assessment_arm_1,2018.0,0.0,1.0,0.0,0.0,1.0,6.0,6.0,,...,,,,,,,625,70.0,,EVT
3879,initial_assessment_arm_1,2017.0,0.0,1.0,2.0,0.0,2.0,6.0,6.0,1.0,...,,,,,,,415,54.0,,EVT
3936,initial_assessment_arm_1,2012.0,0.0,0.0,1.0,0.0,,5.0,4.0,,...,,,,,,,416,70.0,,Arizonia
5357,initial_assessment_arm_1,2016.0,0.0,1.0,0.0,0.0,1.0,6.0,6.0,,...,,,,,,,624,50.0,PLS,receptive
7498,initial_assessment_arm_1,2015.0,0.0,1.0,3.0,0.0,1.0,5.0,6.0,,...,,,,,,,1149,95.0,,EOWPVT


In [36]:
inclusions.loc[(inclusions.domain=='Language') & (inclusions.test_type=='receptive'), 
           'domain'] = 'Receptive Language'
inclusions.loc[(inclusions.domain=='Language') & (inclusions.test_type=='expressive'), 
           'domain'] = 'Expressive Language'

In [37]:
inclusions.groupby('domain').score.describe().round(1)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
domain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Articulation,9.0,78.1,17.4,49.0,70.0,78.0,85.0,102.0
Expressive Vocabulary,13.0,77.6,31.1,20.0,57.0,81.0,98.0,124.0
Receptive Language,10.0,61.9,11.5,50.0,51.0,60.0,71.8,79.0
Receptive Vocabulary,1.0,80.0,,80.0,80.0,80.0,80.0,80.0
