# Combining CHEC and EmoGrow LEQ Raw Data

In [3]:
import pandas as pd

# Load the two rated LEQ exports (CHEC and EmoGrow, per the file names).
df1 = pd.read_csv('../scored_data/LEQ_CHEC_rated_20200914.csv', index_col=None)
df2 = pd.read_csv('../scored_data/LEQ_EmoGrow_rated_20200914.csv', index_col=None)

# Stack the two tables row-wise. ignore_index=True renumbers the rows 0..n-1;
# without it each input keeps its own 0-based labels, so the index column
# written below would contain duplicate values.
df3 = pd.concat([df1, df2], ignore_index=True)

# The row index is written as the first (unnamed) column of the CSV.
df3.to_csv('../scored_data/combinedLEQ_20200914.csv')

# Scoring

In [65]:
# Load the age-QC'd combined file (its first CSV column becomes the row index)
# and list every column name, one per line.
data = pd.read_csv('../scored_data/combinedLEQ_20200914_agesQC.csv', index_col=0)
for column_name in data.columns.tolist():
    print(column_name)

ID
serious_accident
wit_serious_accident
nat_disaster
other_illnessinjury
death_person
death_pet
med_procedures
separation
suicide
assault
threat_harm
kidnap
animal_attack
wit_fam_violence
wit_fam_threat
wit_other_violence
yell
lack_care
new_sibling
new_school
new_home
change_fam
hobby
new_friend
end_friend
finance
recognition
other_stress
other_pos
ser_acc_descr1
ser_acc_descr2
ser_acc_descr3
ser_acc_descr4
ser_acc_ageY1
ser_acc_ageM1
ser_acc_ageY2
ser_acc_ageM2
ser_acc_ageY3
ser_acc_ageM3
ser_acc_ageY4
ser_acc_ageM4
ser_acc_imp1
ser_acc_imp2
ser_acc_imp3
ser_acc_imp4
ser_acc_type1
ser_acc_type2
ser_acc_type3
ser_acc_type4
ser_acc_prox1
ser_acc_prox2
ser_acc_prox3
ser_acc_prox4
wit_ser_acc_descr1
wit_ser_acc_descr2
wit_ser_acc_descr3
wit_ser_acc_descr4
wit_ser_acc_ageY1
wit_ser_acc_ageM1
wit_ser_acc_ageY2
wit_ser_acc_ageM2
wit_ser_acc_ageY3
wit_ser_acc_ageM3
wit_ser_acc_ageY4
wit_ser_acc_ageM4
wit_ser_acc_imp1
wit_ser_acc_imp2
wit_ser_acc_imp3
wit_ser_acc_imp4
wit_ser_acc_type1
wit_se

In [66]:
# Column-name stems, one per LEQ event type. The scoring cells append
# suffixes such as '_imp1' or '_ageY1' to these stems to address the
# per-instance columns of the wide table.
prefixes = [
    'ser_acc', 'wit_ser_acc', 'nat_dis', 'oth_ill',
    'death_oth', 'death_pet', 'med_pro', 'sep',
    'suicide', 'assault', 'threat', 'kidnap', 'an_att',
    'wit_fam_vio', 'wit_fam_threat', 'wit_oth_vio',
    'yell', 'lack_care',
    'new_sib', 'new_scho', 'new_home', 'change_fam',
    'hobby', 'new_friend', 'end_friend',
    'finan', 'recog',
    'oth_stress', 'oth_pos',
]

# Three disjoint subsets of `prefixes` (presumably grouped by the expected
# valence of the event: negative / positive / ambiguous -- confirm with the
# scoring protocol).
neg_pref = [
    'ser_acc', 'wit_ser_acc', 'nat_dis', 'oth_ill',
    'death_oth', 'death_pet', 'med_pro', 'sep',
    'suicide', 'assault', 'threat', 'kidnap', 'an_att',
    'wit_fam_vio', 'wit_fam_threat', 'wit_oth_vio',
    'yell', 'lack_care',
    'oth_stress',
]
pos_pref = ['hobby', 'new_friend', 'recog', 'oth_pos']
ambig_pref = ['new_sib', 'new_scho', 'new_home', 'change_fam', 'end_friend', 'finan']

In [None]:
import numpy as np

# Convert the wide questionnaire table (`data`) to long form: one row per
# (participant, event type, instance slot).

# Number of instance slots melted per event type (columns suffixed 1..5).
# pd.melt raises a KeyError if any expected column is absent from `data`.
N_INSTANCES = 5


def melt_measure(measure, value_name):
    """Melt every ``<prefix>_<measure><k>`` column of ``data`` into long form.

    Parameters
    ----------
    measure : str
        Column-name infix, e.g. 'imp', 'prox', 'ageY' or 'ageM'.
    value_name : str
        Name of the value column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Columns 'ID', 'stress_instance' (the source column name) and
        ``value_name``. The columns are listed slot by slot (all prefixes for
        slot 1, then slot 2, ...), so frames returned for different measures
        share the same row order and can be combined positionally.
    """
    columns = [prefix + '_' + measure + str(slot)
               for slot in range(1, N_INSTANCES + 1)
               for prefix in prefixes]
    # var_name is a scalar: newer pandas versions reject a list-valued
    # var_name when the columns are not a MultiIndex.
    return pd.melt(data, id_vars=['ID'], value_vars=columns,
                   var_name='stress_instance', value_name=value_name)


imp_long = melt_measure('imp', 'impact')
prox_long = melt_measure('prox', 'proximity')
ageY_long = melt_measure('ageY', 'ageY')
ageM_long = melt_measure('ageM', 'ageM')

# compute age in years and propagate NaNs
# .copy() makes age_long an independent frame so the column assignments
# below do not write into a slice of ageY_long.
age_long = ageY_long[['ID', 'stress_instance']].copy()
age_parts = pd.concat([ageY_long['ageY'], ageM_long['ageM'] / 12], axis=1)
# A missing years or months part counts as 0; min_count=1 leaves the age NaN
# only when both parts are missing.
age_long['age_years'] = age_parts.sum(axis=1, min_count=1)

# separate stressors names from instances
# e.g. 'ser_acc_ageY1' -> event 'ser_acc', instance '1'. The slicing assumes
# a single-digit slot number and the 6-character '_ageY<k>' suffix.
age_long['instance'] = age_long['stress_instance'].str[-1]
age_long['event'] = age_long['stress_instance'].str[:-6]

# compile data into one long format file
data_long = pd.concat((age_long[['ID', 'event', 'instance', 'age_years']],
                       prox_long[['proximity']],
                       imp_long[['impact']]), axis=1)

# compute if an event was positive, negative, or neutral based on impact scores
# Rows with a missing impact keep NaN. The column starts as object dtype so the
# string labels can be written with .loc; chained indexing
# (df[col][mask] = value) may assign to a temporary copy instead.
data_long['impact_dir'] = pd.Series(np.nan, index=data_long.index, dtype=object)
data_long.loc[data_long['impact'] == 0, 'impact_dir'] = 'none'
data_long.loc[data_long['impact'] > 0, 'impact_dir'] = 'positive'
data_long.loc[data_long['impact'] < 0, 'impact_dir'] = 'negative'

# convert impact scores to absolute values
data_long['impact_mag'] = np.abs(data_long['impact'])

# compute proximity weights
data_long['prox_weight'] = 4 - data_long['proximity']

# compute age bins: <=3 years, >3 to 5 years, >5 years; missing ages stay NaN
data_long['age_bin'] = pd.Series(np.nan, index=data_long.index, dtype=object)
data_long.loc[data_long['age_years'] <= 3, 'age_bin'] = 'infanttod03'
data_long.loc[(data_long['age_years'] > 3) & (data_long['age_years'] <= 5), 'age_bin'] = 'prek35'
data_long.loc[data_long['age_years'] > 5, 'age_bin'] = 'school68'

# save long-form data
data_long.to_csv('../scored_data/LEQ_combined_long_20201104.csv')

In [107]:
# Reload the long-form table from the manually cleaned file, in which missing
# data (e.g., proximity scores or ages) were filled in by hand.
# pd.read_csv is used because the notebook only imports pandas as `pd`; the
# bare name `read_csv` is not defined on a fresh kernel.
data_long = pd.read_csv('../scored_data/LEQ_combined_long_20201104_cleaned.csv', index_col=0)
data_long.columns

Index(['ID', 'event', 'instance', 'age_years', 'proximity', 'impact',
       'impact_dir', 'impact_mag', 'prox_weight', 'age_bin'],
      dtype='object')

In [116]:
# Pull subject-level summary data: one row of LEQ summary scores per participant.
summary_columns = ['LEQ_tot_neg', 'LEQ_sumimp_neg', 'LEQ_tot_pos', 'LEQ_sumimp_pos',
                   'LEQ_tot_noimp', 'LEQ_wprox_neg', 'LEQ_wprox_pos',
                   'LEQ_tot_neg_prox1', 'LEQ_tot_neg_prox2', 'LEQ_tot_neg_prox3',
                   'LEQ_tot_neg_infanttod03', 'LEQ_tot_neg_prek35', 'LEQ_tot_neg_school68',
                   'LEQ_proxwtot_neg_infanttod03', 'LEQ_proxwtot_neg_prek35',
                   'LEQ_proxwtot_neg_school68']
age_bins = ['infanttod03', 'prek35', 'school68']

subject_list = np.unique(data_long['ID'])

summary_rows = []
for sub in subject_list:
    t = data_long[data_long['ID'] == sub]
    tneg = t[t['impact_dir'] == 'negative']
    tpos = t[t['impact_dir'] == 'positive']
    tnoimp = t[t['impact_dir'] == 'none']

    row = {
        # number of events per impact direction (rows with a finite impact score)
        'LEQ_tot_neg': int(np.isfinite(tneg['impact']).sum()),
        'LEQ_tot_pos': int(np.isfinite(tpos['impact']).sum()),
        'LEQ_tot_noimp': int(np.isfinite(tnoimp['impact']).sum()),
        # summed impact magnitudes; a NaN magnitude makes the sum NaN, which
        # the fillna(0) below turns into 0 (unchanged from the original cell)
        'LEQ_sumimp_neg': sum(tneg['impact_mag']),
        'LEQ_sumimp_pos': sum(tpos['impact_mag']),
        # summed proximity weights, ignoring missing weights
        'LEQ_wprox_neg': np.nansum(tneg['prox_weight']),
        'LEQ_wprox_pos': np.nansum(tpos['prox_weight']),
    }

    # number of negative events at each proximity code
    for level in (1, 2, 3):
        row['LEQ_tot_neg_prox' + str(level)] = int((tneg['proximity'] == level).sum())

    for age_bin in age_bins:
        in_bin = tneg['age_bin'] == age_bin
        # Count the negative events in this age bin. The earlier version summed
        # the raw proximity codes here, which is not a count and became 0
        # (via fillna) whenever one proximity value was missing.
        row['LEQ_tot_neg_' + age_bin] = int(in_bin.sum())
        row['LEQ_proxwtot_neg_' + age_bin] = np.nansum(tneg.loc[in_bin, 'prox_weight'])

    summary_rows.append(row)

# Build the table once; `columns` fixes the output column order.
subject_data = pd.DataFrame(summary_rows, index=subject_list, columns=summary_columns)
subject_data = subject_data.fillna(0)

subject_data.to_csv('../scored_data/full_sample_LEQ_summaryscores_20201116.csv')

In [119]:
# Pull subject/stressor-level summary data: for every participant and every
# event type, count the events by impact direction and sum the impact magnitudes.
event_measures = ['tot_neg', 'tot_pos', 'tot_noimp', 'sumimp_neg', 'sumimp_pos']
event_columns = [event + '_' + measure for event in prefixes for measure in event_measures]

event_rows = []
for sub in subject_list:
    # filter the participant's rows once instead of once per event type
    sub_rows = data_long[data_long['ID'] == sub]
    row = {}
    for event in prefixes:
        temp = sub_rows[sub_rows['event'] == event]
        tneg = temp[temp['impact_dir'] == 'negative']
        tpos = temp[temp['impact_dir'] == 'positive']
        tnoimp = temp[temp['impact_dir'] == 'none']
        row[event + '_tot_neg'] = int(np.isfinite(tneg['impact']).sum())
        row[event + '_tot_pos'] = int(np.isfinite(tpos['impact']).sum())
        row[event + '_tot_noimp'] = int(np.isfinite(tnoimp['impact']).sum())
        row[event + '_sumimp_neg'] = sum(tneg['impact_mag'])
        # summed over the positive events; the earlier version summed tneg
        # here, so it duplicated the *_sumimp_neg column
        row[event + '_sumimp_pos'] = sum(tpos['impact_mag'])
    event_rows.append(row)

# Build the table in one step rather than growing it cell by cell.
# dtype=float keeps the numeric formatting the cell-by-cell version produced.
sub_event_data = pd.DataFrame(event_rows, index=subject_list, columns=event_columns, dtype=float)

sub_event_data.index.name = 'ID'
sub_event_data.to_csv('../scored_data/full_sample_LEQ_eventsummaries_20201117.csv')