In [5]:
import pandas as pd
import numpy as np
from collections import Counter
from re import M
import os
import matplotlib.pyplot as plt

data_output_dir = "data/output/"
if not os.path.exists(data_output_dir):
    os.mkdir(data_output_dir)

data_intermediate_dir = "data/intermediate/"
if not os.path.exists(data_intermediate_dir):
    os.mkdir(data_intermediate_dir)
    
pd.set_option("display.max_columns", None)

# LORIS saved query (all data)
full = pd.read_csv("data/LORIS-release-10.csv", dtype=object)
len(full.index)

# Replace NaN (currently ".") values with np.nan
full = full.replace(".", np.nan)

# Drop first row (doesn't have ID)
full = full.iloc[1: , :]

# Drop empty columns
print(len(full.columns))
cols_before_remove_empty = full.columns
full = full.dropna(how='all', axis=1)
cols_after_remove_empty = full.columns
print(len(full.columns))
print("Empty columns: ")
display(sorted(list(set(cols_before_remove_empty) - set(cols_after_remove_empty))))

# Remove irrelevant NIH toolbox columns

NIH_cols = [x for x in full.columns if "NIH" in x]
NIH_assessments = set([x.split(",")[0] for x in NIH_cols])
print(NIH_assessments)
NIH_scores_cols = [x for x in NIH_cols if x.startswith("NIH_Scores,")]
display(full[NIH_scores_cols].columns)

# Drop percentile scores, only keep actual score
NIH_cols_to_drop = [x for x in NIH_scores_cols if x.endswith("_P")]
full = full.drop(NIH_cols_to_drop, axis = 1)

# Drop non-numeric columns
full = full.drop(["NIH_Scores,NIH7_Incomplete_Reason"], axis = 1)

full[[x for x in full.columns if x.startswith("NIH_Scores,")]].columns

# Remove uninteresting columns
columns_to_drop = []

column_suffixes_to_drop = ["Administration", "Data_entry", "Days_Baseline", "START_DATE", "Season", "Site", "Study", "Year", "Commercial_Use", "Release_Number"]
for suffix in column_suffixes_to_drop:
    cols_w_suffix = [x for x in full.columns if suffix in x]
    columns_to_drop.extend(cols_w_suffix)

present_columns_to_drop = full.filter(columns_to_drop)
print(len(full.columns))
full = full.drop(present_columns_to_drop, axis = 1)
print(len(full.columns))

# Check which columns have weird column names with quotes, like: "BIA,"Year",BIA"
cols_w_broken_quotes = [x for x in full.columns if ('"') in x] 
cols_w_broken_quotes

# Remove questionnaires with broken quotes
full = full.drop(cols_w_broken_quotes, axis=1)

EID_cols = [x for x in full.columns if ",EID" in x]

# Check if EID is equal among the questionnaires in one row
full_for_EID_check = full[EID_cols]

## Fill missing EIDs with EIDs from other questionnaires 
full_for_EID_check = full_for_EID_check.ffill(axis=1).bfill(axis=1)

## Check if the EID of all columns are equal to EID of the first column
print(full_for_EID_check.eq(full_for_EID_check.iloc[:, 0], axis=0).all(1).value_counts())

# Remove 16 lines with different EID within one row
print(len(full))
full = full[full_for_EID_check.eq(full_for_EID_check.iloc[:, 0], axis=0).all(1)]
print(len(full))

# ==> EID of each questionnaire are the same in a row, can use the first non-empty one as an ID

# Fill ID field with the first non-null questionnaire-specific EID
full["ID"] = full_for_EID_check.iloc[:, 0]
print(len(full.index))

# Check which questionnaires are present in the rows with unserscores (NDARZZ007YMP_1, NDARAA075AMK_Visit_1)
rows_with_underscore_in_id = full[full["ID"].str.contains("_")]
non_empty_columns_in_underscore = rows_with_underscore_in_id.columns[
    ~rows_with_underscore_in_id.isna().all()].tolist() 
non_empty_questionnaires_in_underscore = set([x.split(",")[0] for x in non_empty_columns_in_underscore])
print("Non empty questionnaires in rows with underscores in EID: ", non_empty_questionnaires_in_underscore)

# Drop rows with underscores
non_empty_questionnaires_in_underscore.remove("Identifiers")
non_empty_questionnaires_in_underscore.remove("ID")
full_wo_underscore = full[~full["ID"].str.contains("_")]

# Drop questionnaires present in rows with underscores from data ({'DailyMeds', 'TRF', 'TRF_P', 'TRF_Pre'})
for questionnaire in non_empty_questionnaires_in_underscore:
    full_wo_underscore = full_wo_underscore.drop(full_wo_underscore.filter(regex=(questionnaire+",")), axis=1)
    
# Drop questionnaires present in rows with underscores from data from list of ID columns (will be used to check if an assessment is filled)
EID_cols = [x for x in EID_cols if 'TRF' not in x]
EID_cols = [x for x in EID_cols if 'DailyMeds' not in x]

print(len(full_wo_underscore.index))
print(len(full_wo_underscore["ID"].unique()))

# Remove incomplete DX and missing DX

full_wo_underscore = full_wo_underscore[full_wo_underscore["Diagnosis_ClinicianConsensus,DX_01"] != "No Diagnosis Given: Incomplete Eval"]
full_wo_underscore = full_wo_underscore[full_wo_underscore["Diagnosis_ClinicianConsensus,EID"].notna()]
full_wo_underscore["ID"].nunique()

# Get list of assessments in data
assessment_list = set([x.split(",")[0] for x in EID_cols])

# Get assessments most people filled
pd.set_option('display.max_rows', None)
assessment_answer_counts = full_wo_underscore[EID_cols].count().sort_values(ascending=False).to_frame()
assessment_answer_counts["Ratio"] = assessment_answer_counts[0]/full_wo_underscore["ID"].nunique()*100
assessment_answer_counts.columns = ["N of Participants", "% of Participants Filled"]
display(assessment_answer_counts)

assessment_answer_counts.to_csv("data/output/assessment_filled_distrib.csv")

# Only include relevant cognitive testing, Questionnaire Measures of Emotional and Cognitive Status, and Questionnaire Measures of Family Structure, Stress, and Trauma
relevant_assessments = ["Basic_Demos", "WIAT", "WISC", "NIH_Scores", "Barratt", "WHODAS_P", "CIS_P", "WHODAS_SR", "CIS_SR", "C3SR"]
relevant_EID_list = [x+",EID" for x in relevant_assessments]

# Get only people who took the relevant assessments
relevant_columns = []
for assessment in relevant_assessments:
    columns = [column for column in full_wo_underscore.columns if column.startswith(assessment)]
    relevant_columns.extend(columns)
    
diag_colunms = ["Diagnosis_ClinicianConsensus,DX_01", "Diagnosis_ClinicianConsensus,DX_02", "Diagnosis_ClinicianConsensus,DX_03", "Diagnosis_ClinicianConsensus,DX_04", "Diagnosis_ClinicianConsensus,DX_05", "Diagnosis_ClinicianConsensus,DX_06", "Diagnosis_ClinicianConsensus,DX_07", "Diagnosis_ClinicianConsensus,DX_08", "Diagnosis_ClinicianConsensus,DX_09", "Diagnosis_ClinicianConsensus,DX_10"]
relevant_data = full_wo_underscore.loc[full_wo_underscore[relevant_EID_list].dropna(how="any").index][relevant_columns+["ID"]+diag_colunms]

# Remove EID columns: not needed anymore
relevant_data = relevant_data.drop(relevant_EID_list, axis=1)

# Remove non-used output columns
print([x for x in relevant_data.columns if "WIAT" in x])
WIAT_cols_to_keep = ["WIAT,WIAT_Word_Stnd", "WIAT,WIAT_Num_Stnd"]
WIAT_cols_to_drop = [x for x in relevant_data.columns if "WIAT" in x and x not in WIAT_cols_to_keep] 
relevant_data = relevant_data.drop(WIAT_cols_to_drop, axis=1)
print([x for x in relevant_data.columns if "WIAT" in x])

print([x for x in relevant_data.columns if "WISC" in x])
WISC_cols_to_keep = ["WISC,WISC_Coding_Scaled", "WISC,WISC_SS_Scaled", "WISC,WISC_FSIQ"]
WISC_cols_to_drop = [x for x in relevant_data.columns if "WISC" in x and x not in WISC_cols_to_keep] 
relevant_data = relevant_data.drop(WISC_cols_to_drop, axis=1)
print([x for x in relevant_data.columns if "WISC" in x])

display(relevant_data.iloc[:5,:])

# Aggregare demographics input columns
print(relevant_assessments)

print([x for x in relevant_data.columns if x.startswith("Barratt")])

## Remove per parent data from Barratt
relevant_data = relevant_data.drop(["Barratt,Barratt_P1_Edu", "Barratt,Barratt_P1_Occ", "Barratt,Barratt_P2_Edu", "Barratt,Barratt_P2_Occ"], axis=1)

# Convert numeric columns to numeric 
def print_and_to_numeric(col):
    if col.name != "ID" and "Diagnosis_ClinicianConsensus" not in col.name:
        print(col.name)
        return pd.to_numeric(col)
    else:
        return col
relevant_data = relevant_data.apply(lambda col: print_and_to_numeric(col))

# Investigate missing values 
missing_report = relevant_data.isna().sum().to_frame(name="Amount missing")
missing_report["Persentage missing"] = missing_report["Amount missing"]/relevant_data["ID"].nunique() * 100
missing_report = missing_report[~missing_report.index.str.contains("Diagnosis_ClinicianConsensus")] # remove dx because it's expected
missing_report = missing_report[missing_report["Persentage missing"] > 0]
missing_report[missing_report["Persentage missing"] > 0].sort_values(ascending=False, by="Amount missing").style.format(precision=2)

# Remove columns with more than 40% missing data
cols_to_remove = list(missing_report[missing_report["Persentage missing"] > 40].index)
cols_to_remove
print(len(relevant_data.columns))
relevant_data = relevant_data.drop(cols_to_remove, axis=1)
print(len(relevant_data.columns))

# Add missingness marker for columns with more than 5% missing data 

missing_cols_to_mark = list(missing_report[(missing_report["Persentage missing"] <= 40) & (missing_report["Persentage missing"] > 5)].index)

print(len(relevant_data.columns))
for col in missing_cols_to_mark:
    relevant_data[col+ "_WAS_MISSING"] = relevant_data[col].isna()
print(len(relevant_data.columns))

# Remove rows where output vars are not present: WIAT,WIAT_Num_Stnd, WISC,WISC_Coding_Scaled, WISC,WISC_SS_Scaled, WISC,WISC_FSIQ, WIAT,WIAT_Word_Stnd 
print(len(relevant_data.index))
relevant_data = relevant_data.dropna(subset = ["WIAT,WIAT_Num_Stnd", "WISC,WISC_Coding_Scaled", "WISC,WISC_SS_Scaled", "WISC,WISC_FSIQ", "WIAT,WIAT_Word_Stnd"])
print(len(relevant_data.index))

# Check if some columns have no variance
print(list(set(relevant_data.columns) - set(relevant_data.loc[:, (relevant_data != relevant_data.iloc[0]).any()].columns)))

print(len(relevant_data.columns))
relevant_data = relevant_data.loc[:, (relevant_data != relevant_data.iloc[0]).any()]
print(len(relevant_data.columns))

# Transform diagnosis columns
og_diag_cols = [x for x in relevant_data.columns if "DX_" in x]

## Get list of diagnoses
diags = []
for col in og_diag_cols:
    diags.extend(list(relevant_data[col].value_counts().index))
diags = list(set(diags))
diags.remove(' ')

## Make new columns
for diag in diags:
    relevant_data["Diag: "+diag] = (relevant_data[og_diag_cols] == diag).any(axis=1)
    
## Drop original diag columns
relevant_data = relevant_data.drop(og_diag_cols, axis=1)

# Remove ID column - not needed anymore
relevant_data = relevant_data.drop("ID", axis=1)

# Convert new boolean columns to numeric
relevant_data = relevant_data.replace({True: 1, False: 0})

7310
6172
Empty columns: 


['ACE,Comment_ID',
 'ACE,PSCID',
 'ACE,Visit_label',
 'ACE,\ufeffEID',
 'APPLEQUAN""',
 'APPLSCQUAN""',
 'APQ_P,Comment_ID',
 'APQ_P,PSCID',
 'APQ_P,Visit_label',
 'APQ_P,\ufeffEID',
 'APQ_SR,Comment_ID',
 'APQ_SR,PSCID',
 'APQ_SR,Visit_label',
 'APQ_SR,\ufeffEID',
 'ARI_P,Comment_ID',
 'ARI_P,PSCID',
 'ARI_P,Visit_label',
 'ARI_P,\ufeffEID',
 'ARI_S,Comment_ID',
 'ARI_S,PSCID',
 'ARI_S,Visit_label',
 'ARI_S,\ufeffEID',
 'ASR,Comment_ID',
 'ASR,PSCID',
 'ASR,Visit_label',
 'ASR,\ufeffEID',
 'ASSQ,Comment_ID',
 'ASSQ,PSCID',
 'ASSQ,Visit_label',
 'ASSQ,\ufeffEID',
 'Acc""',
 'Act_Sev_10""',
 'Age""',
 'Area_Head""',
 'Attentive""',
 'Audit,Comment_ID',
 'Audit,PSCID',
 'Audit,Visit_label',
 'Audit,\ufeffEID',
 'BIA,"Activity_Level",BIA',
 'BIA,"BMC",BIA',
 'BIA,"BMR",BIA',
 'BIA,"Days_Baseline",BIA',
 'BIA,"EID",BIA',
 'BIA,"FFMI",BIA',
 'BIA,"Fat",BIA',
 'BIA,"Height",BIA',
 'BIA,"LDM",BIA',
 'BIA,"SMM",BIA',
 'BIA,"Season",BIA',
 'BIA,"Study",BIA',
 'BIA,"TBW",BIA',
 'BIA,"Year",BIA',

{'NIH_Scores'}


Index(['NIH_Scores,Administration', 'NIH_Scores,Data_entry',
       'NIH_Scores,Days_Baseline', 'NIH_Scores,EID', 'NIH_Scores,NIH7_Card',
       'NIH_Scores,NIH7_Card_P', 'NIH_Scores,NIH7_Comp',
       'NIH_Scores,NIH7_Comp_P', 'NIH_Scores,NIH7_Complete',
       'NIH_Scores,NIH7_Flanker', 'NIH_Scores,NIH7_Flanker_P',
       'NIH_Scores,NIH7_Incomplete_Reason', 'NIH_Scores,NIH7_List',
       'NIH_Scores,NIH7_List_P', 'NIH_Scores,NIH7_Pattern',
       'NIH_Scores,NIH7_Pattern_P', 'NIH_Scores,NIH7_Picture',
       'NIH_Scores,NIH7_Picture_P', 'NIH_Scores,START_DATE',
       'NIH_Scores,Season', 'NIH_Scores,Site', 'NIH_Scores,Study',
       'NIH_Scores,Year'],
      dtype='object')

6165
5326
True     7227
False      16
dtype: int64
7243
7227
7227
Non empty questionnaires in rows with underscores in EID:  {'TRF_P', 'DailyMeds', 'TRF_Pre', 'ID', 'TRF', 'Identifiers'}
4146
4146


Unnamed: 0,N of Participants,% of Participants Filled
"Basic_Demos,EID",3613,100.0
"Diagnosis_ClinicianConsensus,EID",3613,100.0
"PreInt_EduHx,EID",3603,99.723222
"PreInt_DevHx,EID",3601,99.667866
"NIH_Scores,EID",3600,99.640188
"PreInt_TxHx,EID",3593,99.446443
"WIAT,EID",3592,99.418766
"Pegboard,EID",3568,98.754498
"SympChck,EID",3562,98.588431
"SCQ,EID",3561,98.560753


['WIAT,WIAT_Complete', 'WIAT,WIAT_Incomplete_reason', 'WIAT,WIAT_Invalid_Reason', 'WIAT,WIAT_LCODC_P', 'WIAT,WIAT_LCODC_Raw', 'WIAT,WIAT_LCODC_Stnd', 'WIAT,WIAT_LCRV_P', 'WIAT,WIAT_LCRV_Raw', 'WIAT,WIAT_LCRV_Std', 'WIAT,WIAT_LC_P', 'WIAT,WIAT_LC_Stnd', 'WIAT,WIAT_MP_P', 'WIAT,WIAT_MP_Raw', 'WIAT,WIAT_MP_Stnd', 'WIAT,WIAT_Num_P', 'WIAT,WIAT_Num_Raw', 'WIAT,WIAT_Num_Stnd', 'WIAT,WIAT_Pseudo_P', 'WIAT,WIAT_Pseudo_Raw', 'WIAT,WIAT_Pseudo_Stnd', 'WIAT,WIAT_RC_P', 'WIAT,WIAT_RC_Raw', 'WIAT,WIAT_RC_Stnd', 'WIAT,WIAT_Spell_P', 'WIAT,WIAT_Spell_Raw', 'WIAT,WIAT_Spell_Stnd', 'WIAT,WIAT_Valid', 'WIAT,WIAT_Word_P', 'WIAT,WIAT_Word_Raw', 'WIAT,WIAT_Word_Stnd']
['WIAT,WIAT_Num_Stnd', 'WIAT,WIAT_Word_Stnd']
['WISC,WISC_BD_Raw', 'WISC,WISC_BD_Scaled', 'WISC,WISC_Coding_Raw', 'WISC,WISC_Coding_Scaled', 'WISC,WISC_DS_Raw', 'WISC,WISC_DS_Scaled', 'WISC,WISC_FRI', 'WISC,WISC_FRI_Percentile', 'WISC,WISC_FRI_Sum', 'WISC,WISC_FSIQ', 'WISC,WISC_FSIQ_Percentile', 'WISC,WISC_FSIQ_Sum', 'WISC,WISC_FW_Raw', 'WISC

Unnamed: 0,"Basic_Demos,Age","Basic_Demos,Sex","WIAT,WIAT_Num_Stnd","WIAT,WIAT_Word_Stnd","WISC,WISC_Coding_Scaled","WISC,WISC_FSIQ","WISC,WISC_SS_Scaled","NIH_Scores,NIH7_Card","NIH_Scores,NIH7_Comp","NIH_Scores,NIH7_Complete","NIH_Scores,NIH7_Flanker","NIH_Scores,NIH7_List","NIH_Scores,NIH7_Pattern","NIH_Scores,NIH7_Picture","Barratt,Barratt_P1_Edu","Barratt,Barratt_P1_Occ","Barratt,Barratt_P2_Edu","Barratt,Barratt_P2_Occ","Barratt,Barratt_Total","Barratt,Barratt_Total_Edu","Barratt,Barratt_Total_Occ","Barratt,financialsupport","WHODAS_P,WHODAS_P_01","WHODAS_P,WHODAS_P_02","WHODAS_P,WHODAS_P_03","WHODAS_P,WHODAS_P_04","WHODAS_P,WHODAS_P_05","WHODAS_P,WHODAS_P_06","WHODAS_P,WHODAS_P_07","WHODAS_P,WHODAS_P_08","WHODAS_P,WHODAS_P_09","WHODAS_P,WHODAS_P_10","WHODAS_P,WHODAS_P_11","WHODAS_P,WHODAS_P_12","WHODAS_P,WHODAS_P_Days01","WHODAS_P,WHODAS_P_Days02","WHODAS_P,WHODAS_P_Days03","WHODAS_P,WHODAS_P_Total","CIS_P,CIS_P_01","CIS_P,CIS_P_02","CIS_P,CIS_P_03","CIS_P,CIS_P_04","CIS_P,CIS_P_05","CIS_P,CIS_P_06","CIS_P,CIS_P_07","CIS_P,CIS_P_08","CIS_P,CIS_P_09","CIS_P,CIS_P_10","CIS_P,CIS_P_11","CIS_P,CIS_P_12","CIS_P,CIS_P_13","CIS_P,CIS_P_Score","WHODAS_SR,WHODAS_SR_01","WHODAS_SR,WHODAS_SR_02","WHODAS_SR,WHODAS_SR_03","WHODAS_SR,WHODAS_SR_04","WHODAS_SR,WHODAS_SR_05","WHODAS_SR,WHODAS_SR_06","WHODAS_SR,WHODAS_SR_07","WHODAS_SR,WHODAS_SR_08","WHODAS_SR,WHODAS_SR_09","WHODAS_SR,WHODAS_SR_10","WHODAS_SR,WHODAS_SR_11","WHODAS_SR,WHODAS_SR_12","WHODAS_SR,WHODAS_SR_Days01","WHODAS_SR,WHODAS_SR_Days02","WHODAS_SR,WHODAS_SR_Days03","WHODAS_SR,WHODAS_SR_Score","CIS_SR,CIS_SR_01","CIS_SR,CIS_SR_02","CIS_SR,CIS_SR_03","CIS_SR,CIS_SR_04","CIS_SR,CIS_SR_05","CIS_SR,CIS_SR_06","CIS_SR,CIS_SR_07","CIS_SR,CIS_SR_08","CIS_SR,CIS_SR_09","CIS_SR,CIS_SR_10","CIS_SR,CIS_SR_11","CIS_SR,CIS_SR_12","CIS_SR,CIS_SR_13","CIS_SR,CIS_SR_Total","C3SR,C3SR_01","C3SR,C3SR_02","C3SR,C3SR_03","C3SR,C3SR_04","C3SR,C3SR_05","C3SR,C3SR_06","C3SR,C3SR_07","C3SR,C3SR_08","C3SR,C3SR_09","C3SR,C3SR_10","C3SR,C3SR_11","C3SR,C3SR_12","C3SR,C3SR_13","C3SR,C3SR_14","C3SR,C3SR_15","C3SR,C3SR_16","C3SR,C3SR_17","C3SR,C3SR_18","C3SR,C3SR_19","C3SR,C3SR_20","C3SR,C3SR_21","C3SR,C3SR_22","C3SR,C3SR_23","C3SR,C3SR_24","C3SR,C3SR_25","C3SR,C3SR_26","C3SR,C3SR_27","C3SR,C3SR_28","C3SR,C3SR_29","C3SR,C3SR_30","C3SR,C3SR_31","C3SR,C3SR_32","C3SR,C3SR_33","C3SR,C3SR_34","C3SR,C3SR_35","C3SR,C3SR_36","C3SR,C3SR_37","C3SR,C3SR_38","C3SR,C3SR_39","C3SR,C3SR_AG","C3SR,C3SR_AG_T","C3SR,C3SR_FR","C3SR,C3SR_FR_T","C3SR,C3SR_HY","C3SR,C3SR_HY_T","C3SR,C3SR_IN","C3SR,C3SR_IN_T","C3SR,C3SR_LP","C3SR,C3SR_LP_T","C3SR,C3SR_NI","C3SR,C3SR_PI",ID,"Diagnosis_ClinicianConsensus,DX_01","Diagnosis_ClinicianConsensus,DX_02","Diagnosis_ClinicianConsensus,DX_03","Diagnosis_ClinicianConsensus,DX_04","Diagnosis_ClinicianConsensus,DX_05","Diagnosis_ClinicianConsensus,DX_06","Diagnosis_ClinicianConsensus,DX_07","Diagnosis_ClinicianConsensus,DX_08","Diagnosis_ClinicianConsensus,DX_09","Diagnosis_ClinicianConsensus,DX_10"
4,9.165297,1,107,83,13,100,11,100,,1,78,82.0,65,,21,40,,,61.0,21.0,40.0,1,0,1,1,0,0,1,0,0,0,0,1,1,5,0.0,0.0,10.42,2,1,1,1,2,1,2,2,5,0,0,3,2,17,0,1,3,,1,3,0,0,0,0,1,0,5,7,0,18.75,2,0,0,2,1,0,0,1,5,0,0,1,2,9,1,1,2,3,1,3,3,0,1,0,3,1,1,1,1,2,0,1,0,0,1,0,3,3,3,0,0,3,3,2,2,0,2,0,0,3,0,2,0,2,56,0,41,10,75,7,58,9,67,1,2,NDARAA504CRN,ADHD-Inattentive Type,Specific Learning Disorder with Impairment in ...,Other Specified Anxiety Disorder,Enuresis,Encopresis,,,,,
6,13.62788,0,53,100,6,87,9,61,,1,67,74.0,71,,18,35,,,53.0,18.0,35.0,1,1,3,0,1,0,2,0,0,0,2,3,2,10,,,29.17,2,3,5,3,2,2,3,2,5,2,2,2,3,26,0,1,1,1.0,0,1,1,1,1,1,1,1,0,0,0,20.83,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,1,2,2,2,3,0,2,0,3,1,1,1,1,1,0,1,0,1,0,0,3,0,1,0,1,0,0,1,1,0,1,0,0,0,0,1,0,1,48,0,42,5,58,7,63,5,63,2,0,NDARAA947ZG5,ADHD-Combined Type,Autism Spectrum Disorder,Specific Learning Disorder with Impairment in ...,Other Specified Trauma- and Stressor-Related D...,,,,,,
7,7.98266,1,96,96,10,95,9,87,,1,83,86.0,77,,21,40,15.0,30.0,53.0,18.0,35.0,3,0,1,0,0,0,0,0,0,0,0,0,1,10,0.0,0.0,4.17,0,0,2,0,1,0,0,0,1,1,0,1,0,6,4,1,2,1.0,2,4,4,0,1,4,0,1,5,1,1,50.0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,2,2,2,3,0,2,0,3,2,2,3,1,2,0,3,0,1,3,1,3,3,3,0,2,0,2,1,3,0,3,0,1,2,0,3,0,1,48,1,45,8,68,14,77,12,83,0,1,NDARAA948VFH,ADHD-Combined Type,Enuresis,,,,,,,,
8,10.793862,1,98,98,9,89,11,86,,1,83,100.0,95,,21,35,12.0,15.0,41.5,16.5,25.0,1,0,1,0,0,1,1,0,0,0,0,0,2,15,2.0,5.0,10.42,3,0,0,1,3,0,1,0,0,0,0,4,2,14,4,2,1,0.0,3,1,0,1,1,0,0,1,3,0,30,29.17,0,0,0,1,2,0,0,1,0,0,0,1,1,6,1,2,1,1,2,3,3,0,2,0,3,2,1,2,1,2,0,0,1,3,2,0,3,2,3,0,2,2,1,1,3,0,2,0,1,1,1,2,0,1,48,2,50,7,65,9,69,8,71,0,1,NDARAB055BPR,ADHD-Combined Type,,,,,,,,,
10,12.836185,0,94,110,6,102,4,68,,1,74,,78,,15,25,15.0,30.0,42.5,15.0,27.5,3,0,3,0,0,1,1,0,3,2,0,0,2,10,0.0,0.0,25.0,0,0,0,0,0,0,0,0,0,0,1,3,1,5,2,3,0,0.0,0,2,0,0,0,1,0,0,5,0,3,16.67,1,1,1,0,1,0,1,0,1,0,1,2,1,10,2,2,2,2,2,3,2,2,2,2,2,2,3,3,1,3,0,2,1,1,3,1,3,0,2,0,3,0,2,3,3,0,0,1,0,0,0,1,0,7,87,3,51,10,78,15,86,6,65,0,0,NDARAB458VK9,ADHD-Inattentive Type,Enuresis,,,,,,,,


['Basic_Demos', 'WIAT', 'WISC', 'NIH_Scores', 'Barratt', 'WHODAS_P', 'CIS_P', 'WHODAS_SR', 'CIS_SR', 'C3SR']
['Barratt,Barratt_P1_Edu', 'Barratt,Barratt_P1_Occ', 'Barratt,Barratt_P2_Edu', 'Barratt,Barratt_P2_Occ', 'Barratt,Barratt_Total', 'Barratt,Barratt_Total_Edu', 'Barratt,Barratt_Total_Occ', 'Barratt,financialsupport']
Basic_Demos,Age
Basic_Demos,Sex
WIAT,WIAT_Num_Stnd
WIAT,WIAT_Word_Stnd
WISC,WISC_Coding_Scaled
WISC,WISC_FSIQ
WISC,WISC_SS_Scaled
NIH_Scores,NIH7_Card
NIH_Scores,NIH7_Comp
NIH_Scores,NIH7_Complete
NIH_Scores,NIH7_Flanker
NIH_Scores,NIH7_List
NIH_Scores,NIH7_Pattern
NIH_Scores,NIH7_Picture
Barratt,Barratt_Total
Barratt,Barratt_Total_Edu
Barratt,Barratt_Total_Occ
Barratt,financialsupport
WHODAS_P,WHODAS_P_01
WHODAS_P,WHODAS_P_02
WHODAS_P,WHODAS_P_03
WHODAS_P,WHODAS_P_04
WHODAS_P,WHODAS_P_05
WHODAS_P,WHODAS_P_06
WHODAS_P,WHODAS_P_07
WHODAS_P,WHODAS_P_08
WHODAS_P,WHODAS_P_09
WHODAS_P,WHODAS_P_10
WHODAS_P,WHODAS_P_11
WHODAS_P,WHODAS_P_12
WHODAS_P,WHODAS_P_Days01
WHODAS_P,

In [6]:
# Separate subscale and total scores
for assessment in relevant_assessments:
    display(relevant_data[[x for x in relevant_data.columns if x.startswith(assessment)]].describe().loc[["min", "max"]])

Unnamed: 0,"Basic_Demos,Age","Basic_Demos,Sex"
min,7.237166,0.0
max,16.962126,1.0


Unnamed: 0,"WIAT,WIAT_Num_Stnd","WIAT,WIAT_Word_Stnd"
min,46.0,40.0
max,160.0,142.0


Unnamed: 0,"WISC,WISC_Coding_Scaled","WISC,WISC_FSIQ","WISC,WISC_SS_Scaled"
min,1.0,47.0,1.0
max,19.0,147.0,19.0


Unnamed: 0,"NIH_Scores,NIH7_Card","NIH_Scores,NIH7_Complete","NIH_Scores,NIH7_Flanker","NIH_Scores,NIH7_List","NIH_Scores,NIH7_Pattern"
min,57.0,0.0,58.0,48.0,1.0
max,172.0,2.0,151.0,181.0,169.0


Unnamed: 0,"Barratt,Barratt_Total","Barratt,Barratt_Total_Edu","Barratt,Barratt_Total_Occ","Barratt,financialsupport"
min,0.0,0.0,0.0,1.0
max,66.0,21.0,45.0,3.0


Unnamed: 0,"WHODAS_P,WHODAS_P_01","WHODAS_P,WHODAS_P_02","WHODAS_P,WHODAS_P_03","WHODAS_P,WHODAS_P_04","WHODAS_P,WHODAS_P_05","WHODAS_P,WHODAS_P_06","WHODAS_P,WHODAS_P_07","WHODAS_P,WHODAS_P_08","WHODAS_P,WHODAS_P_09","WHODAS_P,WHODAS_P_10","WHODAS_P,WHODAS_P_11","WHODAS_P,WHODAS_P_12","WHODAS_P,WHODAS_P_Days01","WHODAS_P,WHODAS_P_Days02","WHODAS_P,WHODAS_P_Days03","WHODAS_P,WHODAS_P_Total"
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,30.0,30.0,30.0,79.17


Unnamed: 0,"CIS_P,CIS_P_01","CIS_P,CIS_P_02","CIS_P,CIS_P_03","CIS_P,CIS_P_04","CIS_P,CIS_P_05","CIS_P,CIS_P_06","CIS_P,CIS_P_07","CIS_P,CIS_P_08","CIS_P,CIS_P_09","CIS_P,CIS_P_10","CIS_P,CIS_P_11","CIS_P,CIS_P_12","CIS_P,CIS_P_13","CIS_P,CIS_P_Score"
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,49.0


Unnamed: 0,"WHODAS_SR,WHODAS_SR_01","WHODAS_SR,WHODAS_SR_02","WHODAS_SR,WHODAS_SR_03","WHODAS_SR,WHODAS_SR_04","WHODAS_SR,WHODAS_SR_05","WHODAS_SR,WHODAS_SR_06","WHODAS_SR,WHODAS_SR_07","WHODAS_SR,WHODAS_SR_08","WHODAS_SR,WHODAS_SR_09","WHODAS_SR,WHODAS_SR_10","WHODAS_SR,WHODAS_SR_11","WHODAS_SR,WHODAS_SR_12","WHODAS_SR,WHODAS_SR_Days01","WHODAS_SR,WHODAS_SR_Days02","WHODAS_SR,WHODAS_SR_Days03","WHODAS_SR,WHODAS_SR_Score"
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,55.0,31.0,100.0,100.0


Unnamed: 0,"CIS_SR,CIS_SR_01","CIS_SR,CIS_SR_02","CIS_SR,CIS_SR_03","CIS_SR,CIS_SR_04","CIS_SR,CIS_SR_05","CIS_SR,CIS_SR_06","CIS_SR,CIS_SR_07","CIS_SR,CIS_SR_08","CIS_SR,CIS_SR_09","CIS_SR,CIS_SR_10","CIS_SR,CIS_SR_11","CIS_SR,CIS_SR_12","CIS_SR,CIS_SR_13","CIS_SR,CIS_SR_Total"
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,47.0


Unnamed: 0,"C3SR,C3SR_01","C3SR,C3SR_02","C3SR,C3SR_03","C3SR,C3SR_04","C3SR,C3SR_05","C3SR,C3SR_06","C3SR,C3SR_07","C3SR,C3SR_08","C3SR,C3SR_09","C3SR,C3SR_10","C3SR,C3SR_11","C3SR,C3SR_12","C3SR,C3SR_13","C3SR,C3SR_14","C3SR,C3SR_15","C3SR,C3SR_16","C3SR,C3SR_17","C3SR,C3SR_18","C3SR,C3SR_19","C3SR,C3SR_20","C3SR,C3SR_21","C3SR,C3SR_22","C3SR,C3SR_23","C3SR,C3SR_24","C3SR,C3SR_25","C3SR,C3SR_26","C3SR,C3SR_27","C3SR,C3SR_28","C3SR,C3SR_29","C3SR,C3SR_30","C3SR,C3SR_31","C3SR,C3SR_32","C3SR,C3SR_33","C3SR,C3SR_34","C3SR,C3SR_35","C3SR,C3SR_36","C3SR,C3SR_37","C3SR,C3SR_38","C3SR,C3SR_39","C3SR,C3SR_AG","C3SR,C3SR_AG_T","C3SR,C3SR_FR","C3SR,C3SR_FR_T","C3SR,C3SR_HY","C3SR,C3SR_HY_T","C3SR,C3SR_IN","C3SR,C3SR_IN_T","C3SR,C3SR_LP","C3SR,C3SR_LP_T","C3SR,C3SR_NI","C3SR,C3SR_PI"
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,41.0,0.0,41.0,0.0,40.0,0.0,40.0,0.0,40.0,0.0,0.0
max,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,17.0,90.0,15.0,90.0,15.0,90.0,18.0,90.0,15.0,90.0,5.0,6.0


In [7]:
total_score_cols_w_raw = [
                    "Barratt,Barratt_Total", 
                   ]
total_score_raw_cols = [x.strip("_T") for x in total_score_cols_w_raw if x.endswith("_T")]
subscale_score_cols_w_raw = ["Barratt,Barratt_Total_Edu", "Barratt,Barratt_Total_Occ",
                       "C3SR,C3SR_AG", "C3SR,C3SR_AG_T", "C3SR,C3SR_FR", "C3SR,C3SR_FR_T", "C3SR,C3SR_HY", "C3SR,C3SR_HY_T", "C3SR,C3SR_IN", "C3SR,C3SR_IN_T", "C3SR,C3SR_LP", "C3SR,C3SR_LP_T", "C3SR,C3SR_NI", "C3SR,C3SR_PI"
                            ]
subscale_score_raw_cols = [x.strip("_T") for x in subscale_score_cols_w_raw if x.endswith("_T")]

# Item level columns = all columns except those of total and subscale scores (includes diag cols)
item_level_cols = [x for x in relevant_columns if (x not in total_score_cols_w_raw) and (x not in subscale_score_cols_w_raw)]
item_level_col_subset = [x for x in relevant_data.columns if (x not in total_score_cols_w_raw) and (x not in subscale_score_cols_w_raw)]
relevant_data_item_lvl = relevant_data[item_level_col_subset]
print(len(relevant_data_item_lvl.columns))

198


In [8]:
# Remove _WAS_MISSING columns that are not linked to present from each dataset
was_missing_cols = [x for x in relevant_data.columns if "_WAS_MISSING" in x]
was_missing_col_originals = [x.split("_WAS_MISSING")[0] for x in was_missing_cols]

for col in was_missing_col_originals:
    cols_matching_was_missing = [x for x in relevant_data if x.endswith(col)]
    if col not in relevant_data_item_lvl.columns and col +"_WAS_MISSING" in relevant_data_item_lvl.columns:
        relevant_data_item_lvl = relevant_data_item_lvl.drop(col+"_WAS_MISSING", axis=1)
print(len(relevant_data_item_lvl.columns))

# Export final datasets

## Predicting impairment (remove impairment questionnaires from input)
impairment_assessments = ["WHODAS_SR,", "WHODAS_P,", "CIS_SR,", "CIS_P,"]

impairment_columns = [] # All item-level impairment (keep total impairment scores for output)
for impairment_assessment in  impairment_assessments:
    impairment_columns.extend([x for x in relevant_data if x.startswith(impairment_assessment) and x not in ["WHODAS_P,WHODAS_P_Total", "CIS_P,CIS_P_Score", "WHODAS_SR,WHODAS_SR_Score", "CIS_SR,CIS_SR_Total"]])

relevant_data_item_lvl_wo_impairment = relevant_data_item_lvl[[x for x in relevant_data_item_lvl.columns if x not in impairment_columns]]

relevant_data_item_lvl_wo_impairment.to_csv(data_intermediate_dir + "item_lvl_wo_impairment.csv", index=False)

## Predicting diagnoses
relevant_data_item_lvl_w_impairment = relevant_data_item_lvl

relevant_data_item_lvl_w_impairment.to_csv(data_intermediate_dir + "item_lvl_w_impairment.csv", index=False)

# Check value distributions of item level vars (to see that we didn't miss any subscale scores)
relevant_data_item_lvl.describe().loc[["min", "max"],:]

198


Unnamed: 0,"Basic_Demos,Age","Basic_Demos,Sex","WIAT,WIAT_Num_Stnd","WIAT,WIAT_Word_Stnd","WISC,WISC_Coding_Scaled","WISC,WISC_FSIQ","WISC,WISC_SS_Scaled","NIH_Scores,NIH7_Card","NIH_Scores,NIH7_Complete","NIH_Scores,NIH7_Flanker","NIH_Scores,NIH7_List","NIH_Scores,NIH7_Pattern","Barratt,financialsupport","WHODAS_P,WHODAS_P_01","WHODAS_P,WHODAS_P_02","WHODAS_P,WHODAS_P_03","WHODAS_P,WHODAS_P_04","WHODAS_P,WHODAS_P_05","WHODAS_P,WHODAS_P_06","WHODAS_P,WHODAS_P_07","WHODAS_P,WHODAS_P_08","WHODAS_P,WHODAS_P_09","WHODAS_P,WHODAS_P_10","WHODAS_P,WHODAS_P_11","WHODAS_P,WHODAS_P_12","WHODAS_P,WHODAS_P_Days01","WHODAS_P,WHODAS_P_Days02","WHODAS_P,WHODAS_P_Days03","WHODAS_P,WHODAS_P_Total","CIS_P,CIS_P_01","CIS_P,CIS_P_02","CIS_P,CIS_P_03","CIS_P,CIS_P_04","CIS_P,CIS_P_05","CIS_P,CIS_P_06","CIS_P,CIS_P_07","CIS_P,CIS_P_08","CIS_P,CIS_P_09","CIS_P,CIS_P_10","CIS_P,CIS_P_11","CIS_P,CIS_P_12","CIS_P,CIS_P_13","CIS_P,CIS_P_Score","WHODAS_SR,WHODAS_SR_01","WHODAS_SR,WHODAS_SR_02","WHODAS_SR,WHODAS_SR_03","WHODAS_SR,WHODAS_SR_04","WHODAS_SR,WHODAS_SR_05","WHODAS_SR,WHODAS_SR_06","WHODAS_SR,WHODAS_SR_07","WHODAS_SR,WHODAS_SR_08","WHODAS_SR,WHODAS_SR_09","WHODAS_SR,WHODAS_SR_10","WHODAS_SR,WHODAS_SR_11","WHODAS_SR,WHODAS_SR_12","WHODAS_SR,WHODAS_SR_Days01","WHODAS_SR,WHODAS_SR_Days02","WHODAS_SR,WHODAS_SR_Days03","WHODAS_SR,WHODAS_SR_Score","CIS_SR,CIS_SR_01","CIS_SR,CIS_SR_02","CIS_SR,CIS_SR_03","CIS_SR,CIS_SR_04","CIS_SR,CIS_SR_05","CIS_SR,CIS_SR_06","CIS_SR,CIS_SR_07","CIS_SR,CIS_SR_08","CIS_SR,CIS_SR_09","CIS_SR,CIS_SR_10","CIS_SR,CIS_SR_11","CIS_SR,CIS_SR_12","CIS_SR,CIS_SR_13","CIS_SR,CIS_SR_Total","C3SR,C3SR_01","C3SR,C3SR_02","C3SR,C3SR_03","C3SR,C3SR_04","C3SR,C3SR_05","C3SR,C3SR_06","C3SR,C3SR_07","C3SR,C3SR_08","C3SR,C3SR_09","C3SR,C3SR_10","C3SR,C3SR_11","C3SR,C3SR_12","C3SR,C3SR_13","C3SR,C3SR_14","C3SR,C3SR_15","C3SR,C3SR_16","C3SR,C3SR_17","C3SR,C3SR_18","C3SR,C3SR_19","C3SR,C3SR_20","C3SR,C3SR_21","C3SR,C3SR_22","C3SR,C3SR_23","C3SR,C3SR_24","C3SR,C3SR_25","C3SR,C3SR_26","C3SR,C3SR_27","C3SR,C3SR_28","C3SR,C3SR_29","C3SR,C3SR_30","C3SR,C3SR_31","C3SR,C3SR_32","C3SR,C3SR_33","C3SR,C3SR_34","C3SR,C3SR_35","C3SR,C3SR_36","C3SR,C3SR_37","C3SR,C3SR_38","C3SR,C3SR_39",Diag: Enuresis,Diag: Tobacco Use Disorder,Diag: Disinhibited Social Engagement Disorder,Diag: Selective Mutism,Diag: Social (Pragmatic) Communication Disorder,Diag: Panic Disorder,Diag: Developmental Coordination Disorder,Diag: Anorexia Nervosa-Binge Eating/Purging Type,Diag: ADHD-Inattentive Type,Diag: ADHD-Hyperactive/Impulsive Type,Diag: Reactive Attachment Disorder,Diag: Speech Sound Disorder,Diag: Delirium due to multiple etiologies,Diag: Binge-Eating Disorder,Diag: Narcolepsy,Diag: Other Specified Depressive Disorder,Diag: Other Specified Trauma- and Stressor-Related Disorder,Diag: Persistent Depressive Disorder (Dysthymia),Diag: Unspecified Tic Disorder,Diag: Pica in Children,Diag: Intellectual Disability-Moderate,Diag: Insomnia Disorder,Diag: Other Specified Anxiety Disorder,Diag: Other Specified Elimination Disorder with Urinary Symptoms,Diag: Cyclothymic Disorder,Diag: Specific Learning Disorder with Impairment in Mathematics,Diag: Unspecified Attention-Deficit/Hyperactivity Disorder,Diag: Adjustment Disorders,Diag: Substance/Medication-Induced Bipolar and Related Disorder,Diag: Gender Dysphoria in Adolescents and Adults,Diag: Unspecified Neurodevelopmental Disorder,Diag: Obsessive-Compulsive Disorder,Diag: Other Specified Obsessive-Compulsive and Related Disorder,Diag: Social Anxiety (Social Phobia),Diag: Child Onset Fluency Disorder (Stuttering),Diag: Separation Anxiety,Diag: Tourettes Disorder,Diag: Sibling Relational Problem,Diag: Specific Learning Disorder with Impairment in Written Expression,Diag: Other Specified Neurodevelopmental Disorder,Diag: Trichotillomania (Hair-Pulling Disorder),Diag: Other Specified Elimination Disorder with Fecal Symptoms,Diag: Intermittent Explosive Disorder,Diag: Major Depressive Disorder,Diag: Language Disorder,Diag: Other Specified Schizophrenia Spectrum and Other Psychotic Disorder,Diag: Conduct Disorder-Unspecified onset,Diag: Provisional Tic Disorder,Diag: Excoriation (Skin-Picking) Disorder,Diag: Conduct Disorder-Adolescent-onset type,Diag: Anorexia Nervosa-Restricting Type,Diag: Avoidant/Restrictive Food Intake Disorder,Diag: Schizophrenia,Diag: Agoraphobia,Diag: Parent-Child Relational Problem,Diag: Unspecified Intellectual Disability,Diag: Other Specified Attention-Deficit/Hyperactivity Disorder,Diag: Bulimia Nervosa,Diag: Bipolar II Disorder,Diag: Disruptive Mood Dysregulation Disorder,Diag: Conduct Disorder-Childhood-onset type,Diag: Non-Rapid Eye Movement Sleep Arousal Disorders,Diag: Unspecified Anxiety Disorder,Diag: Encopresis,Diag: Intellectual Disability-Mild,Diag: Oppositional Defiant Disorder,Diag: Hoarding Disorder,Diag: ADHD-Combined Type,Diag: Delirium due to another medical condition,Diag: Unspecified Trauma- and Stressor-Related Disorder,Diag: Cannabis Use Disorder,Diag: Other Specified Tic Disorder,Diag: Specific Learning Disorder with Impairment in Reading,Diag: Specific Phobia,Diag: Other Specified Feeding or Eating Disorder,Diag: No Diagnosis Given,Diag: Borderline Intellectual Functioning,Diag: Unspecified Depressive Disorder,Diag: Conversion Disorder,Diag: Illness Anxiety Disorder,Diag: Autism Spectrum Disorder,Diag: Neurobehavioral Disorder Associated with Prenatal Alcohol Exposure (ND-PAE),Diag: Generalized Anxiety Disorder,Diag: Persistent (Chronic) Motor or Vocal Tic Disorder,"Diag: Other Specified Disruptive, Impulse-Control, and Conduct Disorder",Diag: Posttraumatic Stress Disorder
min,7.237166,0.0,46.0,40.0,1.0,47.0,1.0,57.0,0.0,58.0,48.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,16.962126,1.0,160.0,142.0,19.0,147.0,19.0,172.0,2.0,151.0,181.0,169.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,30.0,30.0,30.0,79.17,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,49.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,55.0,31.0,100.0,100.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,47.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
