In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from re import M
import pandas as pd
import numpy as np

# Show every column when displaying wide frames. The fully-qualified key is
# required: the bare pattern "max_columns" also matches other options in recent
# pandas (e.g. "styler.render.max_columns") and then raises OptionError.
pd.set_option("display.max_columns", None)

In [8]:
# Estimate how many training examples are available.
# The first data row of each questionnaire export is an extra header row,
# so it is skipped right after loading.
swan = pd.read_csv('data/SWAN.csv').iloc[1:, :]
whodas = pd.read_csv('data/WHODAS.csv').iloc[1:, :]

# Keep only the text after the last comma of every diagnoses column name.
diagnoses = pd.read_csv('data/diagnoses-data.csv').rename(
    columns=lambda name: name.split(",")[-1]
)

In [9]:
# Join each questionnaire with the diagnoses on the shared "EID" column
# (default inner join: only EIDs present on both sides are kept).
merged_swan_diag = swan.merge(diagnoses, on="EID")
merged_whodas_diag = whodas.merge(diagnoses, on="EID")

In [10]:
# Non-null "Anonymized ID" values in the SWAN + diagnoses merge
merged_swan_diag["Anonymized ID"].count()

3147

In [11]:
# Distinct "Anonymized ID" values in the SWAN + diagnoses merge
# (equal to the count above when no ID is repeated)
merged_swan_diag["Anonymized ID"].nunique()

3147

In [12]:
# Non-null "Anonymized ID" values in the WHODAS + diagnoses merge
merged_whodas_diag["Anonymized ID"].count()

2531

In [13]:
# Distinct "Anonymized ID" values in the WHODAS + diagnoses merge
merged_whodas_diag["Anonymized ID"].nunique()

2531

In [16]:
# How much data before November 2016 (WHODAS added)
is_2016 = merged_swan_diag["Year_x"] == "2016"
is_early_season = merged_swan_diag["Season_x"].isin(["Winter", "Spring", "Summer"])
merged_swan_diag.loc[is_2016 & is_early_season, "Anonymized ID"].count()

328

In [17]:
# Drop the records from the Winter, Spring and Summer seasons of 2016
# (Year_x holds strings, hence the comparison against "2016").
early_2016 = (merged_swan_diag["Year_x"] == "2016") & merged_swan_diag["Season_x"].isin(
    ["Winter", "Spring", "Summer"]
)
merged_swan_diag_dropped = merged_swan_diag[~early_2016]

# Remove missing diagnoses (evaluation never completed)
incomplete_eval = merged_swan_diag_dropped["DX_01"] == "No Diagnosis Given: Incomplete Eval"
merged_swan_diag_dropped = merged_swan_diag_dropped[~incomplete_eval]

# Non-null count of the first column. `.iloc[0]` is explicit positional access;
# `count()[0]` relied on the deprecated integer-as-position fallback of
# Series.__getitem__ on a label-indexed Series.
merged_swan_diag_dropped.count().iloc[0]

2732

In [18]:
# Diagnosis counts (any position)
# DX_01 ... DX_10; the (misspelled) variable name is kept because the
# diagnosis-combination cell reads it.
diag_colunms = [f"DX_{position:02d}" for position in range(1, 11)]
pd.set_option('display.max_rows', None)
# One value_counts column per DX position, then summed across positions
counts_per_position = merged_swan_diag_dropped[diag_colunms].apply(pd.Series.value_counts)
counts_per_position.sum(numeric_only=True, axis=1).sort_values(ascending=False)

ADHD-Combined Type                                                             781.0
ADHD-Inattentive Type                                                          764.0
Specific Learning Disorder with Impairment in Reading                          504.0
Autism Spectrum Disorder                                                       476.0
Generalized Anxiety Disorder                                                   411.0
Oppositional Defiant Disorder                                                  392.0
Language Disorder                                                              358.0
Specific Phobia                                                                322.0
Social Anxiety (Social Phobia)                                                 285.0
Specific Learning Disorder with Impairment in Mathematics                      236.0
No Diagnosis Given                                                             233.0
Enuresis                                                         

In [24]:
# Diagnosis combinations counts
def clean_diag_list(row):
    """Return the distinct, non-missing entries of ``row`` as a sorted-list string.

    Missing values are filtered with the ``entry == entry`` check, which is
    False for NaN. The result is ``str()`` of the sorted list of the remaining
    unique entries.
    """
    unique_entries = sorted({entry for entry in row if entry == entry})
    return str(unique_entries)

# One cleaned, stringified diagnosis set per row
diagnosis_rows = merged_swan_diag_dropped[diag_colunms].to_numpy()
list_of_diagnosis_sets = [clean_diag_list(row) for row in diagnosis_rows]

# (combination, count) pairs, most frequent first
counter = Counter(list_of_diagnosis_sets).most_common()
diag_combination_counter_df = pd.DataFrame(counter)

# Do not truncate the long combination strings when displaying
pd.set_option('display.max_colwidth', None)

diag_combination_counter_df.head(10000)

Unnamed: 0,0,1
0,['No Diagnosis Given'],233
1,['ADHD-Inattentive Type'],153
2,['ADHD-Combined Type'],122
3,['Specific Learning Disorder with Impairment in Reading'],60
4,"['ADHD-Combined Type', 'Oppositional Defiant Disorder']",55
5,"['ADHD-Inattentive Type', 'Autism Spectrum Disorder']",41
6,"['ADHD-Combined Type', 'Autism Spectrum Disorder']",39
7,['Other Specified Attention-Deficit/Hyperactivity Disorder'],38
8,['Autism Spectrum Disorder'],31
9,"['ADHD-Inattentive Type', 'Specific Learning Disorder with Impairment in Reading']",26


In [None]:
# Cap displayed rows at 20 again (earlier cells set display.max_rows to None)
pd.set_option('display.max_rows', 20) 

## Explore questionnaire distribution
### LORIS data

In [2]:
# LORIS saved query (all data)
# dtype=object: load every column without numeric type inference
full = pd.read_csv("data/all.csv", dtype=object)

In [6]:
# Check which questionnaires are in the rows with underscores (NDARZZ007YMP_1, NDARAA075AMK_Visit_1)
has_underscore = full["Identifiers"].str.contains("_")
rows_with_underscore_in_id = full[has_underscore]

# A column is treated as empty when every value in it is the "." placeholder
column_is_empty = rows_with_underscore_in_id.isin(["."]).all()
non_empty_columns = rows_with_underscore_in_id.columns[~column_is_empty].tolist()

# Keep the text before the first comma of each remaining column name
non_empty_questionnaires_in_underscore = {name.split(",")[0] for name in non_empty_columns}
non_empty_questionnaires_in_underscore

{'DailyMeds', 'Identifiers', 'TRF', 'TRF_P', 'TRF_Pre'}

In [7]:
# Drop rows with underscores and the questionnaires present in rows with underscores
# "Identifiers" is the ID column, not a questionnaire. `discard` (unlike
# `remove`) does not raise KeyError when this cell is re-run and the entry
# has already been taken out of the set.
non_empty_questionnaires_in_underscore.discard("Identifiers")
main = full[~full["Identifiers"].str.contains("_")]
len(main.columns)

7120

In [8]:
# Remove every column that belongs to one of the questionnaires found in the
# underscore rows. A column matches when the text before its first comma is
# exactly one of those questionnaire names (the same split(",")[0] rule that
# produced the names), instead of an unanchored, unescaped regex search that
# would also hit any column merely containing "<name>,".
columns_to_drop = [
    column
    for column in main.columns
    if "," in column
    and column.split(",")[0] in non_empty_questionnaires_in_underscore
]
# `drop` returns a new frame, so `main` itself is left untouched.
filtered_main = main.drop(columns=columns_to_drop)
# Remaining column count (previously evaluated inside the loop, where the
# value was silently discarded).
len(filtered_main.columns)

In [9]:
# Extract ID: first comma-separated field of "Identifiers", cut at the first underscore
extracted_ids = filtered_main["Identifiers"].str.split(',').str[0].str.split('_').str[0]
# `assign` builds a new frame instead of writing a column into a frame that
# may still be a slice of `full` (avoids SettingWithCopyWarning).
filtered_main = filtered_main.assign(ID=extracted_ids)

# Drop IDs with quotes around them (duplicates)
id_has_quote = filtered_main["ID"].str.contains("'")
filtered_main = filtered_main[~id_has_quote]

In [11]:
# Non-null "Identifiers" values left after filtering
filtered_main["Identifiers"].count()

3630

In [12]:
# Non-null extracted "ID" values
filtered_main["ID"].count()

3630

In [13]:
# Distinct "ID" values (equal to the count above when every ID occurs once)
filtered_main["ID"].nunique()

3630

In [14]:
# Replace . with NaN
# Only cells whose whole value is "." are replaced (plain value match, not a regex)
filtered_main = filtered_main.replace(".", np.nan)

In [16]:
# How many people answered all questionnaires
# (i.e. the Data_entry value of every questionnaire is non-empty)
data_entry_columns = filtered_main.filter(like=",Data_entry")
data_entry_columns.dropna(how='any')

Unnamed: 0,"ACE,Data_entry","APQ_P,Data_entry","APQ_SR,Data_entry","ARI_P,Data_entry","ARI_S,Data_entry","ASR,Data_entry","ASSQ,Data_entry","Audit,Data_entry","BIA,Data_entry","Barratt,Data_entry","Basic_Demos,Data_entry","C3SR,Data_entry","CAARS,Data_entry","CBCL,Data_entry","CBCL_Pre,Data_entry","CCSC,Data_entry","CDI_P,Data_entry","CDI_SR,Data_entry","CELF,Data_entry","CELF5_Meta,Data_entry","CELF_Full_5to8,Data_entry","CELF_Full_9to21,Data_entry","CELF_Meta,Data_entry","CGAS,Data_entry","CIS_P,Data_entry","CIS_SR,Data_entry","CPIC,Data_entry","CTOPP,Data_entry","ColorVision,Data_entry","ConsensusDx,Data_entry","DSM5_Dx,Data_entry","DTS,Data_entry","DigitSpan,Data_entry","DrugScreen,Data_entry","EEG_Track,Data_entry","EHQ,Data_entry","ESPAD,Data_entry","ESWAN,Data_entry","EVT,Data_entry","FFQ,Data_entry","FGC,Data_entry","FSQ,Data_entry","FTND,Data_entry","FTQA,Data_entry","GFTA,Data_entry","IAT,Data_entry","ICU_P,Data_entry","KBIT,Data_entry","MFQ_P,Data_entry","MFQ_SR,Data_entry","MRI_Track,Data_entry","NIDA,Data_entry","NIH5,Data_entry","NIH7,Data_entry","NLES_P,Data_entry","NLES_SR,Data_entry","PAQ_A,Data_entry","PAQ_C,Data_entry","PBQ,Data_entry","PCIAT,Data_entry","PPS,Data_entry","PPVT,Data_entry","PSI,Data_entry","Pegboard,Data_entry","PhenX_Neighborhood,Data_entry","PhenX_SchoolRisk,Data_entry","Physical,Data_entry","PreInt_Demos_Fam,Data_entry","PreInt_Demos_Home,Data_entry","PreInt_DevHx,Data_entry","PreInt_EduHx,Data_entry","PreInt_FamHx,Data_entry","PreInt_FamHx_RDC,Data_entry","PreInt_Lang,Data_entry","PreInt_TxHx,Data_entry","Pregnancy,Data_entry","Quotient,Data_entry","Quotient_Ext,Data_entry","RANRAS,Data_entry","RBS,Data_entry","SAS,Data_entry","SCARED_P,Data_entry","SCARED_SR,Data_entry","SCQ,Data_entry","SDQ,Data_entry","SDS,Data_entry","SRS,Data_entry","SRS_Pre,Data_entry","STAI,Data_entry","SWAN,Data_entry","SympChck,Data_entry","TOWRE,Data_entry","Tanner,Data_entry","Vineland,Data_entry","WAIS,Data_entry","WASI,Data_entry","WHODAS_P,Data_en
try","WHODAS_SR,Data_entry","WIAT,Data_entry","WISC,Data_entry","YFAS,Data_entry","YFAS_C,Data_entry","YSR,Data_entry"


In [17]:
# Rank questionnaires by how many people answered them
pd.set_option('display.max_rows', None)
participant_total = filtered_main["ID"].nunique()
questionnaire_answer_counts = (
    data_entry_columns.count().sort_values(ascending=False).to_frame()
)
# `to_frame()` names the single column 0; "Ratio" is that count as a
# percentage of the distinct IDs
questionnaire_answer_counts["Ratio"] = questionnaire_answer_counts[0] / participant_total * 100
questionnaire_answer_counts

Unnamed: 0,0,Ratio
"Basic_Demos,Data_entry",3630,100.0
"PreInt_EduHx,Data_entry",3559,98.044077
"PreInt_DevHx,Data_entry",3553,97.878788
"WIAT,Data_entry",3549,97.768595
"ColorVision,Data_entry",3524,97.07989
"Pegboard,Data_entry",3515,96.831956
"Physical,Data_entry",3503,96.501377
"FGC,Data_entry",3483,95.950413
"PreInt_TxHx,Data_entry",3477,95.785124
"CELF,Data_entry",3463,95.399449


In [18]:
# List of top mental health questionnaires and output variables (consensus diagnosis and impairment)
# The order matters: the cumulative-completion cell takes growing prefixes of this list.
top_mh_assessments = ['ARI_S', 'SympChck', 'SCQ', 'ASSQ', 'SDQ', 'ARI_P', 'SWAN', 'SRS', 'CBCL', 'NLES_P', 
'SCARED_P', 'ICU_P', 'PCIAT', 'DTS', 'ESWAN', 'MFQ_P', 'YFAS_C', 'WHODAS_P', 'SDS', 'CIS_P', 'SAS', 'WHODAS_SR', 
'CIS_SR', 'CGAS', 'RBS', 'C3SR', 'SCARED_SR', 'MFQ_SR', 'ConsensusDx', 'CCSC', 'YSR', 'CDI_P', 'CDI_SR', 'YFAS', 
'CBCL_Pre', 'SRS_Pre', 'ASR', 'CAARS', 'STAI']

In [19]:
# Number of people who completed ALL of the top-1, top-2, ..., top-N
# assessments, where N = len(top_mh_assessments).
cumul_number_of_examples = []
# `+ 1` so the last iteration covers the complete list; `range(1, len(...))`
# stopped one short and never evaluated the final assessment.
for i in range(1, len(top_mh_assessments) + 1):
    columns = [x+",Data_entry" for x in top_mh_assessments[:i]]
    # Rows where every selected Data_entry column is filled in; computed once
    # and reused for both the printout and the stored result.
    n_complete = data_entry_columns[columns].notnull().all(axis=1).sum()
    print(columns)
    print(n_complete)
    cumul_number_of_examples.append(n_complete)

['ARI_S,Data_entry']
3276
['ARI_S,Data_entry', 'SympChck,Data_entry']
3069
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry']
3055
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry']
3033
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry']
2981
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry', 'ARI_P,Data_entry']
2964
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry', 'ARI_P,Data_entry', 'SWAN,Data_entry']
2890
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry', 'ARI_P,Data_entry', 'SWAN,Data_entry', 'SRS,Data_entry']
2813
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry', 'ARI_P,Data_entry', 'SWAN,Data_entry', 'SRS,Data_entry', 'CBCL,Data_entry']
2812
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry'

['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry', 'ARI_P,Data_entry', 'SWAN,Data_entry', 'SRS,Data_entry', 'CBCL,Data_entry', 'NLES_P,Data_entry', 'SCARED_P,Data_entry', 'ICU_P,Data_entry', 'PCIAT,Data_entry', 'DTS,Data_entry', 'ESWAN,Data_entry', 'MFQ_P,Data_entry', 'YFAS_C,Data_entry', 'WHODAS_P,Data_entry', 'SDS,Data_entry', 'CIS_P,Data_entry', 'SAS,Data_entry', 'WHODAS_SR,Data_entry', 'CIS_SR,Data_entry', 'CGAS,Data_entry', 'RBS,Data_entry', 'C3SR,Data_entry', 'SCARED_SR,Data_entry', 'MFQ_SR,Data_entry', 'ConsensusDx,Data_entry', 'CCSC,Data_entry', 'YSR,Data_entry', 'CDI_P,Data_entry']
0
['ARI_S,Data_entry', 'SympChck,Data_entry', 'SCQ,Data_entry', 'ASSQ,Data_entry', 'SDQ,Data_entry', 'ARI_P,Data_entry', 'SWAN,Data_entry', 'SRS,Data_entry', 'CBCL,Data_entry', 'NLES_P,Data_entry', 'SCARED_P,Data_entry', 'ICU_P,Data_entry', 'PCIAT,Data_entry', 'DTS,Data_entry', 'ESWAN,Data_entry', 'MFQ_P,Data_entry', 'YFAS_C,Data_entry', 'WHODAS_P,Data

Only 420 people completed all of the most popular questionnaires up to ConsensusDx.

In [20]:
# Check if there are more complete diagnoses in COINS assessment data
# `count()` reports the non-null entries per column
diag = pd.read_csv('data/assessment_data/9994_ConsensusDx_20211130.csv')
diag.count()

  exec(code_obj, self.user_global_ns, self.user_ns)


Anonymized ID            6183
Subject Type             6183
Visit                    6183
Days since enrollment    6183
EID                      6183
START_DATE               6183
Study                    6183
Site                     6183
Year                     6183
Season                   6183
NoDX                     6181
DX_01_Cat                6181
DX_01_Sub                3651
DX_01                    6143
DX_01_Spec                814
DX_01_Code               5964
DX_01_Time               6075
DX_01_Confirmed          3295
DX_01_Presum             3294
DX_01_RC                 6074
DX_01_RuleOut            6075
DX_01_ByHx               5688
DX_01_New                2786
DX_01_Rem                2786
DX_01_PRem               2786
DX_01_Past_Doc            179
DX_02_Cat                3431
DX_02_Sub                1758
DX_02                    3294
DX_02_Spec                844
DX_02_Code               3181
DX_02_Time               5917
DX_02_Confirmed          1952
DX_02_Pres

In [21]:
# Distinct EIDs in the raw COINS diagnosis export
diag["EID"].nunique()

3616

In [22]:
# Remove duplicated lines (rows identical in every column), then recount
# the non-null entries per column
diag_clean = diag.drop_duplicates()
diag_clean.count()

Anonymized ID            3975
Subject Type             3975
Visit                    3975
Days since enrollment    3975
EID                      3975
START_DATE               3975
Study                    3975
Site                     3975
Year                     3975
Season                   3975
NoDX                     3973
DX_01_Cat                3973
DX_01_Sub                2376
DX_01                    3935
DX_01_Spec                580
DX_01_Code               3767
DX_01_Time               3867
DX_01_Confirmed          2229
DX_01_Presum             2228
DX_01_RC                 3866
DX_01_RuleOut            3867
DX_01_ByHx               3649
DX_01_New                1642
DX_01_Rem                1642
DX_01_PRem               1642
DX_01_Past_Doc            112
DX_02_Cat                2347
DX_02_Sub                1121
DX_02                    2209
DX_02_Spec                605
DX_02_Code               2106
DX_02_Time               3709
DX_02_Confirmed          1360
DX_02_Pres

In [23]:
# Distinct EIDs after de-duplication (compare with the raw export's count)
diag_clean["EID"].nunique() 

3616

In [24]:
# Frequency of each DX_01 value in the de-duplicated COINS data
diag_clean["DX_01"].value_counts() 

ADHD-Combined Type                                                     779
ADHD-Inattentive Type                                                  622
No Diagnosis Given: Incomplete Eval                                    456
No Diagnosis Given                                                     309
Autism Spectrum Disorder                                               295
Specific Learning Disorder with Impairment in Reading                  209
Generalized Anxiety Disorder                                           171
Major Depressive Disorder                                              146
ADHD-Hyperactive/Impulsive Type                                        115
Other Specified Attention-Deficit/Hyperactivity Disorder               106
Social Anxiety (Social Phobia)                                          97
Language Disorder                                                       76
Oppositional Defiant Disorder                                           55
Other Specified Anxiety D

456 rows have an incomplete evaluation (`No Diagnosis Given: Incomplete Eval`).

COINS contains much more data than LORIS.

In [26]:
# Check if per-assessment LORIS has more diagnosis data than full LORIS
# `count()` reports the non-null entries per column
LORIS_diag = pd.read_csv('data/LORIS-diag.csv')
LORIS_diag.count()

Identifiers                                     3615
Diagnosis_ClinicianConsensus,Administration     3615
Diagnosis_ClinicianConsensus,Comment_ID            0
Diagnosis_ClinicianConsensus,DX_01              3615
Diagnosis_ClinicianConsensus,DX_01_ByHx         3420
Diagnosis_ClinicianConsensus,DX_01_Cat          3613
Diagnosis_ClinicianConsensus,DX_01_Code         3515
Diagnosis_ClinicianConsensus,DX_01_Confirmed    2178
Diagnosis_ClinicianConsensus,DX_01_New          1440
Diagnosis_ClinicianConsensus,DX_01_PRem         1440
Diagnosis_ClinicianConsensus,DX_01_Past_Doc      108
Diagnosis_ClinicianConsensus,DX_01_Presum       2177
Diagnosis_ClinicianConsensus,DX_01_RC           3614
Diagnosis_ClinicianConsensus,DX_01_Rem          1440
Diagnosis_ClinicianConsensus,DX_01_RuleOut      3615
Diagnosis_ClinicianConsensus,DX_01_Spec          483
Diagnosis_ClinicianConsensus,DX_01_Sub          2162
Diagnosis_ClinicianConsensus,DX_01_Time         3615
Diagnosis_ClinicianConsensus,DX_02            

In [27]:
# Distinct "Identifiers" values in the per-assessment LORIS diagnosis export
LORIS_diag["Identifiers"].nunique()

3615

### COINS

In [5]:
# Read all COINS data (only Questionnaire Measures of Emotional and Cognitive Status from Assessment_List_Jan2019)
# TRF and TRF_Pre are left out for now, until it is decided how to handle their multiple entries
assessments = [
    'ASR', 'YSR', 'MFQ_SR', 'SCARED_SR', 'C3SR', 'ARI_S', 'CSSRS', 'CIS_SR', 'WHODAS_SR',
    'ICU_SR', 'PANAS', 'CBCL', 'CBCL_Pre', 'MFQ_P', 'SCARED_P', 'ESWAN', 'SWAN', 'ASSQ', 'SCQ', 'CIS_P',
    'SAS', 'WHODAS_P', 'SDQ', 'RBS', 'SRS', 'SRS_Pre', 'ARI_P', 'ICU_P', 'SympChck',
]

# One export file per assessment
coins_file_names = [f"9994_{assessment}_20211130.csv" for assessment in assessments]

# Load every assessment file into its own dataframe (header=1: the second
# line of each file supplies the column names)
all_coins_dfs = []
for file_name in coins_file_names:
    temp_df = pd.read_csv('data/assessment_data/' + file_name, header=1)
    # Report whether any ID repeats within the file, because assessments are
    # aggregated by ID afterwards ('TRF', 'TRF_Pre' are excluded for now)
    has_duplicate_ids = temp_df["ID"].duplicated().any()
    print(file_name, has_duplicate_ids)
    all_coins_dfs.append(temp_df)

9994_ASR_20211130.csv False
9994_YSR_20211130.csv False
9994_MFQ_SR_20211130.csv False
9994_SCARED_SR_20211130.csv False
9994_C3SR_20211130.csv False
9994_ARI_S_20211130.csv False
9994_CSSRS_20211130.csv False
9994_CIS_SR_20211130.csv False
9994_WHODAS_SR_20211130.csv False
9994_ICU_SR_20211130.csv False
9994_PANAS_20211130.csv False
9994_CBCL_20211130.csv False
9994_CBCL_Pre_20211130.csv False
9994_MFQ_P_20211130.csv False
9994_SCARED_P_20211130.csv False
9994_ESWAN_20211130.csv False
9994_SWAN_20211130.csv False
9994_ASSQ_20211130.csv False
9994_SCQ_20211130.csv False
9994_CIS_P_20211130.csv False
9994_SAS_20211130.csv False
9994_WHODAS_P_20211130.csv False
9994_SDQ_20211130.csv False
9994_RBS_20211130.csv False
9994_SRS_20211130.csv False
9994_SRS_Pre_20211130.csv False
9994_ARI_P_20211130.csv False
9994_ICU_P_20211130.csv False
9994_SympChck_20211130.csv False


In [7]:
# Concatenate all assessments
all_coins = pd.concat(all_coins_dfs)
# Drops the name of the per-assessment list; re-running this cell afterwards
# raises NameError unless the loading cell is run again first
del all_coins_dfs

# Non-null entries per column of the combined frame
all_coins.count()

ID            68885
SUB_TYPE      68885
VISIT         68885
DAY_LAG       68885
ASRHBN_001       96
              ...  
CSC_129        3241
CSC_130        3253
CSC_131        3249
CSC_132        3248
CSC_133        3248
Length: 1726, dtype: int64

In [9]:
# Distinct IDs across all concatenated COINS assessments
all_coins["ID"].nunique()

3580

In [13]:
# Group rows by the (ID, SUB_TYPE, VISIT, DAY_LAG) key, so rows coming from
# different assessment files that share this key fall into the same group
grouped_coins = all_coins.groupby(by=["ID", "SUB_TYPE", "VISIT", "DAY_LAG"])

AttributeError: 'DataFrameGroupBy' object has no attribute 'loc'

In [19]:
# Test groupby on a small subset: the first three assessment files
# (ASR, YSR, MFQ_SR) from the data/test directory
ASR = pd.read_csv('data/test/' + coins_file_names[0], header=1)
YSR = pd.read_csv('data/test/' + coins_file_names[1], header=1)
MFQ = pd.read_csv('data/test/' + coins_file_names[2], header=1)
ASR_YSR_MFQ_appended = pd.concat([ASR, YSR, MFQ])

grouped_test = ASR_YSR_MFQ_appended.groupby(by=["ID", "SUB_TYPE", "VISIT", "DAY_LAG"])
# Show the rows of every group. Iterating over the GroupBy replaces
# `apply(display)`, which used apply only for its side effect. The
# intermediate head()/count()/nunique()/value_counts() expressions were
# removed: none of them was the cell's last expression, so their results
# were never displayed.
for group_key, group_rows in grouped_test:
    display(group_rows)

Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
2,A00078869,UNKNOWN,V1,0,,,,,,,,,,,,NDARNH263WZP,1901-01-01T07:00:00Z,HBN,1.0,53.0,2015.0,Summer,0.0,1.0,2.0,66.0,,,,,,,,,,,


Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
0,A00078981,UNKNOWN,V1,0,,,,,,,,,,,,,,,,,,,,,,,NDARNL599TMZ,1901-01-01T07:00:00Z,HBN,1.0,214.0,2016.0,Summer,0.0,0.0,1.0,2.0


Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
0,A00078985,UNKNOWN,V1,0,NDARMY301WFK,1901-01-01T07:00:00Z,HBN,1.0,33.0,2016.0,Winter,0.0,2.0,0.0,44.0,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
1,A00078997,UNKNOWN,V1,0,NDARHF023VG3,1901-01-01T07:00:00Z,HBN,1.0,7.0,2016.0,Winter,0.0,0.0,0.0,47.0,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
0,A00079004,UNKNOWN,V1,0,,,,,,,,,,,,NDARJF045MHG,1901-01-01T07:00:00Z,HBN,1.0,124.0,2016.0,Spring,1.0,0.0,1.0,64.0,,,,,,,,,,,
1,A00079004,UNKNOWN,V1,0,,,,,,,,,,,,,,,,,,,,,,,NDARJF045MHG,1901-01-01T07:00:00Z,HBN,1.0,124.0,2016.0,Spring,1.0,1.0,1.0,14.0


Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
2,A00079017,UNKNOWN,V1,0,,,,,,,,,,,,,,,,,,,,,,,NDARWY198BVY,1901-01-01T07:00:00Z,HBN,1.0,198.0,2016.0,Summer,0.0,0.0,2.0,14.0


Unnamed: 0,ID,SUB_TYPE,VISIT,DAY_LAG,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_001,YSR1_002,YSR1_003,YSR1_004,YSR1_005,YSR1_006,YSR1_007,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_001,MFQSR_002,MFQSR_003,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_007,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
1,A00079078,UNKNOWN,V1,0,,,,,,,,,,,,NDARZG263HRK,1901-01-01T07:00:00Z,HBN,1.0,35.0,2016.0,Spring,0.0,0.0,0.0,46.0,,,,,,,,,,,
3,A00079078,UNKNOWN,V1,0,,,,,,,,,,,,,,,,,,,,,,,NDARZG263HRK,1901-01-01T07:00:00Z,HBN,1.0,69.0,2016.0,Spring,0.0,0.0,0.0,1.0


In [20]:
# Collapse each group to a single row by taking the per-column maximum.
# NOTE(review): the recorded output lacks several text columns (e.g. YSR1_001,
# MFQSR_001) — presumably dropped because max() could not aggregate them;
# confirm whether an aggregator such as first() is what is wanted here.
grouped_test.max()

  grouped_test.max()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,ASRHBN_001,ASRHBN_002,ASRHBN_003,ASRHBN_004,ASRHBN_005,ASRHBN_006,ASRHBN_007,ASRHBN_008,ASRHBN_009,ASRHBN_010,ASRHBN_160,YSR1_004,YSR1_005,YSR1_006,YSR1_008,YSR1_009,YSR1_010,YSR1_150,MFQSR_004,MFQSR_005,MFQSR_006,MFQSR_008,MFQSR_009,MFQSR_010,MFQSR_041
ID,SUB_TYPE,VISIT,DAY_LAG,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
A00078869,UNKNOWN,V1,0,,,,,,,,,,,,1.0,53.0,2015.0,0.0,1.0,2.0,66.0,,,,,,,
A00078981,UNKNOWN,V1,0,,,,,,,,,,,,,,,,,,,1.0,214.0,2016.0,0.0,0.0,1.0,2.0
A00078985,UNKNOWN,V1,0,NDARMY301WFK,1901-01-01T07:00:00Z,HBN,1.0,33.0,2016.0,Winter,0.0,2.0,0.0,44.0,,,,,,,,,,,,,,
A00078997,UNKNOWN,V1,0,NDARHF023VG3,1901-01-01T07:00:00Z,HBN,1.0,7.0,2016.0,Winter,0.0,0.0,0.0,47.0,,,,,,,,,,,,,,
A00079004,UNKNOWN,V1,0,,,,,,,,,,,,1.0,124.0,2016.0,1.0,0.0,1.0,64.0,1.0,124.0,2016.0,1.0,1.0,1.0,14.0
A00079017,UNKNOWN,V1,0,,,,,,,,,,,,,,,,,,,1.0,198.0,2016.0,0.0,0.0,2.0,14.0
A00079078,UNKNOWN,V1,0,,,,,,,,,,,,1.0,35.0,2016.0,0.0,0.0,0.0,46.0,1.0,69.0,2016.0,0.0,0.0,0.0,1.0
