This file contains the first portion of the CTSI example code. It imports, 
cleans, encodes, and scales information from the selected patient cohort in 
the OMOP database file and combines all of the data into one dataframe for analysis.

In [1]:
## Connect to the OMOP Database
import sqlite3
db_file = "omop.db"
sqliteconn = None
try:
    sqliteconn = sqlite3.connect(db_file)
    print(sqlite3.version)
except Error as e:
    print(e)
c = None
try:
    c = sqliteconn.cursor()
except Error as e:
    print(e)

2.6.0


In [2]:
#Import required libraries
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings('ignore')

In [3]:
## CREATE DATAFRAMES FROM DATABASE TABLES

In [4]:
## PERSON DF
query = '''
SELECT  p.person_id
        ,p.birth_datetime
        ,p.death_datetime
        ,c.concept_name as gender
        ,c1.concept_name as race
        ,c2.concept_name as ethnicity
from 
  person p
  join concept c on c.concept_id = p.gender_concept_id
  join concept c1 on c1.concept_id = p.race_concept_id
  join concept c2 on c2.concept_id = p.ethnicity_concept_id
'''
df_person = pd.read_sql_query(query,sqliteconn)

# create 'age' column for person dataframe
# convert dates ('objects') to datetime64[ns]
df_person['birth_datetime'] = pd.to_datetime(df_person['birth_datetime']).dt.date.astype('datetime64[ns]')
df_person['death_datetime'] = pd.to_datetime(df_person['death_datetime']).dt.date.astype('datetime64[ns]')
df_person['birth_datetime'] = (df_person['birth_datetime']).dt.strftime('%Y-%m-%d')
df_person['death_datetime'] = (df_person['death_datetime']).dt.strftime('%Y-%m-%d')
# define age calculation function for live patients
from datetime import datetime, date
def live_age(born):
    born = datetime.strptime(born,'%Y-%m-%d').date()
    today = date.today()
    return today.year-born.year-((today.month,today.day)<(born.month,born.day))
# define age calculation function for dead patients
def dead_age(born,death):
    born = datetime.strptime(born,'%Y-%m-%d').date()
    death = datetime.strptime(death,'%Y-%m-%d').date()
    return death.year-born.year-((death.month,death.day)<(born.month,born.day))
# add 'current_age' column to table
mask_live = df_person['death_datetime'].isnull()
mask_dead = df_person['death_datetime'].notnull()
df_person_live = df_person[mask_live]
df_person_dead = df_person[mask_dead]
df_person_live['age'] = df_person_live.apply(lambda x: live_age(x['birth_datetime']), axis=1)
df_person_dead['age'] = df_person_dead.apply(lambda x: dead_age(x['birth_datetime'],x['death_datetime']),axis=1)
df_patient = pd.concat([df_person_live,df_person_dead],ignore_index=True)
# convert 'current_age' type to Int64
pd.to_numeric(df_patient['age'],errors='coerce').convert_dtypes()
# sort by patient id
df_patient.sort_values('person_id')
# assign patient_id as df index
df_patient = df_patient.set_index('person_id')
# remove birth/death date columns
df_patient = df_patient.drop(columns=['birth_datetime','death_datetime'])
df_patient.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7428 entries, 1 to 7369
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   gender     7428 non-null   object
 1   race       7428 non-null   object
 2   ethnicity  7428 non-null   object
 3   age        7428 non-null   int64 
dtypes: int64(1), object(3)
memory usage: 290.2+ KB


In [5]:
## CONDITION DF
query = '''
SELECT  distinct person_id, c.concept_name
from 
  condition_occurrence icd
  --join cohort co on co.subject_id = icd.person_id
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%bipolar%'
  and domain_id = 'Condition'
'''
df_condition = pd.read_sql_query(query,sqliteconn)
# crosstab table so that person_id is the index and the BD conditions are the columns
# (creates a df using 1/0s)
df_condition = pd.crosstab(df_condition['person_id'],df_condition['concept_name'])
# condense condition variables
df_condition['Bipolar disorder, depressed'] = df_condition.iloc[:,np.r_[4,7:10]].max(1)
df_condition['Bipolar disorder, manic'] = df_condition.iloc[:,np.r_[5,10:14]].max(1)
df_condition['Bipolar disorder, mixed'] = df_condition.iloc[:,np.r_[6,25:29]].max(1)
df_condition['Bipolar disorder, unspecified'] = df_condition.iloc[:,14:18].max(1)
df_condition['Bipolar I disorder, unspecified'] = df_condition.iloc[:,0:3].max(1)
df_condition['Bipolar I disorder, depressed'] = df_condition.iloc[:,np.r_[18:20,31:33]].max(1)
df_condition['Bipolar I disorder, manic'] = df_condition.iloc[:,np.r_[20:22,33]].max(1)
df_condition['Bipolar I disorder, mixed'] = df_condition.iloc[:,np.r_[22:25,34]].max(1)
df_condition['Bipolar II disorder, unspecified'] = df_condition.iloc[:,3]
df_condition['Psychosis and severe depression co-occurrent'] = df_condition.iloc[:,20]
df_condition['Schizoaffective disorder'] = df_condition.iloc[:,30]
df_condition = df_condition.iloc[:,35:]
df_condition.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6381 entries, 3 to 7428
Data columns (total 11 columns):
 #   Column                                        Non-Null Count  Dtype
---  ------                                        --------------  -----
 0   Bipolar disorder, depressed                   6381 non-null   int64
 1   Bipolar disorder, manic                       6381 non-null   int64
 2   Bipolar disorder, mixed                       6381 non-null   int64
 3   Bipolar disorder, unspecified                 6381 non-null   int64
 4   Bipolar I disorder, unspecified               6381 non-null   int64
 5   Bipolar I disorder, depressed                 6381 non-null   int64
 6   Bipolar I disorder, manic                     6381 non-null   int64
 7   Bipolar I disorder, mixed                     6381 non-null   int64
 8   Bipolar II disorder, unspecified              6381 non-null   int64
 9   Psychosis and severe depression co-occurrent  6381 non-null   int64
 10  Schizoaffect

In [6]:
## DRUG DF
query = '''
SELECT de.person_id
        ,de.drug_exposure_end_date
        ,de.drug_source_value
        ,c2.concept_name as ingredient 
from drug_exposure de join concept c on c.concept_id = de.drug_concept_id  
join concept_ancestor ca on ca.descendant_concept_id = c.concept_id  
join concept c2 on c2.concept_id = ca.ancestor_concept_id  
where c2.concept_class_id = 'Ingredient'  
order by 1 asc;
'''
df_drug = pd.read_sql_query(query,sqliteconn)

# select the FDA-approved BD drugs 
drug_list = 'olanzapine|fluoxetine|quetiapine|lurasidone|chlorpromazine|divalproex|valproate|risperidone|ziprasidone|aripiprazole|carbamazepine|asenapine|lamotrigine'
mask = df_drug['drug_source_value'].str.contains(drug_list,case=False,na=False)
df_drug = df_drug[mask]
# keep the most recent drug patients were prescribed
df_drug = df_drug.sort_values('drug_exposure_end_date').drop_duplicates('person_id',keep='last')
# remove nonessential columns
df_drug = df_drug.drop(columns=['drug_exposure_end_date','drug_source_value'])
# assign patient_id as df index
df_drug = df_drug.set_index('person_id')
df_drug= df_drug.sort_index()
df_drug.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4018 entries, 3 to 7425
Data columns (total 1 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ingredient  4018 non-null   object
dtypes: object(1)
memory usage: 62.8+ KB


In [7]:
## OBSERVATION DF
query = '''
SELECT  o.person_id, o.observation_source_value, c.concept_name as observation_source
from observation o
join concept c on c.concept_id = o.observation_source_concept_id
order by 1 asc
'''
df_observation = pd.read_sql_query(query, sqliteconn)

# select relevant observations
obs_list='278.02|E66.3|4066F|783.0|783.1|R63.4|R63.5|783.21|783.22|R63.6|796.2|R03.0|F50.81|R46.0|T14.91XA|V11.3|Z71.41|Z81.1|V11.8|V11.9|Z13.30|Z13.39|Z81.1|Z86.59|T74.11XA|V15.41|Z68.1|Z68.20|Z68.21|Z68.23|Z68.24|Z68.25|Z68.26|Z68.27|Z68.28|Z68.29|Z68.30|Z68.31|Z68.32|Z68.33|Z68.34|Z68.35|Z68.36|Z68.37|Z68.38|Z68.39|Z68.40|Z68.41|Z68.42|Z68.43|Z68.44|Z68.45|X73.0XXA|X78.9XXA|X78.9XXD|Z04.6|Z13.30|Z13.39|V15.82|Z87.891|Z91.49' 
mask = df_observation['observation_source_value'].str.contains(obs_list,case=False,na=False)
df_observation = df_observation[mask]
# crosstab table so that person_id is the index and observations are the columns
# (creates a df using 1/0s)
df_observation = pd.crosstab(df_observation['person_id'],df_observation['observation_source'])
# simplify BMI variables
df_observation['underweight (BMI < 19.9)'] = df_observation.iloc[:,np.r_[6,52]].max(1)
df_observation['healthy weight (20.0 < BMI < 24.9)'] = df_observation.iloc[:,7:11].max(1)
df_observation['overweight (25.0 < BMI < 29.9)'] = df_observation.iloc[:,11:16].max(1)
df_observation['obese (BMI > 30.0)'] = df_observation.iloc[:,np.r_[16:31,44]].max(1)
df_observation['Elevated blood pressure reading'] = df_observation.iloc[:,np.r_[32:34]].max(1)
df_observation['Encounter for screening examination for mental health and behavioral disorders'] = df_observation.iloc[:,np.r_[35:37]].max(1)
df_observation['Intentional self-harm'] = df_observation.iloc[:,np.r_[39:42]].max(1)
df_observation['Personal history of other mental and behavioral disorders'] = df_observation.iloc[:,np.r_[47:49,50]].max(1)
# drop nonessential columns
df_observation.drop(df_observation.iloc[:,np.r_[6:31,32,33,35,36,39:42,44,48,50,52]],axis=1,inplace=True)
df_observation.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2267 entries, 5 to 7428
Data columns (total 25 columns):
 #   Column                                                                          Non-Null Count  Dtype
---  ------                                                                          --------------  -----
 0   Abnormal weight gain                                                            2267 non-null   int64
 1   Abnormal weight loss                                                            2267 non-null   int64
 2   Adult physical abuse, confirmed, initial encounter                              2267 non-null   int64
 3   Alcohol abuse counseling and surveillance of alcoholic                          2267 non-null   int64
 4   Anorexia                                                                        2267 non-null   int64
 5   Binge eating disorder                                                           2267 non-null   int64
 6   Electroconvulsive therapy (ECT) 

In [8]:
## PROCEDURE DF
query = '''
SELECT p.person_id, c.concept_id, c.concept_name as procedure
from procedure_occurrence p
join concept c on c.concept_id = p.procedure_concept_id
order by 1 asc
'''
df_procedure = pd.read_sql_query(query, sqliteconn)

# select relevant procedures
proc_list=[40664726,2795859,2213552,2795843,2795845,2795847,2795875,2795877,2795880,2213548,2795855,72123,40664726,710043,710044,2213545,2213546,2213548,2795682,2795855,43527904,43527905,43527986,43527987,43527988,43527989,43527990] 
mask = df_procedure['concept_id'].isin(proc_list)
df_procedure = df_procedure[mask]
# crosstab table so that person_id is the index and procedures are the columns
# (creates a df using 1/0s)
df_procedure = pd.crosstab(df_procedure['person_id'],df_procedure['procedure'])
# simplify procedure variables
df_procedure['Electroconvulsive therapy'] = df_procedure.iloc[:,np.r_[2:5,15]].max(1)
df_procedure['Family psychotherapy'] = df_procedure.iloc[:,np.r_[5:7]].max(1)
df_procedure['Group psychotherapy'] = df_procedure.iloc[:,np.r_[10:12]].max(1)
df_procedure['Individual psychotherapy'] = df_procedure.iloc[:,np.r_[14,16:23]].max(1)
df_procedure['Group counseling for substance abuse treatment'] = df_procedure.iloc[:,np.r_[7:10]].max(1)
df_procedure['Health behavior intervention'] = df_procedure.iloc[:,np.r_[12:14]].max(1)
# drop nonessential columns
df_procedure.drop(df_procedure.iloc[:,np.r_[2:23]],axis=1,inplace=True)
df_procedure.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 994 entries, 7 to 7422
Data columns (total 8 columns):
 #   Column                                                 Non-Null Count  Dtype
---  ------                                                 --------------  -----
 0   Annual depression screening, 15 minutes                994 non-null    int64
 1   Detoxification Services for Substance Abuse Treatment  994 non-null    int64
 2   Electroconvulsive therapy                              994 non-null    int64
 3   Family psychotherapy                                   994 non-null    int64
 4   Group psychotherapy                                    994 non-null    int64
 5   Individual psychotherapy                               994 non-null    int64
 6   Group counseling for substance abuse treatment         994 non-null    int64
 7   Health behavior intervention                           994 non-null    int64
dtypes: int64(8)
memory usage: 69.9 KB


In [9]:
## Output Variables

In [10]:
# Alcohol
query = '''
SELECT  distinct person_id, c.concept_name as alcohol_abuse
from
  condition_occurrence icd
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%alcohol abuse%'
  and domain_id = 'Condition'
'''
df_alc = pd.read_sql_query(query,sqliteconn)
df_alc.set_index('person_id',inplace=True)
df_alc.sort_index(inplace=True)
df_alc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 319 entries, 4 to 7420
Data columns (total 1 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   alcohol_abuse  319 non-null    object
dtypes: object(1)
memory usage: 5.0+ KB


In [11]:
# Cocaine
query = '''
SELECT  distinct person_id, c.concept_name as cocaine_abuse
from 
  condition_occurrence icd
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%cocaine abuse%'
  and domain_id = 'Condition'
'''
df_coke= pd.read_sql_query(query,sqliteconn)
df_coke.set_index('person_id',inplace=True)
df_coke.sort_index(inplace=True)
df_coke.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 182 entries, 126 to 7389
Data columns (total 1 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   cocaine_abuse  182 non-null    object
dtypes: object(1)
memory usage: 2.8+ KB


In [12]:
# Hallucinogen
query = '''
SELECT  distinct person_id, c.concept_name as hallucinogen_abuse
from 
  condition_occurrence icd
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%hallucinogen abuse%'
  and domain_id = 'Condition'
'''
df_hall= pd.read_sql_query(query,sqliteconn)
df_hall.set_index('person_id',inplace=True)
df_hall.sort_index(inplace=True)
df_hall.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 770 to 5397
Data columns (total 1 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   hallucinogen_abuse  3 non-null      object
dtypes: object(1)
memory usage: 48.0+ bytes


In [13]:
# Inhalant
query = '''
SELECT  distinct person_id, c.concept_name as inhalant_abuse
from
  condition_occurrence icd
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%inhalant abuse%'
  and domain_id = 'Condition'
'''
df_in= pd.read_sql_query(query,sqliteconn)
df_in.set_index('person_id',inplace=True)
df_in.sort_index(inplace=True)
df_in.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 2658 to 2658
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   inhalant_abuse  1 non-null      object
dtypes: object(1)
memory usage: 16.0+ bytes


In [14]:
# Opioid
query = '''
SELECT  distinct person_id, c.concept_name as opioid_abuse
from 
  condition_occurrence icd
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%opioid abuse%'
  and domain_id = 'Condition'
'''
df_op = pd.read_sql_query(query,sqliteconn)
df_op.set_index('person_id',inplace=True)
df_op.sort_index(inplace=True)
df_op.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 106 entries, 158 to 7368
Data columns (total 1 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   opioid_abuse  106 non-null    object
dtypes: object(1)
memory usage: 1.7+ KB


In [15]:
# Sedative
query = '''
SELECT  distinct person_id, c.concept_name as sedative_abuse
from 
  condition_occurrence icd
  join concept c on c.concept_id = icd.condition_concept_id
  where vocabulary_id = 'SNOMED'
  and lower(concept_name) like '%sedative abuse%'
  and domain_id = 'Condition'
'''
df_sed = pd.read_sql_query(query,sqliteconn)
df_sed.set_index('person_id',inplace=True)
df_sed.sort_index(inplace=True)
df_sed.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14 entries, 286 to 7185
Data columns (total 1 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   sedative_abuse  14 non-null     object
dtypes: object(1)
memory usage: 224.0+ bytes


In [16]:
# Combine the ouput variable dataframes
df_out = df_alc.join([df_coke,df_hall,df_in,df_op,df_sed],how='outer')
df_out.sort_index(inplace=True)
df_out.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 538 entries, 4 to 7420
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   alcohol_abuse       323 non-null    object
 1   cocaine_abuse       182 non-null    object
 2   hallucinogen_abuse  3 non-null      object
 3   inhalant_abuse      1 non-null      object
 4   opioid_abuse        107 non-null    object
 5   sedative_abuse      15 non-null     object
dtypes: object(6)
memory usage: 29.4+ KB


In [17]:
## COMBINING ALL OF THE TABLES BY PATIENT_ID
df = df_patient.join([df_drug,df_condition,df_observation,df_procedure,df_out],how='left')
df = df.sort_index()
# standardize the df's zero values - convert to NaN and then 0
df = df.replace({'0':np.nan, 0:np.nan})
df = df.fillna(0)
# convert categorical variables to integer values
df.iloc[:,5:] = df.iloc[:,5:].astype(bool).astype(int)
# convert variable to string values (for label encoding)
df['ingredient'] = df['ingredient'].astype(str)

# save unprocessed dataframe to csv for EDA in code file 2
df.to_csv('df_unprocessed.csv')

# scale continuous numeric variables
scaler = MinMaxScaler()
df['age'] = scaler.fit_transform(df['age'].values.reshape(-1,1))
# label encode discrete categorical variables
# (assigns a distinct value to each category in the variable)
label_list = ['gender','race','ethnicity','ingredient']
le = LabelEncoder()
for i in label_list:
    df[i] = le.fit_transform(df[i])

In [18]:
## CHECKING THE FINAL TABLE
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

# save to csv file for use in other files
df.to_csv('df.csv') 
# print info and first 100 lines of df
df.info()
df.head(100)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7440 entries, 1 to 7428
Data columns (total 55 columns):
 #   Column                                                                          Non-Null Count  Dtype  
---  ------                                                                          --------------  -----  
 0   gender                                                                          7440 non-null   int64  
 1   race                                                                            7440 non-null   int64  
 2   ethnicity                                                                       7440 non-null   int64  
 3   age                                                                             7440 non-null   float64
 4   ingredient                                                                      7440 non-null   int64  
 5   Bipolar disorder, depressed                                                     7440 non-null   int64  
 6   Bipolar disorder

Unnamed: 0_level_0,gender,race,ethnicity,age,ingredient,"Bipolar disorder, depressed","Bipolar disorder, manic","Bipolar disorder, mixed","Bipolar disorder, unspecified","Bipolar I disorder, unspecified","Bipolar I disorder, depressed","Bipolar I disorder, manic","Bipolar I disorder, mixed","Bipolar II disorder, unspecified",Psychosis and severe depression co-occurrent,Schizoaffective disorder,Abnormal weight gain,Abnormal weight loss,"Adult physical abuse, confirmed, initial encounter",Alcohol abuse counseling and surveillance of alcoholic,Anorexia,Binge eating disorder,Electroconvulsive therapy (ECT) provided (MDD),"Encounter for general psychiatric examination, requested by authority",Family history of alcohol abuse and dependence,History of physical abuse,Loss of weight,"Other personal history of psychological trauma, not elsewhere classified",Personal history of alcoholism,Personal history of nicotine dependence,Personal history of other mental and behavioral disorders,Personal history of tobacco use,"Suicide attempt, initial encounter",Very low level of personal hygiene,underweight (BMI < 19.9),healthy weight (20.0 < BMI < 24.9),overweight (25.0 < BMI < 29.9),obese (BMI > 30.0),Elevated blood pressure reading,Encounter for screening examination for mental health and behavioral disorders,Intentional self-harm,"Annual depression screening, 15 minutes",Detoxification Services for Substance Abuse Treatment,Electroconvulsive therapy,Family psychotherapy,Group psychotherapy,Individual psychotherapy,Group counseling for substance abuse treatment,Health behavior intervention,alcohol_abuse,cocaine_abuse,hallucinogen_abuse,inhalant_abuse,opioid_abuse,sedative_abuse
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1
1,1,4,1,0.259259,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,4,1,0.648148,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,2,1,0.888889,9,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,4,1,0.333333,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
5,1,4,1,0.240741,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0,4,1,0.833333,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,4,1,0.574074,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
8,1,2,1,0.685185,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,4,1,0.222222,6,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10,0,4,1,0.648148,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
