## Data Exploration (ML50-2023) - Mateus

### __Importing libraries__

In [259]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.stats import chi2_contingency
from sklearn.preprocessing import LabelEncoder

In [260]:
# Data path : ../Data/test.csv
test_path = os.path.join('..', 'Data', 'test.csv')
train_path = os.path.join('..', 'Data', 'train.csv')
icd_9_path = os.path.join('..', 'Data', 'icd9_codes.txt')

# Read data
test = pd.read_csv(test_path, index_col='encounter_id')
train = pd.read_csv(train_path, index_col='encounter_id')
icd_9 = pd.read_csv(icd_9_path, sep=',', encoding='ISO-8859-1', index_col=0)
icd_9['3 digit code'] = icd_9.index.str[:3]
icd_9.set_index('3 digit code', inplace=True)
# Delete duplicates in the index
icd_9 = icd_9[~icd_9.index.duplicated(keep='first')]

test_ids = test.index
train_ids = train.index

# Data will have to be treated as a whole, so we will concatenate the test and training data
data = pd.concat([train, test], axis=0)


### __I. Selecting and understanding the data__
- Check for non-varying features 
- Intuition-based feature selection

In [261]:
data.head(5).T

encounter_id,533253,426224,634063,890610,654194
country,USA,USA,USA,USA,USA
patient_id,70110,29775006,80729253,2919042,84871971
race,Caucasian,AfricanAmerican,Caucasian,AfricanAmerican,Caucasian
gender,Female,Male,Female,Male,Female
age,[70-80),[50-60),[60-70),[60-70),[70-80)
weight,?,?,?,?,?
payer_code,?,?,?,MC,HM
outpatient_visits_in_previous_year,0,0,0,0,1
emergency_visits_in_previous_year,0,0,0,0,0
inpatient_visits_in_previous_year,2,0,1,1,0


1. Non varying features

In [262]:
# We can immediately see that there is only one country, USA, so we can drop that non-informative column
data.drop('country', axis=1, inplace=True)

2. Gender is probably not relevant, so let's check for independence between gender and the target variable.

In [263]:
# Perform a chi-sq test for independence between the gender and the target variable
pre_contingency = data.groupby('gender')['readmitted_binary'].value_counts()
table = pre_contingency.unstack().drop('Unknown/Invalid', axis=0)

chi2, p, dof, expected = chi2_contingency(table)

# Print if we can reject the null hypothesis
print('p-value: ', round(p,3), '> 0.05' if p > 0.05 else '< 0.05')
if p < 0.05:
    print('We reject the null hypothesis, the variables are dependent')
else:
    print('We cannot reject the null hypothesis, the variables are independent')

p-value:  0.121 > 0.05
We cannot reject the null hypothesis, the variables are independent


In [264]:
# We, therefore drop the gender column
data.drop('gender', axis=1, inplace=True)

3. Lets do the same for race

In [265]:
table = data.groupby('race')['readmitted_binary'].value_counts().unstack()
table = table.drop('?', axis=0)
chi2, p, dof, expected = chi2_contingency(table)

# Print if we can reject the null hypothesis
print('p-value: ', round(p,3), '> 0.05' if p > 0.05 else '< 0.05')
if p < 0.05:
    print('We reject the null hypothesis, the variables are dependent')
else:
    print('We cannot reject the null hypothesis, the variables are independent')

p-value:  0.039 < 0.05
We reject the null hypothesis, the variables are dependent


### __II. Feature encoding__
- This is the first step for our analysis, as we need to transform the categorical variables/binary into numerical ones. We will use the __LabelEncoder__ from the __sklearn.preprocessing__ library to do so.

- We will also convert intervals to middle values, so we can treat them as numerical variables.

- Medication is not a useful feature as it is, so we will create many new dummy features from it.

0. Diagnosis codes (ICD9) - 3 digits, are not understandable, so we will convert them to their respective disease names. This is actually decoding, but we will need this later on

In [266]:
# Adding a 0 to the codes that have only two digits
data['primary_diagnosis'] = data['primary_diagnosis'].apply(lambda x: '0' + x if len(x) == 2 else x)
data['primary_diagnosis'] = data['primary_diagnosis'].apply(lambda x: '00' + x if len(x) == 1 and x!='?' else x)
data['primary_diagnosis'] = data['primary_diagnosis'].apply(lambda x: x if len(x) == 3 else x[:3])
data['secondary_diagnosis'] = data['secondary_diagnosis'].apply(lambda x: '0' + x if len(x) == 2 else x)
data['secondary_diagnosis'] = data['secondary_diagnosis'].apply(lambda x: '00' + x if len(x) == 1 and x != '?' else x)
data['secondary_diagnosis'] = data['secondary_diagnosis'].apply(lambda x: x if len(x) == 3 else x[:3])
data['additional_diagnosis'] = data['additional_diagnosis'].apply(lambda x: '0' + x if len(x) == 2 else x)
data['additional_diagnosis'] = data['additional_diagnosis'].apply(lambda x: '00' + x if len(x) == 1 and x != '?' else x)
data['additional_diagnosis'] = data['additional_diagnosis'].apply(lambda x: x if len(x) == 3 else x[:3])

# This will ease our work later on
data['primary_diagnosis_description'] = data['primary_diagnosis'].map(icd_9['long_description'])
data['secondary_diagnosis_description'] = data['secondary_diagnosis'].map(icd_9['long_description'])
data['additional_diagnosis_description'] = data['additional_diagnosis'].map(icd_9['long_description'])

1. Encoding the variables - to binary

In [267]:
# Transform the target variable, and two meds features into a binary variable
data['readmitted_binary'] = data['readmitted_binary'].map({'No': 0, 'Yes': 1})
data['prescribed_diabetes_meds'] = data['prescribed_diabetes_meds'].map({'No': 0, 'Yes': 1})
data['change_in_meds_during_hospitalization'] = data['change_in_meds_during_hospitalization'].map({'No': 0, 'Ch': 1})

2. Encoding the variables - to class

In [268]:
# First we need to convert ? to NaN
# Print which features contain '?'
print('Features with ? : ', data.columns[data.isin(['?']).any()].tolist())

# Replace ? with NaN
data.replace('?', np.nan, inplace=True)

Features with ? :  ['race', 'weight', 'payer_code', 'medical_specialty', 'primary_diagnosis', 'secondary_diagnosis', 'additional_diagnosis']


In [299]:
# Encode the following variables: race, payer_code, admission_type, medical_specialty, discharge_disposition, admission_source
features_to_encode = ['race', 'payer_code', 'admission_type', 'medical_specialty', 'discharge_disposition', 'admission_source']
encoder_dict = {features_to_encode[i]: LabelEncoder() for i in range(len(features_to_encode))}
for feature in features_to_encode:
    encoder_dict[feature].fit(data[feature])
    data[feature] = encoder_dict[feature].transform(data[feature].values)
    # Convert the class of Nan to Nan, which is always the last
    data[feature].replace(encoder_dict[feature].transform(encoder_dict[feature].classes_)[-1], np.nan, inplace=True)
    print('Classes: ', dict(zip(encoder_dict[feature].classes_, encoder_dict[feature].transform(encoder_dict[feature].classes_))))

Classes:  {'AfricanAmerican': 0, 'Asian': 1, 'Caucasian': 2, 'Hispanic': 3, 'Other': 4, nan: 5}
Classes:  {'BC': 0, 'CH': 1, 'CM': 2, 'CP': 3, 'DM': 4, 'FR': 5, 'HM': 6, 'MC': 7, 'MD': 8, 'MP': 9, 'OG': 10, 'OT': 11, 'PO': 12, 'SI': 13, 'SP': 14, 'UN': 15, 'WC': 16, nan: 17}
Classes:  {'Elective': 0, 'Emergency': 1, 'Newborn': 2, 'Not Available': 3, 'Not Mapped': 4, 'Trauma Center': 5, 'Urgent': 6, nan: 7}
Classes:  {'AllergyandImmunology': 0, 'Anesthesiology': 1, 'Anesthesiology-Pediatric': 2, 'Cardiology': 3, 'Cardiology-Pediatric': 4, 'DCPTEAM': 5, 'Dentistry': 6, 'Dermatology': 7, 'Emergency/Trauma': 8, 'Endocrinology': 9, 'Endocrinology-Metabolism': 10, 'Family/GeneralPractice': 11, 'Gastroenterology': 12, 'Gynecology': 13, 'Hematology': 14, 'Hematology/Oncology': 15, 'Hospitalist': 16, 'InfectiousDiseases': 17, 'InternalMedicine': 18, 'Nephrology': 19, 'Neurology': 20, 'Neurophysiology': 21, 'Obsterics&Gynecology-GynecologicOnco': 22, 'Obstetrics': 23, 'ObstetricsandGynecology': 

3. Transforming age into a numerical variable

In [220]:
data['age'].replace({'[0-10)': 5, '[10-20)': 15, '[20-30)': 25, '[30-40)': 35, '[40-50)': 45, '[50-60)': 55, '[60-70)': 65, '[70-80)': 75, '[80-90)': 85, '[90-100)': 95}, inplace=True)

4. Transforming medication into dummy variables

In [221]:
# Transforming the medication list into many dummy variables
meds = data['medication'].str.get_dummies(sep=',')

# Remove any symbols from the column names
to_remove = ['[', ']', "'", ' ']
for symbol in to_remove:
    meds.columns = meds.columns.str.replace(symbol, '', regex=True)

Removing dupplicates

In [222]:
unique_cols = meds.columns.unique()
new_meds = pd.DataFrame(columns=unique_cols)
for col in unique_cols:
    if type(meds[col]) == pd.Series:
        new_meds[col] = meds[col]
    else:
        new_meds[col] = meds[col].max(axis=1)
meds = new_meds
meds

Unnamed: 0_level_0,acarbose,chlorpropamide,glimepiride,glimepiride-pioglitazone,glipizide,glipizide-metformin,glyburide,glyburide-metformin,insulin,metformin-pioglitazone,...,nateglinide,pioglitazone,repaglinide,rosiglitazone,tolazamide,tolbutamide,troglitazone,acetohexamide,metformin,Unnamed: 21_level_0
encounter_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
533253,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
426224,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
634063,0,0,1,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
890610,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
654194,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
451150,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
549298,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
327586,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
351214,0,0,0,0,1,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0


Handling combinations of drugs (containing the '-' symbol)

In [223]:
# Lets delete combinations of drugs
for col in meds.columns.unique():
    if '-' in col:
        meds.drop(col, axis=1, inplace=True)
meds

Unnamed: 0_level_0,acarbose,chlorpropamide,glimepiride,glipizide,glyburide,insulin,miglitol,nateglinide,pioglitazone,repaglinide,rosiglitazone,tolazamide,tolbutamide,troglitazone,acetohexamide,metformin,Unnamed: 17_level_0
encounter_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
533253,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
426224,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
634063,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
890610,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
654194,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
451150,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
549298,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
327586,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
351214,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0


In [224]:
# Rename the last column to avoid confusion
meds.rename(columns={'' : 'No_meds'}, inplace=True)

In [225]:
data = pd.concat([data,meds], axis=1)

### __III. Taking care of missing data__
Lets check for which values are missing (in percentage of the total dataframe size)

In [226]:
def get_missing_per(data):
    isna = (100 * data.isna().sum().sort_values(ascending=False) / len(data))
    isna = isna[isna != 0]

    missing_counts = 100 *data.apply(lambda x: x.value_counts().get('?', 0)).sort_values(ascending=False) / len(data)
    missing_counts = missing_counts[missing_counts != 0]

    return pd.concat([isna, missing_counts], axis=0).sort_values(ascending=False)

get_missing_per(data)

weight                              96.858479
glucose_test_result                 94.746772
a1c_test_result                     83.277322
readmitted_multiclass               30.000197
readmitted_binary                   30.000197
age                                  4.999705
additional_diagnosis_description     1.398306
additional_diagnosis                 1.398306
secondary_diagnosis_description      0.351787
secondary_diagnosis                  0.351787
primary_diagnosis_description        0.020636
primary_diagnosis                    0.020636
dtype: float64

#### __0. Taking care of variables with high % of missing values__
We have many variables with a missing value count upwards of 30%, which should be removed. Lets check how they are, before removing them.

##### 1. *Weight*

In [227]:
data.groupby('weight')['readmitted_binary'].mean().sort_values(ascending=False)

weight
[0-25)       0.171429
[175-200)    0.142857
?            0.111697
[50-75)      0.111635
[100-125)    0.111359
[75-100)     0.109325
[25-50)      0.104478
[150-175)    0.095238
[125-150)    0.052083
>200         0.000000
Name: readmitted_binary, dtype: float64

In [228]:
data.groupby('weight')['prescribed_diabetes_meds'].mean().sort_values(ascending=False)

weight
>200         1.000000
[175-200)    0.909091
[0-25)       0.791667
?            0.772677
[125-150)    0.765517
[150-175)    0.714286
[75-100)     0.694611
[100-125)    0.684800
[50-75)      0.665552
[25-50)      0.628866
Name: prescribed_diabetes_meds, dtype: float64

Although there seems to be some variance in the weight, with respect to the target variable, the missing values are too many to be imputed. We will remove this variable, as it might introduce bias in the model.

##### 2. *Glucose test results*

In [229]:
print(data.groupby('glucose_test_result')['readmitted_binary'].mean().sort_values(ascending=False))
print(data.groupby('a1c_test_result')['readmitted_binary'].mean().sort_values(ascending=False))

glucose_test_result
>300    0.142684
>200    0.130806
Norm    0.121816
Name: readmitted_binary, dtype: float64
a1c_test_result
>7      0.101551
Norm    0.099914
>8      0.096231
Name: readmitted_binary, dtype: float64


These two variables are very important to diagnose diabetes, and we can assume that if the test is Nan, it was not performed (probably because the patient was already known to be diabetic). We will impute the missing values with Not Performed.

We are also going to rename the columns to a1c_test_result and glucose_test_result, for better readability, and according to the following article:
<br> '*[A] diagnosis of diabetes can be made when the A1C exceeds 6.5% or when a random glycose level in a patient with classic symptoms exceeds 200 mg/dL.*' (https://www.ncbi.nlm.nih.gov/books/NBK551501/), meaning:
1. We can encode the a1c_test_result as 0 for 'Norma' and 1 for 'Abnormal', since the a1c values range from Norm, >7, >8. 
2. We will also use the glycose reading, with a threshold of >300mg/dL to encode it as 1, and 0 otherwise.
3. We can then join both dataframes, creating a new column name diabetes

In [230]:
glucose_map = {'Norm': 'Normal', '>200': 'Probably diabetic', '>300': 'Diabetic'}
a1c_map = {'Norm': 'Normal', '>7': 'Diabetic', '>8': 'Diabetic'}
data['glucose_test_result'] = data['glucose_test_result'].map(glucose_map).fillna('Not tested')
data['a1c_test_result'] = data['a1c_test_result'].map(a1c_map).fillna('Not tested')
print(data['glucose_test_result'].value_counts(), '\n')
print(data['a1c_test_result'].value_counts())

Not tested           96420
Normal                2597
Probably diabetic     1485
Diabetic              1264
Name: glucose_test_result, dtype: int64 

Not tested    84748
Diabetic      12028
Normal         4990
Name: a1c_test_result, dtype: int64


##### 3. *Medical specialty*

In [231]:
data.groupby('medical_specialty')['readmitted_binary'].mean().sort_values(ascending=False).head(5)

medical_specialty
55    0.500000
1     0.500000
39    0.250000
15    0.207547
16    0.202614
Name: readmitted_binary, dtype: float64

In [232]:
data[['medical_specialty','primary_diagnosis_description']].value_counts(dropna=False).head(10)

medical_specialty  primary_diagnosis_description                                                                               
0                  Diabetes mellitus without mention of complication - type II or unspecified type - not stated as uncontrolled    4179
                   Congestive heart failure - unspecified                                                                          3552
                   Coronary atherosclerosis of unspecified type of vessel - native or graft                                        2821
                   Acute myocardial infarction of anterolateral wall - episode of care unspecified                                 1998
                   Respiratory abnormality - unspecified                                                                           1876
4                  Coronary atherosclerosis of unspecified type of vessel - native or graft                                        1839
0                  Pneumonia - organism unspecified     

Medical speciality is a tricky one. We have a lot of missing values, but it can explain some of the variance in the target variable. We could try to input the missing values with the help of the primary_diagnosis variable, but then we would just have the same data twice. We will remove this variable.

##### 4. *Insurance provider code*

In [233]:
data['payer_code'].value_counts(dropna=False).head(5)

0     40256
8     32439
7      6274
15     5007
1      4655
Name: payer_code, dtype: int64

In [234]:
data.groupby('payer_code')['readmitted_binary'].mean().sort_values(ascending=False).head(10)

payer_code
14    0.133333
10    0.129630
9     0.125201
8     0.118900
11    0.115226
0     0.115209
5     0.108466
15    0.100537
7     0.097939
1     0.093864
Name: readmitted_binary, dtype: float64

There doesn't seem to be enough explained variance in the target variable to justify the use of this variable. We will remove it.

##### 5. Removing variables mentioned above

In [235]:
data = data.drop(['weight', 'payer_code', 'medical_specialty'], axis=1, errors='ignore')

#### __1. Taking care of the target variable__

Lets check if we can extract information from readmitted_binary to fill in the missing values in readmitted_multiclass (and *vice-versa*).

In [236]:
get_missing_per(data)

readmitted_multiclass               30.000197
readmitted_binary                   30.000197
age                                  4.999705
additional_diagnosis_description     1.398306
additional_diagnosis                 1.398306
secondary_diagnosis_description      0.351787
secondary_diagnosis                  0.351787
primary_diagnosis_description        0.020636
primary_diagnosis                    0.020636
dtype: float64

In [237]:
# Our target variable is readmitted_binary and readmitted_multiclass. From multiclass we can derive binary, so lets check if they're overlapping
data['readmitted_binary'][data['readmitted_binary'].isna()].index.equals(data['readmitted_multiclass'][data['readmitted_binary'].isna()].index)

True

Since the readmitted_binary missing values coincide with the readmitted_multiclass missing values, we can't use one to fill in the other. We will remove the missing values.

In [238]:
# They are indeed, so we have to drop the missing values from both
data = data.dropna(subset=['readmitted_binary', 'readmitted_multiclass'])

#### __2. Handling the admission variables__

- What are the admission_sources values? What about the admission types?

In [239]:
# Lets now check our isna() values again
get_missing_per(data)

age                                 4.993262
additional_diagnosis_description    1.415015
additional_diagnosis                1.415015
secondary_diagnosis_description     0.367792
secondary_diagnosis                 0.367792
primary_diagnosis_description       0.022461
primary_diagnosis                   0.022461
dtype: float64

In [240]:
print(data.groupby('admission_type', dropna=False)['readmitted_binary'].agg(['mean', 'count']).sort_values(by='mean', ascending=False))
print(data.groupby('admission_source', dropna=False)['readmitted_binary'].agg(['mean', 'count']).sort_values(by='mean', ascending=False))

                    mean  count
admission_type                 
1               0.114938  37742
6               0.112101  13024
7               0.109552   3706
3               0.106928   3320
0               0.103853  13211
4               0.088785    214
2               0.000000      6
5               0.000000     13
                      mean  count
admission_source                 
0                 0.181818     11
13                0.155039    129
4                 0.140187    107
8                 0.127731    595
11                0.125000      8
1                 0.116620  40319
16                0.106825   4718
12                0.105263    779
5                 0.105184  20678
9                 0.097311   1562
15                0.095964   2230
3                 0.079545     88
6                 0.000000      1
7                 0.000000      2
10                0.000000      7
2                 0.000000      1
14                0.000000      1


Not Available is the same as Nan, so lets replace Not available with nan for now

In [241]:
data['admission_type'] = data['admission_type'].apply(lambda x: None if x == 'Not Available' else x)
data['admission_source'] = data['admission_source'].apply(lambda x: None if x == ' Not Available' else x)

Lets use a predictive model to impute the Not Available values in admission_type and source. Before we do that, we need to encode the categorical variables.

Now for the KNN imputation, we need to encode all the categorical variables.

In [242]:
data

Unnamed: 0_level_0,patient_id,race,age,outpatient_visits_in_previous_year,emergency_visits_in_previous_year,inpatient_visits_in_previous_year,admission_type,average_pulse_bpm,discharge_disposition,admission_source,...,nateglinide,pioglitazone,repaglinide,rosiglitazone,tolazamide,tolbutamide,troglitazone,acetohexamide,metformin,No_meds
encounter_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
533253,70110,3,75.0,0,0,2,1,127,1,1,...,0,0,0,0,0,0,0,0,0,1
426224,29775006,1,55.0,0,0,0,1,128,4,12,...,0,0,0,0,0,0,0,0,0,0
634063,80729253,3,65.0,0,0,1,7,94,1,16,...,0,0,0,0,0,0,0,0,0,0
890610,2919042,1,65.0,0,0,1,1,81,1,9,...,0,0,0,0,0,0,0,0,0,1
654194,84871971,3,75.0,1,0,0,0,126,11,5,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660160,24531381,1,85.0,0,1,0,0,90,1,5,...,0,0,0,0,0,0,0,0,1,0
826429,4663818,1,75.0,0,0,0,6,98,25,5,...,0,0,0,0,0,0,0,0,1,0
332030,23397147,3,65.0,0,2,2,7,137,23,16,...,0,0,0,0,0,0,0,0,0,0
757560,52161750,3,65.0,0,0,2,1,123,3,1,...,0,0,0,0,0,0,0,0,0,0
