In [84]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from matplotlib import pyplot as plt
import feather
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [10]:
pd.options.display.max_rows = 20
pd.options.display.max_columns = 200

In [11]:
#list columns to include
include = ['CaseID', 'SEX', 'RACE_NEW', 'ETHNICITY_HISPANIC', 'PRNCPTX', 'CPT', 'WORKRVU', 'Age', 'ANESTHES', 'SURGSPEC', 'ELECTSURG', 'EMERGNCY', 'WNDCLAS', 'ASACLAS', 'DIABETES', 'SMOKE', 'DYSPNEA', 'FNSTATUS2', 'VENTILAT', 'HXCOPD', 'ASCITES', 'HXCHF', 'HYPERMED', 'RENAFAIL', 'DIALYSIS', 'DISCANCR', 'WNDINF', 'STEROID', 'WTLOSS', 'BLEEDDIS', 'TRANSFUS', 'PRSEPIS', 'HEIGHT', 'WEIGHT', 'PRSODM', 'PRBUN', 'PRCREAT', 'PRALBUM', 'PRBILI', 'PRSGOT', 'PRALKPH', 'PRWBC', 'PRHCT', 'PRPLATE', 'PRPTT', 'PRINR', 'PRPT', 'DOpertoD', 'OTHERCPT1', 'OTHERCPT2', 'OTHERCPT3', 'OTHERCPT4', 'OTHERCPT5', 'OTHERCPT6', 'OTHERCPT7', 'OTHERCPT8', 'OTHERCPT9', 'OTHERCPT10', 'CONCPT1', 'CONCPT2', 'CONCPT3', 'CONCPT4', 'CONCPT5', 'CONCPT6', 'CONCPT7', 'CONCPT8', 'CONCPT9', 'CONCPT10', 'INOUT', 'TRANST', 'OperYR', 'OPTIME', 'HtoODay', 'SSSIPATOS', 'DSSIPATOS', 'OSSIPATOS', 'PNAPATOS', 'VENTPATOS', 'UTIPATOS', 'SEPSISPATOS', 'SEPSHOCKPATOS', 'OPTIME','DISCHDEST','READMISSION1','READMPODAYS1', 'READMSUSPREASON1', 'READMUNRELSUSP1', 'READMRELICD91', 'READMRELICD101', 'READMUNRELICD91', 'READMUNRELICD101','TOTHLOS','STILLINHOSP','OTHCDIFF']

post_op = ['OUPNEUMO', 'CDARREST', 'CDMI', 'SUPINFEC', 'WNDINFD', 'ORGSPCSSI', 'URNINFEC', 'OTHDVT', 'PULEMBOL', 'RENAINSF', 'OPRENAFL', 'DEHIS', 'REINTUB', 'FAILWEAN', 'CNSCVA', 'OTHSYSEP', 'OTHBLEED', 'OTHSESHOCK', 'RETURNOR']

d_post_op = ['DSUPINFEC', 'DWNDINFD','DORGSPCSSI','DDEHIS','DOUPNEUMO','DREINTUB','DPULEMBOL','DFAILWEAN','DRENAINSF','DOPRENAFL','DURNINFEC','DCNSCVA','DCDARREST','DCDMI','DOTHBLEED','DOTHDVT','DOTHSYSEP', 'DOTHSESHOCK','RETORPODAYS']


col_features = ['COL_STEROID',
  'COL_MECH_BOWEL_PREP',
  'COL_ORAL_ANTIBIOTIC',
  'COL_CHEMO',
  'COL_INDICATION',
  'COL_EMERGENT',
 'COL_ICD9_EMERGENT',
 'COL_APPROACH',
 'COL_MARGINS',
  'COL_MALIGNANCYT',
 'COL_MALIGNANCYN',
 'COL_MALIGNANCYM',
 'COL_ANASTOMOTIC',
 'COL_ILEUS',
  'COL_NODESEVAL',
  'COL_ICD10_EMERGENT']

In [12]:
#make all column names capital and combine the two lists
incl = list(map(lambda x:x.upper(), include))
total = col_features + incl + post_op + d_post_op

In [13]:
post_op.append('READMISSION1')

In [14]:
#make sure CPT's are read in as strings
CPT_dict = {'OTHERCPT1': str, 'OTHERCPT2':str, 'OTHERCPT3':str, 'OTHERCPT4':str, 'OTHERCPT5':str, 'OTHERCPT6':str, 'OTHERCPT7':str, 'OTHERCPT8':str, 'OTHERCPT9':str, 'OTHERCPT10':str, 'CONCPT1':str, 'CONCPT2':str,'CONCPT3':str,'CONCPT4':str,'CONCPT5':str,'CONCPT6':str,'CONCPT7':str,'CONCPT8':str,'CONCPT9':str,'CONCPT10':str, 'REOPORCPT1':str, 'REOPOR2CPT1':str}

In [8]:
#read in the data
data = pd.read_csv(r'/home/kchen/Documents/nsqip_raw/procol/procol.csv', index_col='CASEID', usecols=total, dtype=CPT_dict, low_memory=False, na_values=[-99, '-99', 'Unknown'])

In [15]:
data.shape

(276232, 146)

In [16]:
#list CPT codes including ostomy placement and create column 'ost' marking ostomy
ost = [44211, 44212, 45113, 45119, 44155, 44157, 44158, 44125, 44187, 44141, 44143, 44144, 44146, 44150, 44151, 44206, 44208, 44210, 44187, 44188, 44320, 44310]
for CPT in ost:
    data.loc[data['CPT'] == CPT, 'ost'] = 1
data['ost'].value_counts(dropna=False)

NaN    219122
1.0     57110
Name: ost, dtype: int64

In [17]:
#create a list of CPT's as strings and search other+concurrent CPT for those
str_ost = []
for x in ost:  
    str_ost.append(str(x))
for i in range(1,11):
    for CPT in str_ost:
        data.loc[data['OTHERCPT%s' % i].str.contains(CPT, na=False), 'ost'] = 1
        data.loc[data['CONCPT%s' % i].str.contains(CPT, na=False), 'ost'] = 1
data['ost'].fillna(0, inplace=True)
data['ost'].value_counts(dropna=False)

0.0    203862
1.0     72370
Name: ost, dtype: int64

In [18]:
#same thing for ileostomy only
dli = [44211, 44212, 45113, 45119, 44155, 44157, 44158, 44125, 44187, 44310]
str_dli = []
for x in dli:  
    str_dli.append(str(x))
for CPT in dli:
    data.loc[data['CPT'] == CPT, 'dli'] = 1
data['dli'].value_counts(dropna=False)
for i in range(1,11):
    for CPT in str_dli:
        data.loc[data['OTHERCPT%s' % i].str.contains(CPT, na=False), 'dli'] = 1
        data.loc[data['CONCPT%s' % i].str.contains(CPT, na=False), 'dli'] = 1
data['dli'].fillna(0, inplace=True)
data['dli'].value_counts(dropna=False)

0.0    255971
1.0     20261
Name: dli, dtype: int64

In [19]:
data.loc[(data['OPERYR'] == 2018) & (data['READMISSION1'].isnull()), 'READMISSION1'] = 'No'
data.groupby('OPERYR')['READMISSION1'].value_counts(dropna=False)

OPERYR  READMISSION1
2012.0  No              15260
        Yes              1721
2013.0  No              19270
        Yes              2235
2014.0  No              22451
        Yes              2615
        NaN               196
2015.0  No              28076
        Yes              3231
2016.0  No              35871
        Yes              4254
2017.0  No              38273
        Yes              4442
2018.0  No              41241
        Yes              4766
2019.0  No              45888
        Yes              5362
Name: READMISSION1, dtype: int64

In [20]:
#drop patients missing primary outcome (readmission)
data.dropna(axis=0, subset=['READMISSION1'], inplace=True)
data['READMISSION1'].value_counts(dropna=False)

No     246330
Yes     28626
Name: READMISSION1, dtype: int64

In [21]:
#label encode primary outcome
le=LabelEncoder()
data['READMISSION1']=le.fit_transform(data['READMISSION1'])
data['READMISSION1'].value_counts()

0    246330
1     28626
Name: READMISSION1, dtype: int64

In [22]:
#drop patients discharged to acute care, hospice, or who died
data = data[data['DISCHDEST']!='Separate Acute Care']
data = data[data['DISCHDEST']!='Hospice']
data = data[data['DISCHDEST']!='Expired']
data['DISCHDEST'].value_counts(dropna=False)

Home                            236053
Skilled Care, Not Home           18462
Rehab                             7889
Facility Which was Home           2119
NaN                               1378
Unskilled Facility Not Home        324
Against Medical Advice (AMA)       148
Multi-level Senior Community        28
Name: DISCHDEST, dtype: int64

In [23]:
#Drop patients still in hospital at 30d
data = data[data['STILLINHOSP'] == 'No']

In [24]:
data.shape

(262745, 148)

#code to identify patients readmitted for dehydration
target = ['Progressive Renal Insufficiency', 'Acute Renal Failure']
icd9 = ['276.51', 'E904.2', '276.5', '584.5', '584.9', '584']
icd10 = ['E86.0', 'E86.9', 'N17.0', 'N17.9', 'N17.8']

data['dehyd'] = ((data['READMSUSPREASON1'].isin(target)) | (data['READMRELICD91'].isin(icd9)) | (data['READMRELICD101'].isin(icd10)))
data['dehyd'].value_counts()

data.groupby('READMISSION1')['dehyd'].value_counts(normalize=True)

data['readm_dehyd'] = np.where((data['READMISSION1'] == 1) & (data['dehyd'] == 1), 1, 0)

data['readm_dehyd'].value_counts(normalize=True)

In [25]:
#recode anastomotic leak to binary
data['COL_ANASTOMOTIC'].value_counts()

No definitive diagnosis of leak/leak related abscess       239330
No                                                          15559
Leak, treated w/ reoperation                                 3158
Leak, treated w/ interventional means                        1921
Leak, treated w/ non-interventional/non-operative means       974
Leak, no treatment intervention documented                    520
Yes-reoperation                                               273
Yes-percutaneous intervention                                 137
Yes-no intervention required                                  101
Leak, treated w/ non-interventional means                      79
Name: COL_ANASTOMOTIC, dtype: int64

In [26]:
data.loc[(data['COL_ANASTOMOTIC'] == 'No definitive diagnosis of leak/leak related abscess'), 'COL_ANASTOMOTIC'] = 0
data.loc[(data['COL_ANASTOMOTIC'] == 'No'), 'COL_ANASTOMOTIC'] = 0
data.loc[(data['COL_ANASTOMOTIC'] == 'Leak, no treatment intervention documented'), 'COL_ANASTOMOTIC'] = 0
data['COL_ANASTOMOTIC'].value_counts()

0                                                          255409
Leak, treated w/ reoperation                                 3158
Leak, treated w/ interventional means                        1921
Leak, treated w/ non-interventional/non-operative means       974
Yes-reoperation                                               273
Yes-percutaneous intervention                                 137
Yes-no intervention required                                  101
Leak, treated w/ non-interventional means                      79
Name: COL_ANASTOMOTIC, dtype: int64

In [27]:
#censor the anastomotic leak outcome if organ-space SSI ocurred after discharge
data.loc[(data['DORGSPCSSI'] >= data['READMPODAYS1']), 'COL_ANASTOMOTIC'] = 0
data['COL_ANASTOMOTIC'].value_counts()

0                                                          257798
Leak, treated w/ reoperation                                 2108
Leak, treated w/ interventional means                        1094
Leak, treated w/ non-interventional/non-operative means       661
Yes-reoperation                                               207
Yes-percutaneous intervention                                  90
Yes-no intervention required                                   84
Leak, treated w/ non-interventional means                      59
Name: COL_ANASTOMOTIC, dtype: int64

In [28]:
#
post_op = ['OUPNEUMO', 'CDARREST', 'CDMI', 'SUPINFEC', 'WNDINFD', 'ORGSPCSSI', 'URNINFEC', 'OTHDVT', 'PULEMBOL', 'RENAINSF', 'OPRENAFL', 'DEHIS', 'REINTUB', 'FAILWEAN', 'CNSCVA', 'OTHSYSEP', 'OTHBLEED', 'OTHSESHOCK']

d_post_op = ['DSUPINFEC', 'DWNDINFD','DORGSPCSSI','DDEHIS','DOUPNEUMO','DREINTUB','DPULEMBOL','DFAILWEAN','DRENAINSF','DOPRENAFL','DURNINFEC','DCNSCVA','DCDARREST','DCDMI','DOTHBLEED','DOTHDVT','DOTHSYSEP', 'DOTHSESHOCK']

In [29]:
#censor anastomotic leak if return to OR occurred after discharge
data.loc[(data['RETORPODAYS'] >= data['TOTHLOS']), 'COL_ANASTOMOTIC'] = 0
data['COL_ANASTOMOTIC'].value_counts()

0                                                          258095
Leak, treated w/ reoperation                                 1880
Leak, treated w/ interventional means                        1073
Leak, treated w/ non-interventional/non-operative means       648
Yes-reoperation                                               181
Yes-percutaneous intervention                                  89
Yes-no intervention required                                   82
Leak, treated w/ non-interventional means                      59
Name: COL_ANASTOMOTIC, dtype: int64

In [30]:
#binarize anastomotic leak
data.loc[(data['COL_ANASTOMOTIC'] != 0), 'COL_ANASTOMOTIC'] = 1

In [31]:
data['COL_ANASTOMOTIC'].value_counts()

0    258095
1      4650
Name: COL_ANASTOMOTIC, dtype: int64

In [32]:
#identify icd codes signifying readmission for ileus
icd9_ileus = ['560.9', '997.49']
icd10_ileus = ['K91.3', 'K56.6', 'K91.30', 'K56.60']

In [33]:
#create column identifying patients readmitted for ileus
data['readm_ileus'] = (((data['READMRELICD91'].isin(icd9_ileus)) | (data['READMRELICD101'].isin(icd10_ileus))))
data['readm_ileus'].value_counts()

False    261207
True       1538
Name: readm_ileus, dtype: int64

In [34]:
data['COL_ILEUS'].value_counts()

No     222537
Yes     39650
Name: COL_ILEUS, dtype: int64

In [35]:
#if patient readmitted for ileus, censor ileus outcome
data.loc[(data['readm_ileus'] == True), 'COL_ILEUS'] = 'No'
data['COL_ILEUS'].value_counts()

No     223469
Yes     38722
Name: COL_ILEUS, dtype: int64

In [36]:
data['ORGSPCSSI'].value_counts()

No Complication    249228
Organ/Space SSI     13517
Name: ORGSPCSSI, dtype: int64

In [37]:
data['RENAINSF'].value_counts()

No Complication                    260967
Progressive Renal Insufficiency      1778
Name: RENAINSF, dtype: int64

In [38]:
data['OPRENAFL'].value_counts()

No Complication        261866
Acute Renal Failure       879
Name: OPRENAFL, dtype: int64

In [39]:
#censor other post-op outcomes if they occurred after discharge
for x in post_op:
    data.loc[(data['D{}'.format(x)] >= data['TOTHLOS']), '{}'.format(x)] = 'No Complication'

In [40]:
data['ORGSPCSSI'].value_counts()

No Complication    255142
Organ/Space SSI      7603
Name: ORGSPCSSI, dtype: int64

In [41]:
data['RENAINSF'].value_counts()

No Complication                    261951
Progressive Renal Insufficiency       794
Name: RENAINSF, dtype: int64

In [42]:
data['OPRENAFL'].value_counts()

No Complication        262140
Acute Renal Failure       605
Name: OPRENAFL, dtype: int64

In [43]:
data['RETURNOR'].value_counts()

No     251341
Yes     11404
Name: RETURNOR, dtype: int64

In [44]:
#find the number of patients where 'RETORPODAYS' is greater than 'READMPODAYS1'
data.loc[(data['RETORPODAYS'] >= data['TOTHLOS']), 'RETURNOR'] = 'No'


In [45]:
data.loc[(data['RETORPODAYS'] >= data['TOTHLOS']), 'RETURNOR'] = 'No'

In [46]:
data['RETURNOR'].value_counts()

No     255477
Yes      7268
Name: RETURNOR, dtype: int64

In [47]:
#Create BMI column
lbs_to_kg_ratio = 0.453592
inch_to_meter_ratio = 0.0254

data['HEIGHT'] *= inch_to_meter_ratio
data['WEIGHT'] *= lbs_to_kg_ratio

data['BMI'] = data['WEIGHT']/(data['HEIGHT']**2)

#BMI <10 set to 10, BMI >50 set to 50
data.loc[data['BMI'] > 50, 'BMI'] = 50
data.loc[data['BMI'] < 12, 'BMI'] = 12


In [48]:
#Convert 90+ to 91 and AGE to int32
data.loc[data['AGE'] == '90+', 'AGE'] = 91
pd.to_numeric(data['AGE'], downcast='integer')

CASEID
6629599     44.0
6629786     38.0
6630805     84.0
6630866     71.0
6631518     50.0
            ... 
10363817    50.0
10363850    53.0
10363959    65.0
10363964    41.0
10363966    72.0
Name: AGE, Length: 262745, dtype: float64

In [49]:
#condense col_approach
data.loc[data['COL_APPROACH'] == 'SILS', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'SILS w/ open assist', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'SILS w/ unplanned conversion to open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'Endoscopic', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Endoscopic w/ open assist', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Endoscopic w/ unplanned conversion to open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'NOTES', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'NOTES w/ open assist', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'NOTES w/ unplanned conversion to open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'Other MIS approach', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Other MIS approach w/ open assist', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Other MIS approach w/ unplanned conversion to open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'Hybrid', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Hybrid w/ open assist', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Hybrid w/ unplanned conversion to open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'Robotic', 'COL_APPROACH'] = 'Robotic'
data.loc[data['COL_APPROACH'] == 'Robotic w/ open assist', 'COL_APPROACH'] = 'Robotic'
data.loc[data['COL_APPROACH'] == 'Robotic w/ unplanned conversion to open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'Laparoscopic w/ open assist', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Laparoscopic Hand Assisted', 'COL_APPROACH'] = 'Laparoscopic'
data.loc[data['COL_APPROACH'] == 'Laparoscopic w/ unplanned conversion to Open', 'COL_APPROACH'] = 'Laparoscopic w/ unplanned conversion to open'

data.loc[data['COL_APPROACH'] == 'Open', 'COL_APPROACH'] = 'Open (planned)'

data['COL_APPROACH'].value_counts()


Laparoscopic                                    128690
Open (planned)                                   90399
Robotic                                          23680
Laparoscopic w/ unplanned conversion to open     19511
Other                                              384
Name: COL_APPROACH, dtype: int64

In [50]:
data['COMBCPT'] = 0

In [51]:
#condense CPT codes
data.loc[data['CPT'] == 44204, 'COMBCPT'] = 'Laparoscopic partial colectomy'
data.loc[data['CPT'] == 44207, 'COMBCPT'] = 'Laparoscopic L colectomy'
data.loc[data['CPT'] == 44208, 'COMBCPT'] = 'Laparoscopic L colectomy'
data.loc[data['CPT'] == 44206, 'COMBCPT'] = 'Laparoscopic L colectomy'
data.loc[data['CPT'] == 44205, 'COMBCPT'] = 'Laparoscopic R colectomy'
data.loc[data['CPT'] == 44140, 'COMBCPT'] = 'Partial colectomy'
data.loc[data['CPT'] == 44144, 'COMBCPT'] = 'Partial colectomy'
data.loc[data['CPT'] == 44141, 'COMBCPT'] = 'Partial colectomy'
data.loc[data['CPT'] == 44160, 'COMBCPT'] = 'R colectomy'
data.loc[data['CPT'] == 44145, 'COMBCPT'] = 'L colectomy'
data.loc[data['CPT'] == 44143, 'COMBCPT'] = 'L colectomy'
data.loc[data['CPT'] == 44146, 'COMBCPT'] = 'L colectomy'
data.loc[data['CPT'] == 44146, 'COMBCPT'] = 'L colectomy'
data.loc[data['CPT'] == 44210, 'COMBCPT'] = 'Laparoscopic total colectomy'
data.loc[data['CPT'] == 44150, 'COMBCPT'] = 'Total colectomy'
data.loc[data['CPT'] == 44151, 'COMBCPT'] = 'Total colectomy'
data.loc[data['CPT'] == 44156, 'COMBCPT'] = 'Total colectomy'
data.loc[data['CPT'] == 44157, 'COMBCPT'] = 'Total colectomy'
data.loc[data['CPT'] == 44158, 'COMBCPT'] = 'Total colectomy'
data.loc[data['CPT'] == 45110, 'COMBCPT'] = 'APR'
data.loc[data['CPT'] == 45112, 'COMBCPT'] = 'APR'
data.loc[data['CPT'] == 45395, 'COMBCPT'] = 'Laparoscopic APR'
data.loc[data['CPT'] == 45130, 'COMBCPT'] = 'Proctectomy, perineal approach'
data.loc[data['CPT'] == 45123, 'COMBCPT'] = 'Proctectomy, perineal approach'
data.loc[data['CPT'] == 45135, 'COMBCPT'] = 'Proctectomy, perineal approach'
data.loc[data['CPT'] == 45121, 'COMBCPT'] = 'Proctectomy, perineal approach'
data.loc[data['CPT'] == 45395, 'COMBCPT'] = 'Laparoscopic LAR'
data.loc[data['CPT'] == 45111, 'COMBCPT'] = 'LAR'
data.loc[data['CPT'] == 45113, 'COMBCPT'] = 'LAR'
data.loc[data['CPT'] == 45119, 'COMBCPT'] = 'LAR'
data.loc[data['CPT'] == 45120, 'COMBCPT'] = 'LAR'
data.loc[data['CPT'] == 45397, 'COMBCPT'] = 'LAR'

data.loc[data['CPT'] == 44212, 'COMBCPT'] = 'Laparoscopic proctocolectomy'
data.loc[data['CPT'] == 44211, 'COMBCPT'] = 'Laparoscopic proctocolectomy'
data.loc[data['CPT'] == 44155, 'COMBCPT'] = 'Proctocolectomy'
data.loc[data['CPT'] == 44156, 'COMBCPT'] = 'Proctocolectomy'


data.loc[data['CPT'] == 45126, 'COMBCPT'] = 'Pelvic exenteration'

data.loc[data['CPT'] == 44147, 'COMBCPT'] = 'Colectomy, combined transanal approach'
data.loc[data['CPT'] == 45550, 'COMBCPT'] = 'Laparoscopic rectopexy'
data.loc[data['CPT'] == 45402, 'COMBCPT'] = 'Laparoscopic rectopexy'

data.loc[data['CPT'] == 45114, 'COMBCPT'] = 'Proctectomy, transsacral approach'
data.loc[data['CPT'] == 45160, 'COMBCPT'] = 'Proctectomy, transsacral approach'
data.loc[data['CPT'] == 45116, 'COMBCPT'] = 'Proctectomy, transsacral approach'


In [52]:
data['COMBCPT'].value_counts(dropna=False)

Laparoscopic partial colectomy            51426
Laparoscopic L colectomy                  46794
Partial colectomy                         37549
L colectomy                               37155
Laparoscopic R colectomy                  34900
R colectomy                               24425
Laparoscopic total colectomy               6727
Total colectomy                            6145
LAR                                        4332
APR                                        3705
Laparoscopic LAR                           2931
Proctectomy, perineal approach             2173
Laparoscopic proctocolectomy               2155
Proctocolectomy                             879
Laparoscopic rectopexy                      562
Pelvic exenteration                         410
Colectomy, combined transanal approach      367
Proctectomy, transsacral approach           110
Name: COMBCPT, dtype: int64

In [53]:
#create new race/ethnicity column
data.groupby('RACE_NEW')['ETHNICITY_HISPANIC'].value_counts()
data['RACE'] = data['RACE_NEW']
data.loc[data['ETHNICITY_HISPANIC'] == 'Yes', 'RACE'] = 'Hispanic'
data['RACE'].value_counts(dropna=False)

White                                  186560
Unknown/Not Reported                    30568
Black or African American               22914
Hispanic                                13256
Asian                                    7943
American Indian or Alaska Native          976
Native Hawaiian or Pacific Islander       528
Name: RACE, dtype: int64

In [54]:
data['COL_INDICATION'].value_counts()

Colon cancer                         88038
Chronic diverticular disease         35529
Other-Enter ICD-10 for diagnosis     26541
Acute diverticulitis                 20442
Non-malignant polyp                  19858
Crohn's Disease                      15753
Other-Enter ICD-9 for diagnosis      14773
Colon cancer w/ obstruction          10998
Ulcerative colitis                    8366
Rectal cancer                         8332
Volvulus                              6477
Rectal prolapse                       2449
Bleeding                              1515
Enterocolitis (e.g. C. Difficile)      844
Anal cancer                            400
Name: COL_INDICATION, dtype: int64

In [55]:
data.loc[data['COL_INDICATION'] == 'Other-Enter ICD-10 for diagnosis', 'COL_INDICATION'] = 'Other'
data.loc[data['COL_INDICATION'] == 'Other-Enter ICD-9 for diagnosis', 'COL_INDICATION'] = 'Other'
data.loc[data['COL_INDICATION'] == 'Crohn\'s disease', 'COL_INDICATION'] = 'Crohn\'s Disease'

In [56]:
#create a column called 'ADD_PROC' where the value is 1 if 'OTHERCPT1-10' and 'CONCPT1-10' is not null
data['ADDPROC'] = 0
for x in range(1,11):
    data.loc[(data['OTHERCPT'+str(x)].notnull()) & (data['CONCPT'+str(x)].notnull()), 'ADDPROC'] = 1
data['ADDPROC'].value_counts()

0    236302
1     26443
Name: ADDPROC, dtype: int64

In [85]:
data_imputed = data.copy()


In [86]:
outcomes = ['READMPODAYS1', 'READMSUSPREASON1', 'READMUNRELSUSP1', 'READMRELICD91', 'READMRELICD101', 'READMUNRELICD91', 'READMUNRELICD101','DSUPINFEC', 'DWNDINFD','DORGSPCSSI','DDEHIS','DOUPNEUMO','DREINTUB','DPULEMBOL','DFAILWEAN','DRENAINSF','DOPRENAFL','DURNINFEC','DCNSCVA','DCDARREST','DCDMI','DOTHBLEED','DOTHDVT','DOTHSYSEP', 'DOTHSESHOCK','RETORPODAYS', 'readm_ileus','COL_NODESEVAL','COL_MALIGNANCYT','COL_MALIGNANCYN']

In [87]:
num_cols = ['TOTHLOS','WORKRVU','AGE','HEIGHT','WEIGHT','PRSODM','PRBUN','PRCREAT','PRALBUM','PRBILI','PRSGOT','PRALKPH','PRWBC','PRHCT','PRPLATE','PRPTT','PRINR','PRPT','DOPERTOD', 'OPTIME', 'HTOODAY','BMI']
cat_cols = list(set(total) - set(num_cols) - set(outcomes) - set(['CASEID','ETHNICITY_HISPANIC','RACE_NEW']))
cat_cols.append('RACE')
cat_cols.append('COMBCPT')
%store num_cols

Stored 'num_cols' (list)


In [88]:
for col in cat_cols:
    data_imputed[col].fillna(value='Unknown', inplace=True)

In [89]:
data_imputed.drop(outcomes, axis=1, inplace=True)

In [90]:
col_missing = data_imputed.isnull().sum()/data.shape[0]
col_missing = col_missing.sort_values(ascending=False)
col_missing = col_missing[col_missing > 0]
(pd.DataFrame(col_missing)).head(15)

Unnamed: 0,0
DOPERTOD,0.99466
PRPT,0.992887
PRPTT,0.650939
PRINR,0.520672
PRSGOT,0.30364
PRALKPH,0.285452
PRBILI,0.2849
PRALBUM,0.2823
ETHNICITY_HISPANIC,0.113993
PRBUN,0.101635


In [91]:
drop = ['PRPT', 'PRPTT','PRSGOT', 'PRALKPH','PRBILI','PRINR','PRALBUM','ETHNICITY_HISPANIC','RACE_NEW','DOPERTOD']
data_imputed.drop(drop, axis=1, inplace=True)


In [92]:
for i in drop:
    if i in num_cols:
        num_cols.remove(i)

In [93]:
data_imputed.to_csv(r'/home/kchen/Documents/readm/data/table1_readm.csv', index=False)

In [94]:
num_imputer = SimpleImputer(strategy='median')

# Imputation for numeric:
for ncol in num_cols:
    data_imputed[ncol] = num_imputer.fit_transform(data[ncol].values.reshape(-1, 1))

In [95]:
#scale the numeric data
scaler = RobustScaler()
for ncol in num_cols:
    data_imputed[ncol] = scaler.fit_transform(data_imputed[ncol].values.reshape(-1, 1))

In [96]:
data_imputed.head()

Unnamed: 0_level_0,COL_CHEMO,COL_MALIGNANCYM,COL_APPROACH,COL_ANASTOMOTIC,COL_ILEUS,SEX,PRNCPTX,CPT,WORKRVU,INOUT,TRANST,AGE,OPERYR,DISCHDEST,ANESTHES,SURGSPEC,ELECTSURG,HEIGHT,WEIGHT,DIABETES,SMOKE,DYSPNEA,FNSTATUS2,VENTILAT,HXCOPD,ASCITES,HXCHF,HYPERMED,RENAFAIL,DIALYSIS,DISCANCR,WNDINF,STEROID,WTLOSS,BLEEDDIS,TRANSFUS,PRSEPIS,PRSODM,PRBUN,PRCREAT,PRWBC,PRHCT,PRPLATE,OTHERCPT1,OTHERCPT2,OTHERCPT3,OTHERCPT4,OTHERCPT5,OTHERCPT6,OTHERCPT7,OTHERCPT8,OTHERCPT9,OTHERCPT10,CONCPT1,CONCPT2,CONCPT3,CONCPT4,CONCPT5,CONCPT6,CONCPT7,CONCPT8,CONCPT9,CONCPT10,EMERGNCY,WNDCLAS,ASACLAS,OPTIME,TOTHLOS,HTOODAY,SUPINFEC,SSSIPATOS,WNDINFD,DSSIPATOS,ORGSPCSSI,OSSIPATOS,DEHIS,OUPNEUMO,PNAPATOS,REINTUB,PULEMBOL,FAILWEAN,VENTPATOS,RENAINSF,OPRENAFL,URNINFEC,UTIPATOS,CNSCVA,CDARREST,CDMI,OTHBLEED,OTHDVT,OTHSYSEP,SEPSISPATOS,OTHSESHOCK,SEPSHOCKPATOS,RETURNOR,STILLINHOSP,READMISSION1,OTHCDIFF,COL_INDICATION,COL_STEROID,COL_ORAL_ANTIBIOTIC,COL_MECH_BOWEL_PREP,COL_EMERGENT,COL_ICD9_EMERGENT,COL_MARGINS,COL_ICD10_EMERGENT,ost,dli,BMI,COMBCPT,RACE,ADDPROC
CASEID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1
6629599,No,Unknown,Open (planned),0,Yes,male,PRCTECT COMPL CMBN ABDOMINOPRNL W/CLST,45110.0,0.600277,Inpatient,Not transferred (admitted from home),-0.857143,2017.0,Home,General,General Surgery,Yes,0.333333,0.816667,NO,No,No,Independent,No,No,No,No,No,No,No,No,No,No,No,No,No,,0.75,-0.300883,0.862069,0.097701,1.175676,-0.228571,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,No,2-Clean/Contaminated,2-Mild Disturb,-0.243478,0.5,0.0,No Complication,No,No Complication,No,No Complication,No,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No Complication,No Complication,No Complication,No,No Complication,No,No,No,0,No Complication,Ulcerative colitis,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,0.0,0.0,0.749707,APR,White,0
6629786,No,Unknown,Open (planned),0,No,male,PRCTECT CMBN PULL-THRU W/RSVR W/NTRSTM,45119.0,0.976487,Inpatient,Not transferred (admitted from home),-1.142857,2017.0,Home,General,General Surgery,Yes,0.333333,0.966667,NO,No,No,Independent,No,No,No,No,No,No,No,No,No,No,No,No,No,,0.25,0.150442,0.724138,0.316092,1.175676,-0.47619,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,No,2-Clean/Contaminated,3-Severe Disturb,0.443478,-0.333333,0.0,No Complication,No,No Complication,No,No Complication,No,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No Complication,No Complication,No Complication,No,No Complication,No,No,No,0,No Complication,Ulcerative colitis,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,1.0,1.0,0.920883,LAR,Hispanic,0
6630805,No,Unknown,Laparoscopic,0,No,female,PRCTECT PRTL W/O ANAST PRNL APPR,45123.0,-1.045643,Inpatient,Not transferred (admitted from home),1.047619,2017.0,Home,General,General Surgery,Yes,0.166667,-0.683333,NON-INSULIN,No,No,Independent,No,No,No,No,Yes,No,No,No,No,No,No,No,No,,0.5,2.407065,0.172414,-0.086207,-0.513514,0.27619,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,57260.0,57250.0,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,No,2-Clean/Contaminated,3-Severe Disturb,-0.782609,-0.666667,0.0,No Complication,No,No Complication,No,No Complication,No,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No Complication,No Complication,No Complication,No,No Complication,No,No,No,0,No Complication,Rectal prolapse,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,0.0,0.0,-0.887693,"Proctectomy, perineal approach",White,0
6630866,No,M0/Mx,Open (planned),0,No,female,LAPS PROCTECTOMY COMBINED PULL-THRU W/RESERVOIR,45397.0,1.394191,Inpatient,Not transferred (admitted from home),0.428571,2017.0,Home,General,General Surgery,Yes,-0.333333,-0.466667,NO,No,No,Independent,No,No,No,No,No,No,No,No,No,No,No,No,No,,0.5,-0.752208,-0.517241,0.890805,0.067568,-0.085714,15734,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,52332.0,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,No,2-Clean/Contaminated,3-Severe Disturb,1.234783,-0.333333,0.0,No Complication,No,No Complication,No,No Complication,No,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No Complication,No Complication,No Complication,No,No Complication,No,No,No,0,No Complication,Rectal cancer,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,0.0,0.0,-0.364198,LAR,White,1
6631518,No,Unknown,Robotic,0,No,female,LAPS COLECTOMY ABDL W/PROCTECTOMY W/ILEOSTOMY,44212.0,1.128631,Inpatient,Not transferred (admitted from home),-0.571429,2017.0,Home,General,General Surgery,Yes,0.166667,-0.566667,NO,No,No,Independent,No,No,No,No,No,No,No,No,No,No,No,No,No,,0.0,-0.601766,-0.482759,0.689655,-0.378378,-0.057143,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,No,2-Clean/Contaminated,3-Severe Disturb,2.408696,-0.166667,0.0,No Complication,No,No Complication,No,No Complication,No,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No,No Complication,No Complication,No Complication,No Complication,No Complication,No Complication,No,No Complication,No,No,No,0,No Complication,Rectal cancer,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,Unknown,1.0,1.0,-0.750552,Laparoscopic proctocolectomy,White,0


In [97]:
data_imputed.shape

(262745, 113)

In [98]:
for col in cat_cols:
    data_imputed[col].fillna('Unknown', inplace=True)

In [99]:
le=LabelEncoder()
for col in cat_cols:
        data_imputed[col] = le.fit_transform(data_imputed[col].astype(str))


In [100]:
data['OPERYR'].value_counts()

2019.0    48918
2018.0    43908
2017.0    40783
2016.0    38410
2015.0    30023
2014.0    24001
2013.0    20559
2012.0    16143
Name: OPERYR, dtype: int64

In [101]:
data_imputed['OPERYR'].value_counts()

7    48918
6    43908
5    40783
4    38410
3    30023
2    24001
1    20559
0    16143
Name: OPERYR, dtype: int64

dum_data = pd.get_dummies(data_imputed, columns=['CPT', 'OPERYR', 'SEX', 'RACE_NEW', 'ETHNICITY_HISPANIC', 'TRANST', 'WNDCLAS', 'ASACLAS', 'DIABETES', 'SMOKE', 'DYSPNEA', 'FNSTATUS2', 'VENTILAT', 'HXCOPD', 'ASCITES', 'HXCHF', 'HYPERMED', 'RENAFAIL', 'DIALYSIS', 'STEROID', 'WTLOSS', 'BLEEDDIS', 'TRANSFUS', 'PRSEPIS', 'SEPSHOCKPATOS', 'SSSIPATOS', 'DSSIPATOS', 'OSSIPATOS', 'PNAPATOS', 'VENTPATOS', 'UTIPATOS', 'SEPSISPATOS', 'SEPSHOCKPATOS','RENAINSF', 'OPRENAFL'])

In [102]:
data19 = data_imputed[data_imputed['OPERYR'] == 7]
data19.shape

(48918, 113)

In [103]:
drop19 = data_imputed[data_imputed['OPERYR'] != 7]
drop19.shape

(213827, 113)

In [104]:
data19.to_csv(r'/home/kchen/Documents/readm/data/procol_test.csv')
drop19.to_csv(r'/home/kchen/Documents/readm/data/procol_train.csv')


In [105]:
import feather
feather_test = data19.reset_index()
feather_train = drop19.reset_index()


In [106]:
feather_test.to_feather(r'/home/kchen/Documents/readm/data/procol_test.feather')
feather_train.to_feather(r'/home/kchen/Documents/readm/data/procol_train.feather')

In [107]:
data_imputed.head()

Unnamed: 0_level_0,COL_CHEMO,COL_MALIGNANCYM,COL_APPROACH,COL_ANASTOMOTIC,COL_ILEUS,SEX,PRNCPTX,CPT,WORKRVU,INOUT,TRANST,AGE,OPERYR,DISCHDEST,ANESTHES,SURGSPEC,ELECTSURG,HEIGHT,WEIGHT,DIABETES,SMOKE,DYSPNEA,FNSTATUS2,VENTILAT,HXCOPD,ASCITES,HXCHF,HYPERMED,RENAFAIL,DIALYSIS,DISCANCR,WNDINF,STEROID,WTLOSS,BLEEDDIS,TRANSFUS,PRSEPIS,PRSODM,PRBUN,PRCREAT,PRWBC,PRHCT,PRPLATE,OTHERCPT1,OTHERCPT2,OTHERCPT3,OTHERCPT4,OTHERCPT5,OTHERCPT6,OTHERCPT7,OTHERCPT8,OTHERCPT9,OTHERCPT10,CONCPT1,CONCPT2,CONCPT3,CONCPT4,CONCPT5,CONCPT6,CONCPT7,CONCPT8,CONCPT9,CONCPT10,EMERGNCY,WNDCLAS,ASACLAS,OPTIME,TOTHLOS,HTOODAY,SUPINFEC,SSSIPATOS,WNDINFD,DSSIPATOS,ORGSPCSSI,OSSIPATOS,DEHIS,OUPNEUMO,PNAPATOS,REINTUB,PULEMBOL,FAILWEAN,VENTPATOS,RENAINSF,OPRENAFL,URNINFEC,UTIPATOS,CNSCVA,CDARREST,CDMI,OTHBLEED,OTHDVT,OTHSYSEP,SEPSISPATOS,OTHSESHOCK,SEPSHOCKPATOS,RETURNOR,STILLINHOSP,READMISSION1,OTHCDIFF,COL_INDICATION,COL_STEROID,COL_ORAL_ANTIBIOTIC,COL_MECH_BOWEL_PREP,COL_EMERGENT,COL_ICD9_EMERGENT,COL_MARGINS,COL_ICD10_EMERGENT,ost,dli,BMI,COMBCPT,RACE,ADDPROC
CASEID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1
6629599,0,5,2,0,2,1,35,22,0.600277,0,1,-0.857143,5,2,1,1,2,0.333333,0.816667,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.75,-0.300883,0.862069,0.097701,1.175676,-0.228571,1507,1316,1093,771,484,303,150,103,55,29,1455,934,686,308,149,92,47,25,15,12,0,1,1,-0.243478,0.5,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,2,0,0,0,0,0,0,0,1,12,1,1,1,6,186,1,145,0.0,0.0,0.749707,0,6,0
6629786,0,5,2,0,0,1,34,28,0.976487,0,1,-1.142857,5,2,1,1,2,0.333333,0.966667,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.25,0.150442,0.724138,0.316092,1.175676,-0.47619,1507,1316,1093,771,484,303,150,103,55,29,1455,934,686,308,149,92,47,25,15,12,0,1,2,0.443478,-0.333333,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,2,0,0,0,0,0,0,0,1,12,1,1,1,6,186,1,145,1.0,1.0,0.920883,3,3,0
6630805,0,5,0,0,0,0,42,31,-1.045643,0,1,1.047619,5,2,1,1,2,0.166667,-0.683333,2,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0.5,2.407065,0.172414,-0.086207,-0.513514,0.27619,1507,1316,1093,771,484,303,150,103,55,29,1146,722,686,308,149,92,47,25,15,12,0,1,2,-0.782609,-0.666667,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,2,0,0,0,0,0,0,0,1,11,1,1,1,6,186,1,145,0.0,0.0,-0.887693,13,6,0
6630866,0,1,2,0,0,0,31,37,1.394191,0,1,0.428571,5,2,1,1,2,-0.333333,-0.466667,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.5,-0.752208,-0.517241,0.890805,0.067568,-0.085714,105,1316,1093,771,484,303,150,103,55,29,996,934,686,308,149,92,47,25,15,12,0,1,2,1.234783,-0.333333,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,2,0,0,0,0,0,0,0,1,10,1,1,1,6,186,1,145,0.0,0.0,-0.364198,3,6,1
6631518,0,5,4,0,0,0,24,21,1.128631,0,1,-0.571429,5,2,1,1,2,0.166667,-0.566667,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,-0.601766,-0.482759,0.689655,-0.378378,-0.057143,1507,1316,1093,771,484,303,150,103,55,29,1455,934,686,308,149,92,47,25,15,12,0,1,2,2.408696,-0.166667,0.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,2,0,0,0,0,0,0,0,1,10,1,1,1,6,186,1,145,1.0,1.0,-0.750552,8,6,0
