In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
import statsmodels

In [3]:
#pd.__version__

# Import demographic and hospitalization dataset.
   clean data: check missing data, reset data types

In [4]:
#import demographic
#encoding='iso-8859-1'
demogrh = pd.read_csv('/home/chenf1/pc4/data/demographic.csv',encoding = "ISO-8859-1",dtype={'patientid':'str','siteid':'str',"funddiagnosistxt":"str","funddiagnosis":"str"})

In [5]:
demogrh.isnull().sum()

patientid              0
gender                 0
raceasian              0
raceblack              0
racecaucasian          0
racenativeam           0
racenativepi           0
raceother              0
ethnicity              0
extracardyn            0
chromsyndyn            0
antenataldiag       6196
funddiagnosis          9
funddiagnosistxt       0
siteid                 0
dtype: int64

In [6]:
#fill missing data
demogrh["antenataldiag"] = demogrh["antenataldiag"].fillna(9)
demogrh.antenataldiag.value_counts()

0.0    20349
1.0    12377
9.0     8527
Name: antenataldiag, dtype: int64

In [7]:
demogrh.loc[:,demogrh.select_dtypes(include ='int64').columns] = demogrh.select_dtypes(include ='int64').apply(lambda x: x.astype('category'))
demogrh["antenataldiag"] = demogrh["antenataldiag"].astype("category")

In [8]:
demogrh.dtypes

patientid             object
gender              category
raceasian           category
raceblack           category
racecaucasian       category
racenativeam        category
racenativepi        category
raceother           category
ethnicity           category
extracardyn         category
chromsyndyn         category
antenataldiag       category
funddiagnosis         object
funddiagnosistxt      object
siteid                object
dtype: object

In [9]:
#import hospitalization
hopita = pd.read_sas('/home/chenf1/pc4/data/hospitalization.sas7bdat')

In [10]:
hopita.isnull().sum()

patientID                 0
hosptype                  0
hospadmitage              0
hospadmitagegroup         0
hospadmitwt               0
hospadmitlen           2828
insprimtype            3725
hospdischstat             0
venttotal                 0
cicueverunplannedyn       0
dnreveryn                 0
withdrawaleveryn          0
ecmoeveryn                0
siteid                    0
hospitalizationid         0
hospadmitdt               0
hospdischdt               0
dtype: int64

In [11]:
###insurance type####
hopita["insprimtype"] = hopita["insprimtype"].fillna(9)
hopita.insprimtype.value_counts()

1.0    26554
2.0    22865
9.0     3725
4.0      758
3.0      275
Name: insprimtype, dtype: int64

In [12]:
hopita.loc[:,['hosptype','hospadmitagegroup','insprimtype','hospdischstat','cicueverunplannedyn','dnreveryn','withdrawaleveryn','ecmoeveryn']]=hopita.loc[:,['hosptype','hospadmitagegroup','insprimtype','hospdischstat','cicueverunplannedyn','dnreveryn','withdrawaleveryn','ecmoeveryn']].apply(lambda x: x.astype('category'))
hopita.loc[:,['patientID','siteid','hospitalizationid']] = hopita.astype({'patientID':'str','siteid':'str','hospitalizationid':'str'})

In [13]:
hopita.dtypes

patientID                      object
hosptype                     category
hospadmitage                  float64
hospadmitagegroup            category
hospadmitwt                   float64
hospadmitlen                  float64
insprimtype                  category
hospdischstat                category
venttotal                     float64
cicueverunplannedyn          category
dnreveryn                    category
withdrawaleveryn             category
ecmoeveryn                   category
siteid                         object
hospitalizationid              object
hospadmitdt            datetime64[ns]
hospdischdt            datetime64[ns]
dtype: object

In [14]:
hopita.describe()

Unnamed: 0,hospadmitage,hospadmitwt,hospadmitlen,venttotal
count,54177.0,54177.0,51349.0,54177.0
mean,1949.480001,20.28842,89.913661,101.207045
std,3026.400439,25.182369,42.583642,407.055302
min,0.0,0.37,15.0,0.0
25%,77.0,4.36,55.0,0.0
50%,409.0,8.8,72.0,5.166667
75%,2814.0,23.5,119.0,48.166667
max,26862.0,200.0,246.4,14321.883333


# Table 1 summary table(1)--demo and hopit

In [15]:
# gender
demogrh.gender.value_counts()

1    22727
2    18510
3       15
9        1
Name: gender, dtype: int64

In [16]:
demogrh.dtypes

patientid             object
gender              category
raceasian           category
raceblack           category
racecaucasian       category
racenativeam        category
racenativepi        category
raceother           category
ethnicity           category
extracardyn         category
chromsyndyn         category
antenataldiag       category
funddiagnosis         object
funddiagnosistxt      object
siteid                object
dtype: object

In [17]:
# gender vs. race
blk_sex = demogrh[demogrh['raceblack'] == 1].gender.value_counts()
wht_sex = demogrh[demogrh['racecaucasian'] == 1].gender.value_counts()
asn_sex = demogrh[demogrh['raceasian'] == 1].gender.value_counts()
hisp_sex = demogrh[demogrh['ethnicity'] == 1].gender.value_counts()
oth_sex = demogrh[(demogrh['ethnicity'] != 1) & (demogrh['raceblack'] != 1) & (demogrh['racecaucasian'] != 1) & (demogrh['raceasian'] !=1)].gender.value_counts()

race_sex = pd.concat([blk_sex,wht_sex,asn_sex,hisp_sex,oth_sex], keys=['black', 'white','Asian','Hispanic','Others'])
race_sex

black     1     3438
          2     2925
          9        1
          3        1
white     1    14544
          2    11665
          3        4
          9        0
Asian     1      908
          2      796
          9        0
          3        0
Hispanic  1     3817
          2     3314
          3        1
          9        0
Others    1     2712
          2     2153
          3        9
          9        0
Name: gender, dtype: int64

In [18]:
hopita.dtypes

patientID                      object
hosptype                     category
hospadmitage                  float64
hospadmitagegroup            category
hospadmitwt                   float64
hospadmitlen                  float64
insprimtype                  category
hospdischstat                category
venttotal                     float64
cicueverunplannedyn          category
dnreveryn                    category
withdrawaleveryn             category
ecmoeveryn                   category
siteid                         object
hospitalizationid              object
hospadmitdt            datetime64[ns]
hospdischdt            datetime64[ns]
dtype: object

In [19]:
#age group
hopita.hospadmitagegroup.value_counts() 

4.0    23916
3.0    15523
2.0     8809
5.0     3862
1.0     2067
Name: hospadmitagegroup, dtype: int64

In [20]:
#weight group


In [21]:
#Antenatal diagnosis
demogrh.antenataldiag.value_counts() 

0.0    20349
1.0    12377
9.0     8527
Name: antenataldiag, dtype: int64

In [22]:
#Extra-cardiac abnormality
demogrh.extracardyn.value_counts()

0    34310
1     6838
9      105
Name: extracardyn, dtype: int64

In [23]:
#Chromosomal abnormality
demogrh.chromsyndyn.value_counts()

0    32243
1     8814
9      196
Name: chromsyndyn, dtype: int64

In [24]:
#Hospitalization type
hopita.hosptype.value_counts() 

1.0    36088
2.0    18089
Name: hosptype, dtype: int64

In [25]:
#Unplanned initial CICU admission
hopita.cicueverunplannedyn.value_counts() 

0.0    37313
1.0    16864
Name: cicueverunplannedyn, dtype: int64

# merge two dataframe, race variables with hopitalization IDs

In [26]:
hopita = hopita.rename(columns={"patientID": "patientid"})

In [27]:
hospita_race = pd.merge(hopita, demogrh, how='left', on = ['patientid'])

In [28]:
#hopita.patientid = hopita.patientid.str.replace('.0','').astype('str')

# import demo from sas file
and merge with hospitablizaion: hospita_race

In [29]:
#import demographics from sas
demo_sas = pd.read_sas('/home/chenf1/pc4/data/demographic.sas7bdat')

In [30]:
demo_sas.dtypes

patientid           float64
gender              float64
raceasian           float64
raceblack           float64
racecaucasian       float64
racenativeam        float64
racenativepi        float64
raceother           float64
ethnicity           float64
extracardyn         float64
chromsyndyn         float64
antenataldiag       float64
funddiagnosis       float64
funddiagnosistxt     object
siteid              float64
dtype: object

In [31]:
demo_sas.isnull().sum()

patientid              0
gender                 0
raceasian              0
raceblack              0
racecaucasian          0
racenativeam           0
racenativepi           0
raceother              0
ethnicity              0
extracardyn            0
chromsyndyn            0
antenataldiag       6196
funddiagnosis          9
funddiagnosistxt       0
siteid                 0
dtype: int64

In [32]:
#fill missing data
#demo_sas["antenataldiag"] = demo_sas["antenataldiag"].fillna("9.0")
#demo_sas["antenataldiag"] = demo_sas["antenataldiag"].astype("category")
#demo_sas.antenataldiag.value_counts()

In [33]:
demo_sas.loc[:,['patientid','siteid','funddiagnosis','funddiagnosistxt']] = demo_sas.astype({'patientid':'str','siteid':'str',"funddiagnosis":"str",'funddiagnosistxt':'str'})

In [34]:
demo_sas.loc[:,demo_sas.select_dtypes(include ='float64').columns] = demo_sas.select_dtypes(include ='float64').apply(lambda x: x.astype('category'))

In [35]:
demo_sas.head(4)

Unnamed: 0,patientid,gender,raceasian,raceblack,racecaucasian,racenativeam,racenativepi,raceother,ethnicity,extracardyn,chromsyndyn,antenataldiag,funddiagnosis,funddiagnosistxt,siteid
0,977.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,7777.0,"b'Miscellaneous, Other'",116.0
1,1042.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,960.0,"b'DORV, Remote VSD (uncommitted VSD)'",116.0
2,1068.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,330.0,"b'Pulmonary atresia, IVS'",116.0
3,1078.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1410.0,b'Persistent fetal circulation',116.0


In [36]:
demo_sas.sort_values(by=['patientid'],ascending=False)

Unnamed: 0,patientid,gender,raceasian,raceblack,racecaucasian,racenativeam,racenativepi,raceother,ethnicity,extracardyn,chromsyndyn,antenataldiag,funddiagnosis,funddiagnosistxt,siteid
2840,999.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,730.0,b'Hypoplastic left heart syndrome (HLHS)',101.0
14736,9972.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,9.0,1.0,0.0,1.0,160.0,b'Truncus arteriosus',122.0
19438,9971.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,990.0,b'Coarctation of aorta',127.0
19437,9970.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,73.0,"b'VSD, Type 2 (Perimembranous) (Paramembranous...",127.0
19436,9969.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,"b'VSD, Type 2 (Perimembranous) (Paramembranous...",127.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23279,10065.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,290.0,b'TOF',120.0
9773,10063.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,560.0,"b'Aortic stenosis, Valvar'",110.0
7105,10062.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,290.0,b'TOF',119.0
23278,10061.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2140.0,"b'TOF, Pulmonary stenosis'",120.0


In [37]:
#race_summ = pd.DataFrame(demo_sas.groupby(['raceasian','raceblack','racecaucasian','ethnicity','racenativeam','racenativepi','raceother']).size())
#race_summ

In [38]:
#create new race category variable: 1. hispanic, 2.black, 3.white, 4.asian,5.multiple & others, 9.unknown
#demo_sas["race_grp"] = demo_sas[['raceblack','racecaucasian','raceasian','ethnicity']].apply(lambda x: 1 if (x['ethnicity'] == 1.0) 
#                                                                                                     else 2 if ((x['raceblack'] ==1.0) & (x['racecaucasian'] ==0.0) & (x['raceasian'] ==0.0)) 
#                                                                                                     else 3 if ((x['raceblack'] ==0.0) & (x['racecaucasian'] ==1.0) & (x['raceasian'] ==0.0))
#                                                                                                     else 4 if ((x['raceblack'] ==0.0) & (x['racecaucasian'] ==0.0) & (x['raceasian'] ==1.0))
#                                                                                                     else 5 if ((x['raceblack'] + x['racecaucasian'] + x['raceasian'] == 2.0) | (x['raceblack'] + x['racecaucasian'] + x['raceasian'] == 3.0) )
#                                                                                                     else 9 if ((x['raceblack'] ==9.0) & (x['racecaucasian'] ==9.0) & (x['raceasian'] ==9.0))
#                                                                                                     else 6, axis = 1)

In [39]:
#create new race category variable: 1. hispanic, 2.black, 3.white, 4.asian,5. multiple & others, 9.unknown
demo_sas["race_grp"] = demo_sas[['raceblack','racecaucasian','raceasian','ethnicity']].apply(lambda x: 1 if (x['ethnicity'] == 1.0) 
                                                                                                     else 2 if ((x['raceblack'] ==1.0) & (x['racecaucasian'] ==0.0) & (x['raceasian'] ==0.0)) 
                                                                                                     else 3 if ((x['raceblack'] ==0.0) & (x['racecaucasian'] ==1.0) & (x['raceasian'] ==0.0))
                                                                                                     else 4 if ((x['raceblack'] ==0.0) & (x['racecaucasian'] ==0.0) & (x['raceasian'] ==1.0))
                                                                                                     else 9 if ((x['raceblack'] ==9.0) & (x['racecaucasian'] ==9.0) & (x['raceasian'] ==9.0))
                                                                                                     else 5, axis = 1)

In [40]:
demo_sas["race_grp"].value_counts()

3    21400
1     7132
2     5950
5     4067
4     1559
9     1145
Name: race_grp, dtype: int64

# merge demo and hopi, create a regroup variable of race

In [41]:
#panda version issure, need upgrade to 23
hospita_race = hopita.merge(demo_sas[['patientid','raceblack','racecaucasian','raceasian','ethnicity']], left_on = ["patientid"] , right_on = ["patientid"], sort = True, how = 'left')

In [42]:
hospita_race.head(4)

Unnamed: 0,patientid,hosptype,hospadmitage,hospadmitagegroup,hospadmitwt,hospadmitlen,insprimtype,hospdischstat,venttotal,cicueverunplannedyn,...,withdrawaleveryn,ecmoeveryn,siteid,hospitalizationid,hospadmitdt,hospdischdt,raceblack,racecaucasian,raceasian,ethnicity
0,10057.0,2.0,6709.0,5.0,37.6,147.8,1.0,1.0,0.0,0.0,...,0.0,0.0,119.0,1190000031408.0,2015-08-21,2015-08-22,0.0,1.0,0.0,0.0
1,10061.0,2.0,2759.0,4.0,19.4,109.0,2.0,1.0,0.0,0.0,...,0.0,0.0,120.0,1200000004957.0,2015-02-13,2015-02-14,0.0,1.0,0.0,0.0
2,10062.0,1.0,5567.0,4.0,47.6,160.0,2.0,1.0,15.783333,0.0,...,0.0,0.0,119.0,1190000031280.0,2015-07-21,2015-07-26,0.0,1.0,0.0,0.0
3,10063.0,1.0,3356.0,4.0,25.4,132.0,1.0,1.0,9.066667,0.0,...,0.0,0.0,110.0,1100000003992.0,2015-03-02,2015-03-06,1.0,0.0,0.0,0.0


In [43]:
hospita_race.dtypes

patientid                      object
hosptype                     category
hospadmitage                  float64
hospadmitagegroup            category
hospadmitwt                   float64
hospadmitlen                  float64
insprimtype                  category
hospdischstat                category
venttotal                     float64
cicueverunplannedyn          category
dnreveryn                    category
withdrawaleveryn             category
ecmoeveryn                   category
siteid                         object
hospitalizationid              object
hospadmitdt            datetime64[ns]
hospdischdt            datetime64[ns]
raceblack                    category
racecaucasian                category
raceasian                    category
ethnicity                    category
dtype: object

In [44]:
hospita_race.isnull().sum()

patientid                 0
hosptype                  0
hospadmitage              0
hospadmitagegroup         0
hospadmitwt               0
hospadmitlen           2828
insprimtype               0
hospdischstat             0
venttotal                 0
cicueverunplannedyn       0
dnreveryn                 0
withdrawaleveryn          0
ecmoeveryn                0
siteid                    0
hospitalizationid         0
hospadmitdt               0
hospdischdt               0
raceblack                 0
racecaucasian             0
raceasian                 0
ethnicity                 0
dtype: int64

In [45]:
hospita_race[['raceblack','racecaucasian','raceasian','ethnicity']].apply(lambda x: x.value_counts())

Unnamed: 0,raceblack,racecaucasian,raceasian,ethnicity
0.0,44182,18157,50557,42621
1.0,8601,34653,2225,9523
9.0,1394,1367,1395,2033


In [46]:
#create new race category variable: 1. hispanic, 2.black, 3.white, 4.asian,5.multiple & others, 9.unknown
hospita_race["race_grp"] = hospita_race[['raceblack','racecaucasian','raceasian','ethnicity']].apply(lambda x: 1 if (x['ethnicity'] == 1.0) 
                                                                                                     else 2 if ((x['raceblack'] ==1.0) & (x['racecaucasian'] ==0.0) & (x['raceasian'] ==0.0)) 
                                                                                                     else 3 if ((x['raceblack'] ==0.0) & (x['racecaucasian'] ==1.0) & (x['raceasian'] ==0.0))
                                                                                                     else 4 if ((x['raceblack'] ==0.0) & (x['racecaucasian'] ==0.0) & (x['raceasian'] ==1.0))
                                                                                                     else 9 if ((x['raceblack'] ==9.0) & (x['racecaucasian'] ==9.0) & (x['raceasian'] ==9.0))
                                                                                                     else 5, axis = 1)

In [47]:
"""def race_regrp(x):
    if (x['ethnicity'] == '1.0'): result = 1
    elif ((x['raceblack'] =='1.0') & (x['racecaucasian'] =='0.0') & (x['raceasian'] =='0.0')) : result = 2
    elif ((x['raceblack'] =='0.0') & (x['racecaucasian'] =='1.0') & (x['raceasian'] =='0.0')) : result = 3
    elif ((x['raceblack'] =='0.0') & (x['racecaucasian'] =='0.0') & (x['raceasian'] =='1.0')) : result = 4
    elif ((x['raceblack'] =='9.0') & (x['racecaucasian'] =='9.0') & (x['raceasian'] =='9.0')) : result = 5 
    else : result = 6
    return result
hospita_race["race_grp"] = hospita_race[['raceblack','racecaucasian','raceasian','ethnicity']].apply(race_regrp)"""

'def race_regrp(x):\n    if (x[\'ethnicity\'] == \'1.0\'): result = 1\n    elif ((x[\'raceblack\'] ==\'1.0\') & (x[\'racecaucasian\'] ==\'0.0\') & (x[\'raceasian\'] ==\'0.0\')) : result = 2\n    elif ((x[\'raceblack\'] ==\'0.0\') & (x[\'racecaucasian\'] ==\'1.0\') & (x[\'raceasian\'] ==\'0.0\')) : result = 3\n    elif ((x[\'raceblack\'] ==\'0.0\') & (x[\'racecaucasian\'] ==\'0.0\') & (x[\'raceasian\'] ==\'1.0\')) : result = 4\n    elif ((x[\'raceblack\'] ==\'9.0\') & (x[\'racecaucasian\'] ==\'9.0\') & (x[\'raceasian\'] ==\'9.0\')) : result = 5 \n    else : result = 6\n    return result\nhospita_race["race_grp"] = hospita_race[[\'raceblack\',\'racecaucasian\',\'raceasian\',\'ethnicity\']].apply(race_regrp)'

In [48]:
hospita_race.race_grp.value_counts()

3    28220
1     9523
2     8026
5     5089
4     2026
9     1293
Name: race_grp, dtype: int64

In [49]:
hospita_race[['raceblack','racecaucasian','raceasian','ethnicity','race_grp']].head()

Unnamed: 0,raceblack,racecaucasian,raceasian,ethnicity,race_grp
0,0.0,1.0,0.0,0.0,3
1,0.0,1.0,0.0,0.0,3
2,0.0,1.0,0.0,0.0,3
3,1.0,0.0,0.0,0.0,2
4,0.0,1.0,0.0,0.0,3


# cross table and chi squre tests (Table 1)

In [50]:
hospita_race.dtypes

patientid                      object
hosptype                     category
hospadmitage                  float64
hospadmitagegroup            category
hospadmitwt                   float64
hospadmitlen                  float64
insprimtype                  category
hospdischstat                category
venttotal                     float64
cicueverunplannedyn          category
dnreveryn                    category
withdrawaleveryn             category
ecmoeveryn                   category
siteid                         object
hospitalizationid              object
hospadmitdt            datetime64[ns]
hospdischdt            datetime64[ns]
raceblack                    category
racecaucasian                category
raceasian                    category
ethnicity                    category
race_grp                        int64
dtype: object

In [51]:
print(hospita_race.withdrawaleveryn.value_counts())
print(hospita_race.dnreveryn.value_counts())
print(hospita_race.insprimtype.value_counts())

0.0    52817
1.0     1360
Name: withdrawaleveryn, dtype: int64
0.0    51700
1.0     1441
9.0     1036
Name: dnreveryn, dtype: int64
1.0    26554
2.0    22865
9.0     3725
4.0      758
3.0      275
Name: insprimtype, dtype: int64


In [52]:
###demo gender, extracardyn, chromsyndyn ,antenataldiag vs race###

#### hospitalization type, unplanned, age group, dnreveryn,withdrawaleveryn,insprimtype vs race###
#counts/freqs
#contingency_table = pd.crosstab(hospita_race['hosptype'], hospita_race['race_grp'],margins = True)
contingency_table = pd.crosstab(hospita_race['insprimtype'],hospita_race['race_grp'])
contingency_table

race_grp,1,2,3,4,5,9
insprimtype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,6708,5700,10315,677,3012,142
2.0,2161,1896,15963,1181,1454,210
3.0,61,16,50,18,117,13
4.0,145,96,291,24,195,7
9.0,448,318,1601,126,311,921


In [53]:
#chi square test
stats.chi2_contingency(np.array(contingency_table))[0:3]

(15302.184749418255, 0.0, 20)

In [54]:
#another way for chi square test
f_obs = np.array([contingency_table.iloc[0][0:6].values,
                  contingency_table.iloc[1][0:6].values,
                 contingency_table.iloc[2][0:6].values + contingency_table.iloc[3][0:6].values,
                 contingency_table.iloc[4][0:6].values])
print(f_obs)
stats.chi2_contingency(f_obs)[0:3]

[[ 6708  5700 10315   677  3012   142]
 [ 2161  1896 15963  1181  1454   210]
 [  206   112   341    42   312    20]
 [  448   318  1601   126   311   921]]


(15199.737741427653, 0.0, 15)

In [55]:
#percentages
#cross table row percentage
#pd.crosstab(hospita_race.hosptype,hospita_race.race_grp, normalize='index').round(4)*100
pd.crosstab(hospita_race.hosptype,hospita_race.race_grp).apply(lambda r: r/r.sum(), axis=1)
#cross table column percentage
pd.crosstab(hospita_race.hosptype,hospita_race.race_grp).apply(lambda r: r/r.sum(), axis=0)


race_grp,1,2,3,4,5,9
hosptype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,0.649165,0.628333,0.672431,0.672261,0.687561,0.793503
2.0,0.350835,0.371667,0.327569,0.327739,0.312439,0.206497


# import surgey hopitalization

In [56]:
#import surgery hopitablization
surg_hospi = pd.read_sas('/home/chenf1/pc4/data/surghosp.sas7bdat')

In [57]:
surg_hospi.isnull().sum()

siteid                   0
patientID                0
VISatSurg               27
preopMVyn                0
preopMCSyn               0
preopHighRiskYN         38
preopLowRiskYN          38
procPrimary             22
procPrimaryTxt           0
pdaFlag                  0
ppmFlag                  0
STATcat                513
STATscore              513
CPBtm                    3
xClampTime              61
DHCAtm                   0
VentAdmitPostopYN        0
Vent2hrPostopYN          0
PostopFiO2yn         11857
PostopFiO2           13014
PostopMAPyn          11857
PostopMAP            15399
PostopLactateYN          0
PostopLactate         4221
VIS2hrPostop             6
hospitalizationid        0
indexpostopdttm          0
dtype: int64

In [58]:
surg_hospi.dtypes

siteid                      float64
patientID                   float64
VISatSurg                   float64
preopMVyn                   float64
preopMCSyn                  float64
preopHighRiskYN             float64
preopLowRiskYN              float64
procPrimary                 float64
procPrimaryTxt               object
pdaFlag                     float64
ppmFlag                     float64
STATcat                     float64
STATscore                   float64
CPBtm                       float64
xClampTime                  float64
DHCAtm                      float64
VentAdmitPostopYN           float64
Vent2hrPostopYN             float64
PostopFiO2yn                float64
PostopFiO2                  float64
PostopMAPyn                 float64
PostopMAP                   float64
PostopLactateYN             float64
PostopLactate               float64
VIS2hrPostop                float64
hospitalizationid           float64
indexpostopdttm      datetime64[ns]
dtype: object

In [59]:
#panda version issure, need upgrade to 23
surg_hospi = surg_hospi.rename(columns={"patientID": "patientid"})
surg_hospi['patientid'] = surg_hospi['patientid'].astype('str')

In [60]:
surg_hospi_race = surg_hospi.merge(demo_sas[['patientid','race_grp']], left_on = ["patientid"] , right_on = ["patientid"], sort = True, how = 'left')

In [61]:
surg_hospi_race.head()

Unnamed: 0,siteid,patientid,VISatSurg,preopMVyn,preopMCSyn,preopHighRiskYN,preopLowRiskYN,procPrimary,procPrimaryTxt,pdaFlag,...,PostopFiO2yn,PostopFiO2,PostopMAPyn,PostopMAP,PostopLactateYN,PostopLactate,VIS2hrPostop,hospitalizationid,indexpostopdttm,race_grp
0,119.0,10062.0,0.0,0.0,0.0,0.0,1.0,600.0,"b'Valve replacement, Pulmonic (PVR)'",0.0,...,,,,,0.0,,0.0,1190000000000.0,2015-07-21 15:00:00,3
1,110.0,10063.0,0.0,0.0,0.0,0.0,1.0,660.0,"b'Valvuloplasty, Aortic'",0.0,...,1.0,0.4,0.0,,1.0,1.0,5.5,1100000000000.0,2015-03-02 12:26:00,2
2,131.0,10066.0,0.0,0.0,0.0,0.0,0.0,1010.0,"b'Fontan, TCPC, External conduit, Nonfenestrated'",0.0,...,1.0,1.0,1.0,15.0,1.0,4.9,20.0,1310000000000.0,2015-02-05 17:03:00,1
3,131.0,10066.0,0.0,0.0,0.0,0.0,0.0,780.0,"b'Aortic stenosis, Subvalvar, Repair'",0.0,...,1.0,0.5,1.0,11.0,0.0,,7.5,1310000000000.0,2015-11-19 17:16:00,1
4,131.0,10066.0,15.0,0.0,0.0,0.0,1.0,890.0,"b'Transplant, Heart'",0.0,...,1.0,0.6,1.0,1.0,1.0,3.0,15.0,1310000000000.0,2016-12-27 13:00:00,1


In [62]:
surg_hospi_race.dtypes

siteid                      float64
patientid                    object
VISatSurg                   float64
preopMVyn                   float64
preopMCSyn                  float64
preopHighRiskYN             float64
preopLowRiskYN              float64
procPrimary                 float64
procPrimaryTxt               object
pdaFlag                     float64
ppmFlag                     float64
STATcat                     float64
STATscore                   float64
CPBtm                       float64
xClampTime                  float64
DHCAtm                      float64
VentAdmitPostopYN           float64
Vent2hrPostopYN             float64
PostopFiO2yn                float64
PostopFiO2                  float64
PostopMAPyn                 float64
PostopMAP                   float64
PostopLactateYN             float64
PostopLactate               float64
VIS2hrPostop                float64
hospitalizationid           float64
indexpostopdttm      datetime64[ns]
race_grp                    

In [63]:
print(surg_hospi_race.preopHighRiskYN.value_counts())
print(surg_hospi_race.preopLowRiskYN.value_counts())
print(surg_hospi_race.VentAdmitPostopYN.value_counts())
print(surg_hospi_race.STATcat.value_counts())
print(surg_hospi_race.PostopLactateYN.value_counts())
print(surg_hospi_race.PostopMAPyn.value_counts())
print(surg_hospi_race.PostopFiO2yn.value_counts())

0.0    35052
1.0      998
Name: preopHighRiskYN, dtype: int64
0.0    24975
1.0    11075
Name: preopLowRiskYN, dtype: int64
1.0    24817
0.0    11271
Name: VentAdmitPostopYN, dtype: int64
2.0    11290
1.0     9915
4.0     8239
3.0     4538
5.0     1593
Name: STATcat, dtype: int64
1.0    31867
0.0     4219
9.0        2
Name: PostopLactateYN, dtype: int64
1.0    20689
0.0     3542
Name: PostopMAPyn, dtype: int64
1.0    23074
0.0     1157
Name: PostopFiO2yn, dtype: int64


In [64]:
surg_hospi_race[['CPBtm',"xClampTime","VISatSurg","VIS2hrPostop",'DHCAtm']].describe()

Unnamed: 0,CPBtm,xClampTime,VISatSurg,VIS2hrPostop,DHCAtm
count,36085.0,36027.0,36061.0,36082.0,36088.0
mean,103.495303,54.351459,3.785297,10.02302,2.270505
std,84.090068,55.016958,528.19739,487.891934,9.316012
min,0.0,0.0,0.0,0.0,0.0
25%,51.0,11.0,0.0,0.0,0.0
50%,90.0,43.0,0.0,5.0,0.0
75%,143.0,80.0,0.0,8.0,0.0
max,999.0,600.0,99910.0,83005.0,192.0


In [65]:
#two way tables
contingency_table = pd.crosstab(surg_hospi_race['STATcat'],surg_hospi_race['race_grp'])
print(contingency_table)
print(stats.chi2_contingency(np.array(contingency_table))[0:3])

race_grp     1     2     3    4     5    9
STATcat                                   
1.0       1761  1412  5113  463   876  290
2.0       1934  1564  5959  413  1101  319
3.0        677   604  2471  153   517  116
4.0       1492  1146  4241  296   818  246
5.0        237   235   906   19   149   47
(115.8438678180653, 1.6689011577926236e-15, 20)


In [66]:
# two way tables-- ttest/anova
surg_hospi_race[['race_grp','CPBtm',"xClampTime","VISatSurg","VIS2hrPostop",'DHCAtm']].groupby('race_grp').mean()

Unnamed: 0_level_0,CPBtm,xClampTime,VISatSurg,VIS2hrPostop,DHCAtm
race_grp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,107.295307,56.504048,1.829767,20.508903,2.216435
2,102.785403,52.72507,20.464789,6.407932,2.376562
3,101.688238,52.534851,0.566324,8.816022,2.220542
4,109.889868,53.617084,0.994559,7.550073,1.243025
5,102.462418,59.230043,2.786098,6.113547,3.202629
9,112.550682,67.258285,0.197752,3.588402,1.184211


In [67]:
#ANOVA test
#'CPBtm',"xClampTime","VISatSurg","VIS2hrPostop",'DHCAtm'
f, p = stats.f_oneway(surg_hospi_race[surg_hospi_race['race_grp'] == 1].VIS2hrPostop.dropna(),
                      surg_hospi_race[surg_hospi_race['race_grp'] == 2].VIS2hrPostop.dropna(),
                      surg_hospi_race[surg_hospi_race['race_grp'] == 3].VIS2hrPostop.dropna(),
                      surg_hospi_race[surg_hospi_race['race_grp'] == 4].VIS2hrPostop.dropna(),
                      surg_hospi_race[surg_hospi_race['race_grp'] == 5].VIS2hrPostop.dropna(),
                      surg_hospi_race[surg_hospi_race['race_grp'] == 9].VIS2hrPostop.dropna())
 
print ('One-way ANOVA')
print ('=============')
print ('F value:', f)
print ('P value:', p, '\n')

One-way ANOVA
F value: 0.7369389595495037
P value: 0.59564845551293 



In [68]:
import statsmodels.formula.api as smf
import statsmodels.api as sm

# import encounter dataset

In [69]:
#import surgery hopitablization
enct = pd.read_sas('/home/chenf1/pc4/data/encounters.sas7bdat')
print(enct.isnull().sum())

patientID                     0
siteid                        0
EncNum                        0
EncType                       0
EncUnplanned                  0
ECMOenc                       0
CICUdeath                     0
withdrawal                58936
DNRyn                         0
comfortCareYN                 0
CompReopBleed                 0
CompReopUnplan                0
CompDiaphragm                 0
CompCardArrest                0
CompEndocard                  0
CompArrhythmiaPace            0
CompPHTN                      0
CompChyloIntv                 0
CompPneumonia                 0
CompCABSI                     0
CompSepsis                    0
CompSupSSI                    0
CompDeepSSI                   0
CompUTI                       0
CompMeningitis                0
CompStrokeHem                 0
CompSeizure                   0
CompIVH                       0
CompBrainDeath                0
CompHepaticFail               0
CompNEC                       0
CRRTarf 

In [70]:
print(enct.dtypes)

patientID                        float64
siteid                           float64
EncNum                           float64
EncType                          float64
EncUnplanned                     float64
ECMOenc                          float64
CICUdeath                        float64
withdrawal                       float64
DNRyn                            float64
comfortCareYN                    float64
CompReopBleed                    float64
CompReopUnplan                   float64
CompDiaphragm                    float64
CompCardArrest                   float64
CompEndocard                     float64
CompArrhythmiaPace               float64
CompPHTN                         float64
CompChyloIntv                    float64
CompPneumonia                    float64
CompCABSI                        float64
CompSepsis                       float64
CompSupSSI                       float64
CompDeepSSI                      float64
CompUTI                          float64
CompMeningitis  

In [71]:
print(enct.CompHepaticFail.value_counts())
print(enct.CRRTarf.value_counts())
print(enct.CompStrokeHem.value_counts())
print(enct.CompSeizure.value_counts())
print(enct[enct.EncType==1].ECMOenc.value_counts())

0.0    60320
1.0      503
9.0        5
Name: CompHepaticFail, dtype: int64
0.0    60189
1.0      637
9.0        2
Name: CRRTarf, dtype: int64
0.0    59561
1.0     1256
9.0       11
Name: CompStrokeHem, dtype: int64
0.0    59744
1.0     1078
9.0        6
Name: CompSeizure, dtype: int64
0.0    35461
1.0     1341
Name: ECMOenc, dtype: int64


In [72]:
#panda version issure, need upgrade to 23
enct.patientID = enct.patientID.astype("str")
enct_race = enct.merge(demo_sas[['patientid','race_grp']], left_on = ["patientID"] , right_on = ["patientid"], sort = True, how = 'left')

In [73]:
enct_race.dtypes

patientID                         object
siteid                           float64
EncNum                           float64
EncType                          float64
EncUnplanned                     float64
ECMOenc                          float64
CICUdeath                        float64
withdrawal                       float64
DNRyn                            float64
comfortCareYN                    float64
CompReopBleed                    float64
CompReopUnplan                   float64
CompDiaphragm                    float64
CompCardArrest                   float64
CompEndocard                     float64
CompArrhythmiaPace               float64
CompPHTN                         float64
CompChyloIntv                    float64
CompPneumonia                    float64
CompCABSI                        float64
CompSepsis                       float64
CompSupSSI                       float64
CompDeepSSI                      float64
CompUTI                          float64
CompMeningitis  

In [74]:
enct_race.head()

Unnamed: 0,patientID,siteid,EncNum,EncType,EncUnplanned,ECMOenc,CICUdeath,withdrawal,DNRyn,comfortCareYN,...,CompChyloIntvDtTm,CompIVHdtTm,CompPHTNdtTm,CompReopBleedDtTm,CompReopUnplanDtTm,CompSeizureDtTm,CompStrokeHemDtTm,CRRTarfDtTm,patientid,race_grp
0,10057.0,119.0,1.0,2.0,0.0,0.0,0.0,,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,10057.0,3
1,10061.0,120.0,1.0,2.0,0.0,0.0,0.0,,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,10061.0,3
2,10062.0,119.0,1.0,1.0,0.0,0.0,0.0,,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,10062.0,3
3,10063.0,110.0,1.0,1.0,0.0,0.0,0.0,,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,10063.0,2
4,10065.0,120.0,1.0,2.0,0.0,0.0,0.0,,0.0,0.0,...,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,10065.0,3


In [75]:
#two way table by race
#two way tables
tmp = enct_race[enct_race.EncType==1]
contingency_table = pd.crosstab(tmp.ECMOenc,tmp.race_grp)
print(contingency_table)
print(stats.chi2_contingency(np.array(contingency_table))[0:3])

race_grp     1     2      3     4     5     9
ECMOenc                                      
0.0       6096  4908  18662  1343  3426  1026
1.0        218   227    674    42   151    29
(18.13934837057793, 0.002776738214919312, 5)


# import medical hopitalization data

In [76]:
#import surgery hopitablization
med_hosp = pd.read_sas('/home/chenf1/pc4/data/medhosp.sas7bdat')
print(med_hosp.isnull().sum())

siteid                    0
patientID                 0
myocarditisYN             0
cardiomyopathyYN          0
adhfYN                    0
chronicHFyn               0
transplantRejectYN        0
PHTNyn                    0
Vent2hrMedYN              0
VIS2hrMed                12
BNPyn                     0
BNP                   15822
CrYN                      0
Cr                     9875
LactateYN                 0
Lactate               12960
hospitalizationid         0
dtype: int64


In [77]:
med_hosp.dtypes

siteid                float64
patientID             float64
myocarditisYN         float64
cardiomyopathyYN      float64
adhfYN                float64
chronicHFyn           float64
transplantRejectYN    float64
PHTNyn                float64
Vent2hrMedYN          float64
VIS2hrMed             float64
BNPyn                 float64
BNP                   float64
CrYN                  float64
Cr                    float64
LactateYN             float64
Lactate               float64
hospitalizationid     float64
dtype: object

In [78]:
#panda version issure, need upgrade to 23
med_hosp.patientID = med_hosp.patientID.astype("str")
med_hosp_race = med_hosp.merge(demo_sas[['patientid','race_grp']], left_on = ["patientID"] , right_on = ["patientid"], sort = True, how = 'left')

In [79]:
med_hosp_race.head()

Unnamed: 0,siteid,patientID,myocarditisYN,cardiomyopathyYN,adhfYN,chronicHFyn,transplantRejectYN,PHTNyn,Vent2hrMedYN,VIS2hrMed,BNPyn,BNP,CrYN,Cr,LactateYN,Lactate,hospitalizationid,patientid,race_grp
0,119.0,10057.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,0.0,,0.0,,1190000000000.0,10057.0,3
1,120.0,10061.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,0.7,0.0,,1200000000000.0,10061.0,3
2,120.0,10065.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,,0.0,,1200000000000.0,10065.0,3
3,131.0,10066.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,0.0,,1310000000000.0,10066.0,1
4,131.0,10069.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,0.0,,1310000000000.0,10069.0,3


In [80]:
med_hosp_race.dtypes

siteid                float64
patientID              object
myocarditisYN         float64
cardiomyopathyYN      float64
adhfYN                float64
chronicHFyn           float64
transplantRejectYN    float64
PHTNyn                float64
Vent2hrMedYN          float64
VIS2hrMed             float64
BNPyn                 float64
BNP                   float64
CrYN                  float64
Cr                    float64
LactateYN             float64
Lactate               float64
hospitalizationid     float64
patientid              object
race_grp                int64
dtype: object

In [81]:
# data error for VIS2hrMed
for colmns in med_hosp_race[['myocarditisYN','cardiomyopathyYN','adhfYN','chronicHFyn','transplantRejectYN','Vent2hrMedYN','PHTNyn', 
                            'BNPyn','CrYN','LactateYN']]:
    print(med_hosp_race[colmns].value_counts())

0.0    17703
1.0      386
Name: myocarditisYN, dtype: int64
0.0    16157
1.0     1932
Name: cardiomyopathyYN, dtype: int64
0.0    15737
1.0     2352
Name: adhfYN, dtype: int64
0.0    16123
1.0     1966
Name: chronicHFyn, dtype: int64
0.0    17666
1.0      423
Name: transplantRejectYN, dtype: int64
0.0    14235
1.0     3854
Name: Vent2hrMedYN, dtype: int64
0.0    16108
1.0     1981
Name: PHTNyn, dtype: int64
0.0    15819
1.0     2267
9.0        3
Name: BNPyn, dtype: int64
0.0    9869
1.0    8214
9.0       6
Name: CrYN, dtype: int64
0.0    12948
1.0     5129
9.0       12
Name: LactateYN, dtype: int64


In [82]:
med_hosp_race.describe()

Unnamed: 0,siteid,myocarditisYN,cardiomyopathyYN,adhfYN,chronicHFyn,transplantRejectYN,PHTNyn,Vent2hrMedYN,VIS2hrMed,BNPyn,BNP,CrYN,Cr,LactateYN,Lactate,hospitalizationid,race_grp
count,18089.0,18089.0,18089.0,18089.0,18089.0,18089.0,18089.0,18089.0,18077.0,18089.0,2267.0,18089.0,8214.0,18089.0,5129.0,18089.0,18089.0
mean,116.433855,0.021339,0.106805,0.130024,0.108685,0.023384,0.109514,0.213058,4.289423,0.126817,4416.198059,0.457073,0.752057,0.289513,3.286606,1164339000000.0,2.766764
std,9.978862,0.144516,0.308874,0.336339,0.311252,0.151125,0.312292,0.409479,226.687091,0.350261,23392.003857,0.521588,4.317694,0.503461,5.18303,99788620000.0,1.330065
min,101.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,1010000000000.0,1.0
25%,107.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,151.0,0.0,0.3,0.0,1.1,1070000000000.0,2.0
50%,116.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,770.0,0.0,0.5,0.0,1.8,1160000000000.0,3.0
75%,124.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2749.0,1.0,0.8,1.0,3.2,1240000000000.0,3.0
max,134.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,24000.0,9.0,730000.0,9.0,367.0,9.0,99.9,1340000000000.0,9.0


In [83]:
print(enct[enct.EncType==2.0].ECMOenc.value_counts())

0.0    23470
1.0      556
Name: ECMOenc, dtype: int64


In [84]:
#two way table by race
#'myocarditisYN','cardiomyopathyYN','adhfYN','chronicHFyn','transplantRejectYN','Vent2hrMedYN','PHTNyn','BNPyn','CrYN','LactateYN'
contingency_table = pd.crosstab(med_hosp_race.CrYN,med_hosp_race.race_grp)
contingency_table

race_grp,1,2,3,4,5,9
CrYN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,1780,1621,5033,378,904,153
1.0,1557,1362,4209,286,686,114
9.0,4,0,2,0,0,0


In [85]:
print(stats.chi2_contingency(np.array(contingency_table))[0:3])

(17.530840073291447, 0.06341253765123893, 10)


In [86]:
# two way tables-- ttest/anova
med_hosp_race[['race_grp',"VIS2hrMed"]].groupby('race_grp').mean()

Unnamed: 0_level_0,VIS2hrMed
race_grp,Unnamed: 1_level_1
1,2.409671
2,9.810705
3,3.829848
4,1.748187
5,1.987406
9,2.077154


In [87]:
#ANOVA test
stats.f_oneway(med_hosp_race[med_hosp_race['race_grp'] == 1].VIS2hrMed.dropna(),
                      med_hosp_race[med_hosp_race['race_grp'] == 2].VIS2hrMed.dropna(),
                      med_hosp_race[med_hosp_race['race_grp'] == 3].VIS2hrMed.dropna(),
                      med_hosp_race[med_hosp_race['race_grp'] == 4].VIS2hrMed.dropna(),
                      med_hosp_race[med_hosp_race['race_grp'] == 5].VIS2hrMed.dropna(),
                      med_hosp_race[med_hosp_race['race_grp'] == 9].VIS2hrMed.dropna())
 

F_onewayResult(statistic=0.4615045894771121, pvalue=0.80515482265113)

# import ECMO records 

In [88]:
#import surgery hopitablization
ecmo = pd.read_sas('/home/chenf1/pc4/data/ecmo.sas7bdat')
print(ecmo.isnull().sum())

patientID             0
siteid                0
ECMOnum               0
ECMOreason           19
hospitalizationid     0
encounterid           0
ECMOendDtTm           0
ECMOstartDtTm         0
dtype: int64


In [89]:
ecmo.head()

Unnamed: 0,patientID,siteid,ECMOnum,ECMOreason,hospitalizationid,encounterid,ECMOendDtTm,ECMOstartDtTm
0,1438.0,101.0,1.0,1.0,1010000000000.0,101001964.0,2015-09-02 14:20:00,2015-08-28 23:43:00
1,1469.0,119.0,1.0,9.0,1190000000000.0,119005262.0,2015-04-28 23:35:00,2015-04-25 21:31:00
2,1633.0,125.0,1.0,4.0,1250000000000.0,125001032.0,2015-10-27 06:55:00,2015-10-23 15:00:00
3,2099.0,101.0,1.0,1.0,1010000000000.0,101001137.0,2014-12-23 15:28:00,2014-12-19 05:15:00
4,2101.0,101.0,1.0,9.0,1010000000000.0,101001204.0,2015-03-28 08:38:00,2015-03-25 19:25:00


In [90]:
# the number of patients
ecmo.shape
#ecmo.patientID.unique().shape

(2105, 8)

In [91]:
#attach race
ecmo.patientID = ecmo.patientID.astype("str")
ecmo_race = ecmo.merge(demo_sas[['patientid','race_grp']], left_on = ["patientID"] , right_on = ["patientid"], sort = True, how = 'left')

In [92]:
ecmo_race.head()

Unnamed: 0,patientID,siteid,ECMOnum,ECMOreason,hospitalizationid,encounterid,ECMOendDtTm,ECMOstartDtTm,patientid,race_grp
0,10104.0,119.0,1.0,1.0,1190000000000.0,119005634.0,2015-06-11 12:00:00,2015-06-04 04:25:00,10104.0,5
1,10104.0,119.0,2.0,1.0,1190000000000.0,119005634.0,2015-06-12 13:55:00,2015-06-11 14:30:00,10104.0,5
2,10123.0,119.0,1.0,2.0,1190000000000.0,119008415.0,2016-07-19 12:42:00,2016-07-18 19:58:00,10123.0,1
3,10123.0,119.0,2.0,9.0,1190000000000.0,119008415.0,2016-07-25 01:11:00,2016-07-21 00:15:00,10123.0,1
4,10134.0,125.0,1.0,1.0,1250000000000.0,125000899.0,2015-07-15 23:56:00,2015-07-04 16:30:00,10134.0,2


In [93]:
# unique ecmo patients broken down by race
ecmo_race[['patientID','race_grp']].groupby('race_grp').nunique()

Unnamed: 0_level_0,patientID,race_grp
race_grp,Unnamed: 1_level_1,Unnamed: 2_level_1
1,294,1
2,333,1
3,919,1
4,59,1
5,213,1
9,37,1


In [94]:
# broken down ecmo by race
ecmo_race[['patientID','race_grp']].groupby('race_grp').count()

Unnamed: 0_level_0,patientID
race_grp,Unnamed: 1_level_1
1,332
2,368
3,1055
4,68
5,242
9,40


In [95]:
ecmo_race[['patientID','ECMOreason','race_grp']].groupby(['ECMOreason','race_grp']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,patientID
ECMOreason,race_grp,Unnamed: 2_level_1
1.0,1,88
1.0,2,128
1.0,3,372
1.0,4,17
1.0,5,93
1.0,9,11
2.0,1,31
2.0,2,38
2.0,3,98
2.0,4,4
