In [0]:
# Imports
from IPython.display import Image
import pandas as pd
import numpy as np
from scipy.stats import binom_test

In [0]:
# Project folder on the Google Drive mounted at /content/drive; the CSV paths
# used in this notebook are built by appending a relative path to it.
PATH = '/content/drive/My Drive/Master/UFMG/Dissertation/Exploration/'

In [0]:
# READ CSV - Method 2 (Drive)
# Mount Google Drive at /content/drive (Colab only); the CSV files read below
# live under that mount point. force_remount=True is passed so the mount is
# redone when this cell is re-run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## Read tables

### Patients table

In [0]:
# PATIENTS table (merged with the admissions on SUBJECT_ID further down)
mimic_patients_df = pd.read_csv(f'{PATH}mimic-III/PATIENTS.csv')

### Admission table

In [0]:
# ADMISSIONS table (one row per hospital admission, keyed by HADM_ID)
mimic_admission_df = pd.read_csv(f'{PATH}mimic-III/ADMISSIONS.csv')

#### Merge both tables and calculate true age

In [0]:
# Attach the patient columns to every admission (inner join on SUBJECT_ID)
mimic_patients_admission_df = mimic_admission_df.merge(
    mimic_patients_df,
    how='inner',
    on='SUBJECT_ID',
)

In [0]:
# Parse ADMITTIME and DOB (strings after read_csv) into datetimes.
# The timestamps are dash-separated ("2196-04-09 12:26:00"), so the format has
# to spell out the dashes; '%Y%m%d %H:%M:%S' does not describe this layout and
# is rejected when pandas matches the format strictly.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
mimic_patients_admission_df["ADMITTIME"] = pd.to_datetime(mimic_patients_admission_df["ADMITTIME"], format=TIMESTAMP_FORMAT)
mimic_patients_admission_df["DOB"] = pd.to_datetime(mimic_patients_admission_df["DOB"], format=TIMESTAMP_FORMAT)

# True age at admission, in years (whole days elapsed / mean year length)
# NOTE(review): MIMIC-III is documented to shift the DOB of patients older than
# 89 by roughly 300 years; such a gap may not fit in a nanosecond timedelta —
# confirm how those rows behave in this subtraction.
mimic_patients_admission_df["AGES"] = mimic_patients_admission_df["ADMITTIME"].sub(mimic_patients_admission_df["DOB"]).dt.days/365.242

# Adult patients only
mimic_patients_admission_df = mimic_patients_admission_df[(mimic_patients_admission_df.AGES >= 18)]

### ICD-9 Codes table

In [0]:
# DIAGNOSES_ICD table (ICD-9 codes assigned to each admission)
mimic_diagnoses_df = pd.read_csv(f'{PATH}mimic-III/DIAGNOSES_ICD.csv')

In [0]:
# One row per diagnosis of an adult admission (inner join on patient + admission)
mimic_patients_diagnoses_df = mimic_patients_admission_df.merge(
    mimic_diagnoses_df,
    how='inner',
    on=['SUBJECT_ID', 'HADM_ID'],
)

Unnamed: 0,ROW_ID_x,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,ROW_ID_y,GENDER,DOB,DOD,DOD_HOSP,DOD_SSN,EXPIRE_FLAG,AGES,ROW_ID,SEQ_NUM,ICD9_CODE
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,151,1.0,9678
1,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,152,2.0,9693
2,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,153,3.0,E9502
3,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,154,4.0,E9503
4,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,155,5.0,3488


### ICD-9 Descriptions table

In [0]:
# D_ICD_DIAGNOSES table (short and long titles for each ICD-9 code)
mimic_diagnoses_descriptions_df = pd.read_csv(f'{PATH}mimic-III/D_ICD_DIAGNOSES.csv')

In [0]:
# new_df = mimic_patients_diagnoses_df.merge(mimic_diagnoses_descriptions_df, left_on='ICD9_CODE', right_on='ICD9_CODE')
# dead = new_df[new_df['LONG_TITLE'].str.lower().str.contains('transplant')][new_df['HOSPITAL_EXPIRE_FLAG'] == 1]

In [0]:
# dead.groupby('ETHNICITY').size()

ETHNICITY
ASIAN                               3
ASIAN - CAMBODIAN                   2
ASIAN - CHINESE                     1
ASIAN - OTHER                       1
BLACK/AFRICAN AMERICAN             27
BLACK/HAITIAN                       2
HISPANIC OR LATINO                  9
HISPANIC/LATINO - PUERTO RICAN      3
OTHER                               7
PATIENT DECLINED TO ANSWER          3
PORTUGUESE                          2
UNABLE TO OBTAIN                    6
UNKNOWN/NOT SPECIFIED              14
WHITE                             269
WHITE - OTHER EUROPEAN              1
WHITE - RUSSIAN                     1
dtype: int64

## Occurrence and Mortality

### By ethnicity

#### Creating new table with regex


In [0]:
# One-off preprocessing, kept commented out for reference: it collapses the raw
# ETHNICITY labels into ASIAN / BLACK / LATINO / WHITE / OTHER / UNKNOWN and
# writes ETHNICITY_MORTALITY.csv, the file loaded by the "Read table" cell.
# NOTE(review): DataFrame.replace without a column selector applies each regex
# to every column of the frame, not only ETHNICITY — confirm no other text
# column (e.g. DIAGNOSIS) starts with one of these labels.

# mimic_eth_mortality_df = mimic_patients_diagnoses_df

# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^BLACK.*', value='BLACK', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^WHITE.*', value='WHITE', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^ASIAN.*', value='ASIAN', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^LATINO.*', value='LATINO', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^HISPANIC.*', value='LATINO', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^SOUTH AMERICAN.*', value='LATINO', regex=True)

# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^CARIBBEAN ISLAND.*', value='OTHER', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^AMERICAN INDIAN.*', value='OTHER', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^MIDDLE EASTERN.*', value='OTHER', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^MULTI RACE ETHNICITY.*', value='OTHER', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER.*', value='OTHER', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^PORTUGUESE.*', value='OTHER', regex=True)

# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^UNABLE TO OBTAIN.*', value='UNKNOWN', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^UNKNOWN.*', value='UNKNOWN', regex=True)
# mimic_eth_mortality_df = mimic_eth_mortality_df.replace(to_replace='^PATIENT DECLINED TO ANSWER.*', value='UNKNOWN', regex=True)

# mimic_eth_mortality_df.to_csv('ETHNICITY_MORTALITY.csv', index = None, header=True)
# !cp ETHNICITY_MORTALITY.csv '/content/drive/My Drive/Master/UFMG/Dissertation/Exploration/mimic-III/'

#### Read table

In [0]:
# Diagnosis rows with the ETHNICITY labels already collapsed into broad groups
mimic_eth_mortality_df = pd.read_csv(f'{PATH}mimic-III/ETHNICITY_MORTALITY.csv')

#### Unique patient with multiple rows with same ICD9

In [0]:
# Count diagnosis rows per (ICD-9 code, ethnicity), split by in-hospital
# outcome. These are row counts: a patient carrying the same code on several
# admissions contributes one row per admission.
eth_mortality_df = (
    mimic_eth_mortality_df
    .groupby(['ICD9_CODE', 'ETHNICITY', 'HOSPITAL_EXPIRE_FLAG'])
    .size()
    .unstack()       # one column per HOSPITAL_EXPIRE_FLAG value
    .fillna(0)       # outcome never observed for that pair -> 0 rows
    .reset_index()
)

# Positional rename: relies on unstack having produced exactly the two flag
# columns, survivors first and in-hospital deaths second.
eth_mortality_df.columns = ['ICD9_CODE', 'ETHNICITY', 'ALIVE', 'DEAD']

# Total rows per pair, computed column-wise (keeps TOTAL numeric instead of the
# object dtype left behind by a string placeholder filled in row by row).
eth_mortality_df['TOTAL'] = eth_mortality_df['ALIVE'] + eth_mortality_df['DEAD']

#### Data integrity

In [0]:
# Rows of the ethnicity table that carry no ICD-9 code
print(mimic_eth_mortality_df['ICD9_CODE'].isna().sum())
# Row count of the merged patient/admission/diagnosis table
print(mimic_patients_diagnoses_df.shape[0])
# Rows accounted for by the aggregated table (sum of TOTAL)
print(eth_mortality_df['TOTAL'].sum())

44
569130
569086.0


In [0]:
# Rows per ethnicity over all ICD-9 codes, preceded by the grand total
eth_mortality_group = eth_mortality_df['TOTAL'].groupby(eth_mortality_df['ETHNICITY']).sum()
print(eth_mortality_group.sum(), eth_mortality_group, sep='\n')

569086.0
ETHNICITY
ASIAN       12700.0
BLACK       59730.0
LATINO      19930.0
OTHER       16228.0
UNKNOWN     50605.0
WHITE      409893.0
Name: TOTAL, dtype: float64


In [0]:
# Keep only the four named groups; OTHER and UNKNOWN are left out of the tests
eth_mortality_df = eth_mortality_df[~eth_mortality_df['ETHNICITY'].isin(['OTHER', 'UNKNOWN'])]

#### Mortality significance

In [0]:
# 1. Mortality significance: keep (ICD-9, ethnicity) pairs with more than two
#    in-hospital deaths.
eth_mortality_significance_df = eth_mortality_df.loc[eth_mortality_df['DEAD'] > 2]

# 2. Remove ICD-9 codes left with only ONE ethnicity. Each row is one
#    (code, ethnicity) pair, so a code that appears on a single row has nothing
#    to be compared against. A boolean mask replaces the row-by-row loop that
#    dropped rows from the frame while iterating over it.
eth_mortality_significance_df = eth_mortality_significance_df.loc[
    eth_mortality_significance_df.duplicated(subset='ICD9_CODE', keep=False)
].copy()

#### Ethnicity

In [0]:
# 3. Binomial test
#
# For every ICD-9 code, look for the ethnicity that is
#   * less affected than the others: its rows for the code, divided by all rows
#     of that ethnicity, is below the sum of the same ratio over the other
#     groups, and
#   * dying more: its in-hospital death rate for the code is above the sum of
#     the other groups' death rates.
# At most one group can satisfy the second condition, so each code is flagged
# for at most one ethnicity. Flagged codes get two two-sided binomial tests
# (occurrence and mortality); everything is collected in the result lists used
# to build the summary table.
#
# NOTE(review): scipy.stats.binom_test is deprecated in recent SciPy releases in
# favour of binomtest(...).pvalue — confirm the SciPy version in use.

ETHNIC_GROUPS = ('ASIAN', 'BLACK', 'LATINO', 'WHITE')


def _sum_of_others(values, group):
  """Sum `values` over every ethnicity except `group`, in ETHNIC_GROUPS order."""
  return sum(values[other] for other in ETHNIC_GROUPS if other != group)


def _group_stats(code_rows, population_totals):
  """Collect per-ethnicity figures for the rows of a single ICD-9 code.

  Returns four dicts keyed by ethnicity: TOTAL, DEAD, death rate (DEAD / TOTAL)
  and population share (TOTAL / rows of that ethnicity in `population_totals`).
  Ethnicities without a row for the code get 0 everywhere.
  """
  totals, deads, death_rates, pop_shares = {}, {}, {}, {}
  for group in ETHNIC_GROUPS:
    group_rows = code_rows.loc[code_rows.ETHNICITY == group]
    if group_rows["TOTAL"].any():
      totals[group] = group_rows["TOTAL"].item()
      deads[group] = group_rows["DEAD"].item()
      death_rates[group] = deads[group] / totals[group]
    else:
      totals[group], deads[group], death_rates[group] = 0, 0, 0
    pop_shares[group] = totals[group] / population_totals[group]
  return totals, deads, death_rates, pop_shares


def _flagged_group(totals, deads, death_rates, pop_shares):
  """Return the ethnicity that is less affected but dies more, or None."""
  for group in ETHNIC_GROUPS:
    if (totals[group] != 0 and deads[group] != 0
        and pop_shares[group] < _sum_of_others(pop_shares, group)
        and death_rates[group] > _sum_of_others(death_rates, group)):
      return group
  return None


icd9_code, ethnicity, p_value_occ, p_value_mort = ([] for _ in range(4))
asian_t, black_t, latino_t, white_t = ([] for _ in range(4))
asian_d, black_d, latino_d, white_d = ([] for _ in range(4))
totals_out = {'ASIAN': asian_t, 'BLACK': black_t, 'LATINO': latino_t, 'WHITE': white_t}
deads_out = {'ASIAN': asian_d, 'BLACK': black_d, 'LATINO': latino_d, 'WHITE': white_d}

# One pass per ICD-9 code, in order of first appearance, so every code is
# evaluated exactly once.
for code, code_rows in eth_mortality_significance_df.groupby('ICD9_CODE', sort=False):
  totals, deads, death_rates, pop_shares = _group_stats(code_rows, eth_mortality_group)
  flagged = _flagged_group(totals, deads, death_rates, pop_shares)
  if flagged is None:
    continue

  # Values by ethnicity (for comparison)
  for group in ETHNIC_GROUPS:
    totals_out[group].append(totals[group])
    deads_out[group].append(deads[group])

  # Occurrences
  n_occ = sum(totals.values())
  x_occ = totals[flagged]
  x_mort = deads[flagged]

  # NOTE(review): prob_occ is the share of the *other* groups among the code's
  # rows, and prob_mort divides a sum of death rates by a row count. Both are
  # kept exactly as in the original analysis — confirm these are the intended
  # null probabilities before relying on the p-values.
  prob_occ = (n_occ - x_occ) / n_occ
  prob_mort = _sum_of_others(death_rates, flagged) / (n_occ - x_occ)

  ethnicity.append(flagged)
  icd9_code.append(code)

  # p-value: binom test
  p_value_occ.append(binom_test(x = x_occ, n = n_occ, p = prob_occ, alternative = 'two-sided'))
  p_value_mort.append(binom_test(x = x_mort, n = x_occ, p = prob_mort, alternative = 'two-sided'))

#### Binomial test

In [0]:
# Assemble the per-code results into a table. The mapping is deliberately not
# called `dict`, which would shadow the builtin for every later cell.
binom_test_columns = {'Ethnicity': ethnicity, 'ICD9': icd9_code, 'Occ p-value': p_value_occ,
                      'Mort p-value': p_value_mort, 'Asian Total': asian_t, 'Asian Dead': asian_d,
                      'Black Total': black_t, 'Black Dead': black_d, 'Latino Total': latino_t,
                      'Latino Dead': latino_d, 'White Total': white_t, 'White Dead': white_d}

binom_test_df = pd.DataFrame(binom_test_columns)
# Attach the ICD-9 long title; the default inner join drops codes that have no
# entry in the descriptions table.
binom_test_df = binom_test_df.merge(mimic_diagnoses_descriptions_df, left_on='ICD9', right_on='ICD9_CODE')
binom_test_df = binom_test_df.drop(['ROW_ID', 'ICD9_CODE', 'SHORT_TITLE'], axis=1)

pd.set_option("display.max_colwidth", 100)

# null hypothesis rejected: p-value < 0.05
binom_test_df[(binom_test_df['Occ p-value'] < 0.05) & (binom_test_df['Mort p-value'] < 0.05)]

Unnamed: 0,Ethnicity,ICD9,Occ p-value,Mort p-value,Asian Total,Asian Dead,Black Total,Black Dead,Latino Total,Latino Dead,White Total,White Dead,LONG_TITLE
0,WHITE,0388,4.123539e-90,4.096126e-41,0.0,0.0,23.0,6.0,0.0,0.0,137.0,38.0,Other specified septicemias
1,WHITE,04111,0.000000e+00,3.291491e-68,0.0,0.0,73.0,6.0,0.0,0.0,475.0,44.0,Methicillin susceptible Staphylococcus aureus in conditions classified elsewhere and of unspecif...
2,WHITE,04119,1.795652e-72,1.643442e-19,0.0,0.0,50.0,3.0,0.0,0.0,178.0,13.0,"Staphylococcus infection in conditions classified elsewhere and of unspecified site, other staph..."
3,ASIAN,07054,0.000000e+00,1.719012e-09,10.0,3.0,245.0,21.0,92.0,8.0,773.0,75.0,Chronic hepatitis C without mention of hepatic coma
4,WHITE,1122,2.617164e-146,1.448099e-37,0.0,0.0,38.0,5.0,0.0,0.0,225.0,30.0,Candidiasis of other urogenital sites
5,WHITE,1125,3.814478e-76,9.804372e-46,0.0,0.0,20.0,5.0,0.0,0.0,117.0,40.0,Disseminated candidiasis
6,WHITE,1173,1.046131e-41,9.224647e-41,0.0,0.0,19.0,4.0,0.0,0.0,82.0,32.0,Aspergillosis
7,WHITE,135,1.969994e-36,1.516568e-30,0.0,0.0,70.0,4.0,0.0,0.0,164.0,17.0,Sarcoidosis
8,BLACK,1579,1.337235e-25,7.117802e-07,0.0,0.0,3.0,3.0,0.0,0.0,28.0,7.0,"Malignant neoplasm of pancreas, part unspecified"
9,BLACK,1890,6.299396e-202,1.805803e-08,0.0,0.0,11.0,3.0,0.0,0.0,177.0,15.0,"Malignant neoplasm of kidney, except pelvis"


In [0]:
# Number of codes for which both tests reject the null hypothesis (p < 0.05)
print((binom_test_df['Occ p-value'].lt(0.05) & binom_test_df['Mort p-value'].lt(0.05)).sum())

129


In [0]:
# Only cases with transplanted organs: match on the ICD-9 long title. The column
# is selected by name rather than by position, so the filter keeps working if
# columns are added or reordered.
pd.set_option("display.max_colwidth", 500)
binom_test_df.loc[binom_test_df['LONG_TITLE'].str.lower().str.contains(r'transplant')]

Unnamed: 0,Ethnicity,ICD9,Occ p-value,Mort p-value,Asian Total,Asian Dead,Black Total,Black Dead,Latino Total,Latino Dead,White Total,White Dead,LONG_TITLE
100,WHITE,99681,5.012531e-130,2.416449e-69,0.0,0.0,104.0,5.0,0.0,0.0,348.0,35.0,Complications of transplanted kidney
101,BLACK,99682,1.318717e-101,5.998044e-09,0.0,0.0,18.0,4.0,0.0,0.0,133.0,21.0,Complications of transplanted liver
131,WHITE,V4983,1.9949880000000002e-53,4.086634e-28,0.0,0.0,26.0,3.0,0.0,0.0,109.0,21.0,Awaiting organ transplant status


In [0]:
# Only cases with heart problems - CODE ?
# Matches "atrial" in the ICD-9 long title; the column is selected by name
# rather than by position.
binom_test_df.loc[binom_test_df['LONG_TITLE'].str.lower().str.contains(r'atrial')]

Unnamed: 0,Ethnicity,ICD9,Occ p-value,Mort p-value,Asian Total,Asian Dead,Black Total,Black Dead,Latino Total,Latino Dead,White Total,White Dead,LONG_TITLE


In [0]:
# Diagnosis rows with ICD-9 99682 recorded for BLACK patients
patients_transp_liver_df = mimic_eth_mortality_df[
    mimic_eth_mortality_df['ICD9_CODE'].eq("99682")
    & mimic_eth_mortality_df['ETHNICITY'].eq("BLACK")
]
print(patients_transp_liver_df.shape[0])

In [0]:
# Patient 5882 carries ICD-9 99682 on more than one row
patients_transp_liver_df[
    patients_transp_liver_df["SUBJECT_ID"].eq(5882)
    & patients_transp_liver_df["ICD9_CODE"].eq("99682")
]

#### MORE ABOUT PATIENT 5882

In [0]:
# Every admission recorded for patient 5882
mimic_admission_df[mimic_admission_df["SUBJECT_ID"].eq(5882)]

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
7863,7194,5882,131731,2141-08-31 23:32:00,2141-09-11 16:46:00,,EMERGENCY,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicaid,,PROTESTANT QUAKER,DIVORCED,WHITE,,,LIVER FAILURE;TRANSPLANT PRE-OP,0,1
8003,7195,5882,185301,2142-02-08 12:41:00,2142-03-16 16:20:00,,EMERGENCY,EMERGENCY ROOM ADMIT,REHAB/DISTINCT PART HOSP,Medicaid,ENGL,PROTESTANT QUAKER,MARRIED,BLACK/AFRICAN AMERICAN,2142-02-08 10:58:00,2142-02-08 13:15:00,HYPOGLYCEMIA;ALTERED MENTAL STATUS;TELEMETRY,0,1
8004,7196,5882,173457,2144-07-16 22:44:00,2144-07-31 13:15:00,,EMERGENCY,EMERGENCY ROOM ADMIT,LONG TERM CARE HOSPITAL,Medicare,ENGL,PROTESTANT QUAKER,MARRIED,BLACK/AFRICAN AMERICAN,2144-07-16 17:18:00,2144-07-17 23:34:00,BACTEREMIA,0,1
8005,7197,5882,181631,2144-09-22 19:39:00,2144-10-07 10:22:00,2144-10-07 10:22:00,EMERGENCY,PHYS REFERRAL/NORMAL DELI,DEAD/EXPIRED,Medicare,ENGL,PROTESTANT QUAKER,MARRIED,BLACK/AFRICAN AMERICAN,,,DEHYDRATION,1,1


* 4 admissions in approx. 3 years

* In the first admission the patient was recorded as **white** (!), with liver failure

* HADM_ID **185301** and **181631** have ICD9: **99682** - COMPLICATION OF TRANSPLANTED LIVER 

* 185301 and 181631 are not sequential (?)

In [0]:
# All diagnoses of patient 5882 with their ICD-9 titles, in DIAGNOSES_ICD row order
patient_5882_df = mimic_diagnoses_df[mimic_diagnoses_df["SUBJECT_ID"].eq(5882)]
patient_5882_df.merge(mimic_diagnoses_descriptions_df, on='ICD9_CODE').sort_values(by="ROW_ID_x")

Unnamed: 0,ROW_ID_x,SUBJECT_ID,HADM_ID,SEQ_NUM,ICD9_CODE,ROW_ID_y,SHORT_TITLE,LONG_TITLE
0,66135,5882,131731,1.0,07054,1045,Chrnc hpt C wo hpat coma,Chronic hepatitis C without mention of hepatic coma
1,66136,5882,131731,2.0,5712,5798,Alcohol cirrhosis liver,Alcoholic cirrhosis of liver
2,66137,5882,173457,1.0,7907,10023,Bacteremia,Bacteremia
3,66138,5882,173457,2.0,56723,4969,Spontan bact peritonitis,Spontaneous bacterial peritonitis
4,66140,5882,173457,4.0,4280,4473,CHF NOS,"Congestive heart failure, unspecified"
5,66142,5882,173457,6.0,5715,5804,Cirrhosis of liver NOS,Cirrhosis of liver without mention of alcohol
7,66143,5882,173457,7.0,5859,5915,Chronic kidney dis NOS,"Chronic kidney disease, unspecified"
8,66144,5882,173457,8.0,V427,11939,Liver transplant status,Liver replaced by transplant
9,66145,5882,173457,9.0,V4983,10199,Await organ transplnt st,Awaiting organ transplant status
10,66146,5882,173457,10.0,04104,681,Enterococcus group d,"Streptococcus infection in conditions classified elsewhere and of unspecified site, streptococcus, group D [Enterococcus]"


#### Note events from patient 5882

In [0]:
# Sample of NOTEEVENTS restricted to patient 5882
note_events_patient_5882_df = pd.read_csv(f'{PATH}mimic-III/sample_NoteEvents_patient5882.csv')

In [0]:
# Display the note events loaded for patient 5882.
note_events_patient_5882_df

Unnamed: 0,row_id,subject_id,hadm_id,chartdate,charttime,storetime,category,description,cgid,iserror,text
0,15832,5882,173457.0,2144-07-31 00:00:00,,,Discharge summary,Report,,,Admission Date: [**2144-7-16**] Discharge Date: [**2144-7-31**]\n\nDate of Birth: [**2082-11-29**] Sex: M\n\nService: MEDICINE\n\nAllergies:\nPatient recorded as having No Known Allergies to Drugs\n\nAttending:[**First Name3 (LF) 943**]\nChief Complaint:\n# Fever\n# Bacteremia\n\nMajor Surgical or Invasive Procedure:\n# PICC placement at left upper extremity\n# Diagnositic paracentesis\n# Paracentesis (3 L)\n\nHistory of Present Illness:\n61M h/o HCV cirrhosis ...
1,15833,5882,181631.0,2144-10-07 00:00:00,,,Discharge summary,Report,,,"Admission Date: [**2144-9-22**] Discharge Date: [**2144-10-7**]\n\nDate of Birth: [**2082-11-29**] Sex: M\n\nService: MEDICINE\n\nAllergies:\nPatient recorded as having No Known Allergies to Drugs\n\nAttending:[**First Name3 (LF) 1377**]\nChief Complaint:\nN/V/tachycardia\n\nMajor Surgical or Invasive Procedure:\nnone\n\nHistory of Present Illness:\n61M w/ HCV cirrhosis s/p xplant (since failed and now s/p TIPS\nx2), DMII, HTN, and recent bacteremia (still on h..."
2,15798,5882,131731.0,2141-09-11 00:00:00,,,Discharge summary,Report,,,Admission Date: [**2141-8-31**] Discharge Date: [**2141-9-11**]\n\nDate of Birth: [**2082-11-29**] Sex: M\n\nService: [**Doctor First Name 147**]\n\nAllergies:\nPatient recorded as having No Known Allergies to Drugs\n\nAttending:[**First Name3 (LF) 695**]\nChief Complaint:\nHCV Cirrhosis\n\nMajor Surgical or Invasive Procedure:\nS/P Orthtopic Liver Transplant\n\n\nHistory of Present Illness:\n58 yo male with HCV admitted for OLT\n\nPast Medical History:\nHCV\nE...
3,15799,5882,185301.0,2142-03-16 00:00:00,,,Discharge summary,Report,,,"Admission Date: [**2142-2-8**] Discharge Date: [**2142-3-16**]\n\nDate of Birth: [**2082-11-29**] Sex: M\n\nService: SURGERY\n\nAllergies:\nPatient recorded as having No Known Allergies to Drugs\n\nAttending:[**First Name3 (LF) 668**]\nChief Complaint:\nMr. [**Known lastname **] is a 59 y.o. man with a hx of End-stage liver disease\ndue to Hepatitis C. He is status post orthotopic liver\ntransplantation on [**2141-9-1**], which has been complicated by acute\nce..."
4,74124,5882,173457.0,2144-07-23 00:00:00,,,Echo,Report,,,PATIENT/TEST INFORMATION:\nIndication: Congestive heart failure. Left ventricular function.\nHeight: (in) 69\nWeight (lb): 160\nBSA (m2): 1.88 m2\nBP (mm Hg): 169/68\nHR (bpm): 83\nStatus: Inpatient\nDate/Time: [**2144-7-23**] at 13:06\nTest: Portable TTE (Complete)\nDoppler: Full Doppler and color Doppler\nContrast: None\nTechnical Quality: Adequate\n\n\nINTERPRETATION:\n\nFindings:\n\nThis study was compared to the prior study of [**2144-7-17**].\n\n\nLEFT ATRIUM: Moderate LA enlargement.\...
5,74125,5882,173457.0,2144-07-17 00:00:00,,,Echo,Report,,,PATIENT/TEST INFORMATION:\nIndication: Endocarditis.\nHeight: (in) 69\nWeight (lb): 154\nBSA (m2): 1.85 m2\nBP (mm Hg): 156/79\nHR (bpm): 71\nStatus: Inpatient\nDate/Time: [**2144-7-17**] at 16:31\nTest: Portable TTE (Complete)\nDoppler: Full Doppler and color Doppler\nContrast: None\nTechnical Quality: Adequate\n\n\nINTERPRETATION:\n\nFindings:\n\nThis study was compared to the prior study of [**2144-6-11**].\n\n\nLEFT ATRIUM: Mild LA enlargement.\n\nRIGHT ATRIUM/INTERATRIAL SEPTUM: Mildly ...
6,74126,5882,,2144-06-11 00:00:00,,,Echo,Report,,,"PATIENT/TEST INFORMATION:\nIndication: fever , positive PICC , cx for staph epi\nHeight: (in) 69\nWeight (lb): 149\nBSA (m2): 1.82 m2\nHR (bpm): 62\nStatus: Inpatient\nDate/Time: [**2144-6-11**] at 16:27\nTest: TTE (Complete)\nDoppler: Full Doppler and color Doppler\nContrast: None\nTechnical Quality: Adequate\n\n\nINTERPRETATION:\n\nFindings:\n\nThis study was compared to the prior study of [**2144-1-13**].\n\n\nLEFT ATRIUM: Elongated LA.\n\nRIGHT ATRIUM/INTERATRIAL SEPTUM: Normal RA size...."
7,74127,5882,,2144-01-13 00:00:00,,,Echo,Report,,,PATIENT/TEST INFORMATION:\nIndication: Left ventricular function.\nHeight: (in) 69\nWeight (lb): 150\nBSA (m2): 1.83 m2\nBP (mm Hg): 140/80\nHR (bpm): 48\nStatus: Outpatient\nDate/Time: [**2144-1-13**] at 11:00\nTest: TTE (Complete)\nDoppler: Full Doppler and color Doppler\nContrast: None\nTechnical Quality: Adequate\n\n\nINTERPRETATION:\n\nFindings:\n\nThis study was compared to the report of the prior study (images not\navailable) of [**2141-3-21**].\n\n\nLEFT ATRIUM: Mild LA enlargement.\...
8,170295,5882,181631.0,2144-09-22 00:00:00,,,ECG,Report,,,"Atrial fibrillation with rapid ventricular response\nLateral ST-T changes are nonspecific\nSince previous tracing of [**2144-8-15**], atrial fibrillation with rapid ventricular\nresponse new\n\n"
9,170296,5882,,2144-08-15 00:00:00,,,ECG,Report,,,"Sinus rhythm. First degree A-V delay. Consider left atrial abnormality.\nBorderline prolonged QTc interval is non-specific but clinical correlation is\nsuggested. Since previous tracing of [**2144-8-13**] the P-R interval is slightly\nlonger but otherwise, probably no significant change.\n\n"


In [0]:
# Full text of the second note in the table (positional index 1)
print(note_events_patient_5882_df.iloc[1]["text"])

### Patients awaiting transplant

In [0]:
# Ethnicity: white — rows with ICD-9 V4983 (awaiting organ transplant status)
# patients_awaiting_transp_group1_dead_df = mimic_eth_mortality_df.loc[(mimic_eth_mortality_df.ICD9_CODE == "V4983") & (mimic_eth_mortality_df.ETHNICITY == "WHITE") & (mimic_eth_mortality_df.HOSPITAL_EXPIRE_FLAG == 1)]
patients_awaiting_transp_group1_df = mimic_eth_mortality_df[
    mimic_eth_mortality_df['ICD9_CODE'].eq("V4983")
    & mimic_eth_mortality_df['ETHNICITY'].eq("WHITE")
]

In [0]:
# Rows per insurance type and in-hospital outcome (white patients, V4983)
(
    patients_awaiting_transp_group1_df
    .groupby(['INSURANCE', 'HOSPITAL_EXPIRE_FLAG'])['SUBJECT_ID']
    .count()
)

INSURANCE   HOSPITAL_EXPIRE_FLAG
Government  0                        2
            1                        1
Medicaid    0                       12
            1                        4
Medicare    0                       26
            1                        6
Private     0                       48
            1                       10
Name: SUBJECT_ID, dtype: int64

In [0]:
# Ethnicity: black — rows with ICD-9 V4983 (awaiting organ transplant status)
patients_awaiting_transp_group2_df = mimic_eth_mortality_df[
    mimic_eth_mortality_df['ICD9_CODE'].eq("V4983")
    & mimic_eth_mortality_df['ETHNICITY'].eq("BLACK")
]

In [0]:
# Rows per insurance type and in-hospital outcome (black patients, V4983)
(
    patients_awaiting_transp_group2_df
    .groupby(['INSURANCE', 'HOSPITAL_EXPIRE_FLAG'])['SUBJECT_ID']
    .count()
)

INSURANCE  HOSPITAL_EXPIRE_FLAG
Medicaid   0                        2
Medicare   0                       13
Private    0                        8
           1                        3
Name: SUBJECT_ID, dtype: int64