In [0]:
# Imports
from IPython.display import Image
import pandas as pd
import numpy as np
from scipy.stats import binom_test

In [0]:
# Base folder, located under the Google Drive mount point (/content/drive),
# that is prefixed to every CSV file name read in this notebook.
PATH = '/content/drive/My Drive/Master/UFMG/Dissertation/Exploration/'

In [3]:
# READ CSV - Method 2 (Drive)
# Mount Google Drive at /content/drive, the root that PATH points into, so the
# CSV files can be read. force_remount=True is passed so the mount is redone
# on every run of this cell.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## Read tables

### Patients table

In [0]:
# Load the PATIENTS table from the mimic-III folder under PATH.
mimic_patients_df = pd.read_csv(PATH + 'mimic-III/PATIENTS.csv')

### Admission table

In [0]:
# Load the ADMISSIONS table from the mimic-III folder under PATH.
mimic_admission_df = pd.read_csv(PATH + 'mimic-III/ADMISSIONS.csv')

#### Merge both tables and calculate true age

In [0]:
# Attach the patient record to every admission (inner join on SUBJECT_ID).
# Columns present in both tables receive the default _x / _y suffixes.
mimic_patients_admission_df = mimic_admission_df.merge(
    mimic_patients_df,
    on='SUBJECT_ID',
    how='inner',
)

In [0]:
# ADMITTIME and DOB objects to datetime.
# The raw timestamps are dash-separated ('YYYY-MM-DD HH:MM:SS', as the unparsed
# DISCHTIME column shows), so the format must spell out the dashes. The former
# '%Y%m%d %H:%M:%S' did not describe the data and is rejected by pandas
# versions that enforce `format` strictly.
mimic_patients_admission_df["ADMITTIME"] = pd.to_datetime(mimic_patients_admission_df["ADMITTIME"], format='%Y-%m-%d %H:%M:%S')
mimic_patients_admission_df["DOB"] = pd.to_datetime(mimic_patients_admission_df["DOB"], format='%Y-%m-%d %H:%M:%S')

# True age in years at admission: whole days between DOB and ADMITTIME
# divided by 365.242 days per year.
# NOTE(review): a nanosecond-resolution timedelta cannot span more than ~292
# years; verify that no DOB lies further than that from its ADMITTIME.
mimic_patients_admission_df["AGES"] = mimic_patients_admission_df["ADMITTIME"].sub(mimic_patients_admission_df["DOB"]).dt.days/365.242

# Adult patients only (age of at least 18 years at admission)
mimic_patients_admission_df = mimic_patients_admission_df[(mimic_patients_admission_df.AGES >= 18)]

### ICD-9 Codes table

In [0]:
# Load the DIAGNOSES_ICD table from the mimic-III folder under PATH.
mimic_diagnoses_df = pd.read_csv(PATH + 'mimic-III/DIAGNOSES_ICD.csv')

In [0]:
# Attach the diagnosis rows to each adult admission, matching on both the
# patient (SUBJECT_ID) and the hospital admission (HADM_ID).
mimic_patients_diagnoses_df = mimic_patients_admission_df.merge(
    mimic_diagnoses_df,
    on=['SUBJECT_ID', 'HADM_ID'],
    how='inner',
)

In [10]:
# Preview the first five rows of the merged table.
mimic_patients_diagnoses_df.head()

Unnamed: 0,ROW_ID_x,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,ROW_ID_y,GENDER,DOB,DOD,DOD_HOSP,DOD_SSN,EXPIRE_FLAG,AGES,ROW_ID,SEQ_NUM,ICD9_CODE
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,151,1.0,9678
1,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,152,2.0,9693
2,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,153,3.0,E9502
3,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,154,4.0,E9503
4,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,19,F,2131-05-07,,,,0,64.926816,155,5.0,3488


### ICD-9 Descriptions table

In [0]:
# Load the D_ICD_DIAGNOSES table from the mimic-III folder under PATH; it is
# later joined on ICD9_CODE to obtain the LONG_TITLE description of each code.
mimic_diagnoses_descriptions_df = pd.read_csv(PATH + 'mimic-III/D_ICD_DIAGNOSES.csv')

## Occurrence and Mortality

### By ethnicity

#### Read table

In [0]:
# Load ETHNICITY_MORTALITY.csv from the mimic-III folder under PATH.
# NOTE(review): how this file was produced is not shown in this notebook —
# presumably exported from the merged patients/diagnoses table; confirm.
mimic_eth_mortality_df = pd.read_csv(PATH + 'mimic-III/ETHNICITY_MORTALITY.csv')

#### Unique patient with multiple rows with same ICD9

In [0]:
# Count rows per (ICD9_CODE, ETHNICITY) pair, with one column per value of
# HOSPITAL_EXPIRE_FLAG, then flatten the group keys back into regular columns.
eth_mortality_df = (
    mimic_eth_mortality_df
    .groupby(['ICD9_CODE', 'ETHNICITY', 'HOSPITAL_EXPIRE_FLAG'])
    .size()
    .unstack()
    .reset_index()
)
# Assumes exactly two flag values, the lower one meaning "alive" and the
# higher one "dead" — TODO confirm the 0/1 coding of HOSPITAL_EXPIRE_FLAG.
eth_mortality_df.columns = ['ICD9_CODE', 'ETHNICITY', 'ALIVE', 'DEAD']
# Outcomes that never occurred for a pair come out of unstack() as NaN;
# they are zero counts.
eth_mortality_df = eth_mortality_df.fillna(0)

# Compute total = alive + dead as a single column operation. This keeps TOTAL
# numeric, instead of an object column seeded with the string 'NULL' and
# filled one row at a time.
eth_mortality_df['TOTAL'] = eth_mortality_df['ALIVE'] + eth_mortality_df['DEAD']

#### Data integrity

In [14]:
# Rows of the source table that carry no ICD-9 code
print(mimic_eth_mortality_df['ICD9_CODE'].isnull().sum())
# Row count of the merged patients/admissions/diagnoses table
print(mimic_patients_diagnoses_df.shape[0])
# Rows accounted for by the aggregated table (sum of the TOTAL column)
print(eth_mortality_df["TOTAL"].sum())

44
569130
569086.0


In [15]:
# Rows per ethnicity (sum of TOTAL within each group), preceded by the
# grand total across all groups.
eth_mortality_group = eth_mortality_df.groupby('ETHNICITY')['TOTAL'].agg('sum')
print(eth_mortality_group.sum(), eth_mortality_group, sep='\n')

569086.0
ETHNICITY
ASIAN       12700.0
BLACK       59730.0
LATINO      19930.0
OTHER       16228.0
UNKNOWN     50605.0
WHITE      409893.0
Name: TOTAL, dtype: float64


In [0]:
# Exclude the OTHER and UNKNOWN ethnicity groups from the analysis.
# (No separate MULTI RACE ETHNICITY label is filtered in this cell.)
eth_mortality_df = eth_mortality_df[~eth_mortality_df['ETHNICITY'].isin(['OTHER', 'UNKNOWN'])]

#### Transplanted patients

In [0]:
# Attach the ICD-9 descriptions to the per-ethnicity counts, then keep only
# the codes whose long title mentions "transplant" (case-insensitive).
transplants_df = pd.merge(eth_mortality_df, mimic_diagnoses_descriptions_df, on='ICD9_CODE')
transplants_df = transplants_df.loc[transplants_df['LONG_TITLE'].str.lower().str.contains('transplant')]

In [0]:
# Remove ICD9 codes with only ONE ETHNICITY.
# A code that appears on a single row cannot be compared across ethnicities.
# duplicated(keep=False) flags every row whose ICD9_CODE occurs more than once,
# so one boolean mask replaces the row-by-row loop that dropped rows in place
# while iterating over the same frame.
transplants_df = transplants_df[transplants_df.duplicated(subset='ICD9_CODE', keep=False)]

In [0]:
# Drop the description-table columns that are not needed for display.
transplants_df = transplants_df.drop(columns=['ROW_ID', 'SHORT_TITLE'])

In [72]:
# Raise the column-width and row limits so the long titles and up to 100 rows
# are shown without truncation, then display the table.
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_rows', 100)
transplants_df

Unnamed: 0,ICD9_CODE,ETHNICITY,ALIVE,DEAD,TOTAL,LONG_TITLE
11151,99681,ASIAN,20.0,1.0,21,Complications of transplanted kidney
11152,99681,BLACK,99.0,5.0,104,Complications of transplanted kidney
11153,99681,LATINO,14.0,1.0,15,Complications of transplanted kidney
11154,99681,WHITE,313.0,35.0,348,Complications of transplanted kidney
11155,99682,ASIAN,2.0,0.0,2,Complications of transplanted liver
11156,99682,BLACK,14.0,4.0,18,Complications of transplanted liver
11157,99682,LATINO,11.0,1.0,12,Complications of transplanted liver
11158,99682,WHITE,112.0,21.0,133,Complications of transplanted liver
11161,99685,ASIAN,3.0,1.0,4,Complications of transplanted bone marrow
11162,99685,BLACK,7.0,2.0,9,Complications of transplanted bone marrow
