In [1]:
# Import the packages used in this notebook.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
# Seed NumPy's global random number generator so NumPy-based randomness
# is repeatable from one run of the notebook to the next.
np.random.seed(42)
# The standard-library SQLite driver, aliased as `sl`.
import sqlite3 as sl
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
# Open the SQLite database file (path is relative to the notebook's working
# directory) and create a cursor on the resulting connection.
db_path = '../Data/patient_data.db'
conn = sl.connect(db_path)
c = conn.cursor()

In [3]:
# Ask SQLite's schema catalog for every table in the database and print
# each table name on its own line.
res = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
for row in res:
    table_name = row[0]  # each result row is a 1-tuple: (name,)
    print(table_name)

ADMISSIONS
CALLOUT
CPTEVENTS
DX_ICD
D_CPT
DRGCODES
ICUSTAY
NOTES
PATIENTS
SERVICES
DRG


In [4]:
# Load the modelling table: admissions joined to their notes, DRG rows,
# ICD-9 diagnosis rows and patient demographics.
#
# Filters applied in SQL:
#   * dx.SEQ_NUM = 1        -> only the first-sequenced diagnosis row
#                              (presumably the primary diagnosis; confirm
#                              against the DX_ICD table definition)
#   * drg.drg_type = 'MS'   -> only DRG rows of type 'MS'
#   * notes.category        -> only 'Discharge summary' and 'Physician' notes
#
# Derived columns:
#   * LOS               whole days between admittime and dischtime
#                       (Julian-day difference, truncated to an integer).
#   * Age_at_Admission, Age_at_Death
#                       completed calendar years. Computed by subtracting the
#                       dates as YYYYMMDD integers and integer-dividing by
#                       10000, which is exact with respect to birthdays.
#                       Dividing a day count by a flat 365 ignores leap days
#                       and reports patients as a year older shortly before
#                       a birthday. A NULL date (e.g. no DOD) yields NULL.
new_data = pd.read_sql('''
                    SELECT adm.subject_id,
                          adm.hadm_id,
                          notes.category,
                          notes.description,
                          notes.text,
                          adm.diagnosis,
                          cast(JulianDay(adm.dischtime) - JulianDay(adm.admittime) as int) as LOS,
                          (cast(strftime('%Y%m%d', adm.admittime) as int)
                             - cast(strftime('%Y%m%d', pt.DOB) as int)) / 10000 as Age_at_Admission,
                          (cast(strftime('%Y%m%d', pt.DOD) as int)
                             - cast(strftime('%Y%m%d', pt.DOB) as int)) / 10000 as Age_at_Death,
                          drg.DRG_CODE,
                          drg.DRG_TYPE,
                          drg.DESCRIPTION AS DRG_DESC,
                          drg.DRG_SEVERITY,
                          drg.DRG_MORTALITY,
                          dx.icd9_code,
                          pt.gender,
                          pt.dob,
                          pt.dod,
                          pt.dod_hosp,
                          pt.expire_flag
                    FROM admissions as adm
                    JOIN notes on adm.subject_id = notes.subject_id
                         AND adm.hadm_id = notes.hadm_id
                    JOIN drg on drg.subject_id = adm.subject_id and drg.hadm_id = adm.hadm_id
                    JOIN DX_ICD dx on dx.subject_id = adm.subject_id and dx.hadm_id = adm.hadm_id
                    JOIN patients pt on pt.subject_id = adm.subject_id
                    WHERE dx.SEQ_NUM = 1
                    AND drg.drg_type = 'MS'
                    AND notes.category in ('Discharge summary','Physician');
                       ''',conn)
new_data

Unnamed: 0,SUBJECT_ID,HADM_ID,CATEGORY,DESCRIPTION,TEXT,DIAGNOSIS,LOS,Age_at_Admission,Age_at_Death,DRG_CODE,DRG_TYPE,DRG_DESC,DRG_SEVERITY,DRG_MORTALITY,ICD9_CODE,GENDER,DOB,DOD,DOD_HOSP,EXPIRE_FLAG
0,29396,182126,Discharge summary,Report,Unit No: [**Numeric Identifier 73446**]\nAdmi...,NEWBORN,113,0,,790,MS,EXTREME IMMATURITY OR RESPIRATORY DISTRESS SYN...,,,V3000,F,2179-06-02 00:00:00,,,0
1,7917,136806,Discharge summary,Report,Admission Date: [**2157-6-17**] Dischar...,NEWBORN,131,0,,789,MS,"NEONATES, DIED OR TRANSFERRED TO ANOTHER ACUTE...",,,V3001,M,2157-06-17 00:00:00,,,0
2,5689,157267,Discharge summary,Report,Admission Date: [**2124-9-18**] ...,EKG CHANGES,10,76,76.0,391,MS,"ESOPHAGITIS, GASTROENT & MISC DIGEST DISORDERS...",,,0088,F,2048-07-14 00:00:00,2125-01-08 00:00:00,2125-01-08 00:00:00,1
3,28380,123103,Discharge summary,Report,Admission Date: [**2131-11-5**] ...,CORONARY ARTERY DISEASE,5,51,,236,MS,CORONARY BYPASS W/O CARDIAC CATH W/O MCC,,,41401,M,2080-01-17 00:00:00,,,0
4,28389,139931,Discharge summary,Report,Admission Date: [**2152-9-15**] ...,ABDOMINAL PAIN;AORTIC DISSECTION;TELEMETRY,10,47,,329,MS,MAJOR SMALL & LARGE BOWEL PROCEDURES W MCC,,,5570,F,2104-11-22 00:00:00,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28025,60929,109316,Discharge summary,Report,Admission Date: [**2142-6-29**] ...,SEIZURE,5,58,,896,MS,ALCOHOL/DRUG ABUSE OR DEPENDENCE W/O REHABILIT...,,,29181,F,2084-06-29 00:00:00,,,0
28026,75779,123505,Discharge summary,Report,Admission Date: [**2128-8-6**] D...,CONGESTIVE HEART FAILURE,21,73,,216,MS,CARDIAC VALVE & OTH MAJ CARDIOTHORACIC PROC W ...,,,41405,M,2055-06-23 00:00:00,,,0
28027,71582,101422,Discharge summary,Report,Admission Date: [**2100-7-16**] ...,AORTIC STENOSIS,8,76,,221,MS,CARDIAC VALVE & OTH MAJ CARDIOTHORACIC PROC W/...,,,4241,M,2024-01-26 00:00:00,,,0
28028,46449,110075,Discharge summary,Addendum,"Name: [**Known lastname 13679**],[**Known fir...",CHEST PAIN,11,74,,234,MS,CORONARY BYPASS W CARDIAC CATH W/O MCC,,,41071,M,2100-02-13 00:00:00,,,0
