In [2]:
!pip install PyAthena
from pyathena import connect
from pyathena.pandas.util import as_pandas


# Import libraries
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import boto3
from botocore.client import ClientError
from IPython.display import display, HTML
%matplotlib inline


# Resolve the per-account, per-region bucket Athena uses for query results.
s3 = boto3.resource('s3')
client = boto3.client("sts")
account_id = client.get_caller_identity()["Account"]
my_session = boto3.session.Session()
region = my_session.region_name
athena_query_results_bucket = 'aws-athena-query-results-'+account_id+'-'+region

# Create the results bucket only when it genuinely does not exist.
try:
    s3.meta.client.head_bucket(Bucket=athena_query_results_bucket)
except ClientError as err:
    # head_bucket reports a missing bucket as '404'. Anything else (e.g. '403'
    # when the bucket exists but is not accessible) is a real problem that
    # creating a bucket would not solve, so surface it instead of masking it.
    if err.response['Error']['Code'] not in ('404', 'NoSuchBucket'):
        raise
    print('Creating bucket '+athena_query_results_bucket)
    if region == 'us-east-1':
        # us-east-1 is the S3 default and must NOT be given as a LocationConstraint.
        bucket = s3.create_bucket(Bucket=athena_query_results_bucket)
    else:
        # Every other region requires an explicit LocationConstraint.
        bucket = s3.create_bucket(
            Bucket=athena_query_results_bucket,
            CreateBucketConfiguration={'LocationConstraint': region},
        )

# Athena cursor that stages query results under the bucket resolved above.
cursor = connect(s3_staging_dir='s3://'+athena_query_results_bucket+'/athena/temp').cursor()

Collecting PyAthena
  Downloading PyAthena-2.2.0-py3-none-any.whl (37 kB)
Collecting tenacity>=4.1.0
  Downloading tenacity-7.0.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: tenacity, PyAthena
Successfully installed PyAthena-2.2.0 tenacity-7.0.0


## Diabetic patients cohort

In [4]:
# Pull every row of the diabetic-patients cohort table from Athena.
query='select * from default.diabetic_patients_cohort'
cursor.execute(query)
# as_pandas turns the executed cursor's result set into a pandas DataFrame.
patients = as_pandas(cursor)
# Last expression of the cell: preview the first rows of the cohort.
patients.head()

Unnamed: 0,subject_id,admit_time,discharge_time,mortality_flag
0,787,2142-01-31 18:55:00,2142-07-02 20:00:00,0
1,1220,2181-08-08 08:00:00,2181-08-14 12:30:00,0
2,2877,2135-01-21 07:15:00,2135-01-26 12:19:00,0
3,6024,2123-05-06 15:16:00,2127-10-08 20:40:00,0
4,4979,2109-07-07 14:27:00,2109-07-24 14:00:00,0


## Selected feature codes (MIMIC-III `chartevents` / `labevents` item IDs)

In [6]:
# Feature -> code mapping for the 'chartevents' source table.
# Each row is [feature name, code, code, ...]; create_codes_table() reads
# element 0 as the feature name and every remaining element as one code.
# NOTE(review): the values look like MIMIC-III chartevents ITEMIDs rather than
# ICD-9 codes (despite the `icd9code` column name) - confirm.
# NOTE(review): 'Glascow' is presumably a misspelling of 'Glasgow'; the string
# is stored verbatim as the feature name, so only change it together with any
# consumers that match on it.
chartevents_codes = [
    ['Capillary refill rate', 3348, 224308, 223951, 8377, 115],
    ['Diastolic blood pressure', 8364, 225310, 228151, 8555, 8368, 220051, 8502, 8503, 8504, 8505, 8506, 8507, 8508, 153, 8440, 224643, 227242, 8441, 220180, 8444, 8445, 8446, 8448, 220060],
    ['Fraction inspired oxygen', 7146, 226767, 227035, 228192, 228193, 228232],
    ['Glascow coma scale eye opening', 184, 220739],
    ['Glascow coma scale motor response', 223901, 226757],
    ['Glascow coma scale total', 198],
    ['Glascow coma scale verbal response', 223900, 226758],
    ['Glucose', 3744, 3745, 1310, 807, 1529, 811, 220621, 226537, 3447, 225664],
    ['Heart Rate', 211, 220045],
    ['Height', 226730],
    ['Mean blood pressure', 225312, 52, 6702, 220052, 6927, 3312, 3314, 3316, 7618, 3318, 3320, 3322, 7620, 7622, 3324, 5702, 443, 456, 220181],
    # NOTE(review): a single code of 0 looks like a placeholder - confirm the intended codes.
    ['Oxygen saturation', 0],
    ['Respiratory rate', 220210, 618, 224688, 224690, 224689, 619],
    ['Systolic blood pressure', 51, 225309, 220050, 3313, 3315, 3317, 3319, 3321, 3323, 3325, 442, 224167, 227243, 455, 220179, 480, 482, 484 ],
    ['Temperature', 224027, 645, 8537, 676, 677, 223762, 678, 679, 223761],
    ['Weight', 581],
    ['pH', 1126, 780, 223830, 220274, 220734, 4753, 4202, 1365, 7717, 3839]
]

# Feature -> code mapping for the 'labevents' source table.
# Same row shape as chartevents_codes: [feature name, code, code, ...];
# create_codes_table() inserts one table row per (feature, code) pair.
# NOTE(review): the values look like MIMIC-III labevents ITEMIDs - confirm.
labevents_codes = [
    ['Oxygen Saturation', 50817],
    ['Temperature', 50825],
    ['pH', 50820],
    ['% Hemoglobin A1c', 50852, 50854],
    ['Blood Glucose', 50931, 51529],
    ['Serum Creatinine', 50912]    
]

In [57]:
def create_codes_table():
    """Create and populate the ``default.featurescodes`` Athena table if it is missing.

    The table has one row per (source table, feature, code) triple, built from
    the module-level ``labevents_codes`` and ``chartevents_codes`` lists, where
    each entry is ``[feature name, code, code, ...]``.

    Relies on these module-level names: ``cursor`` (Athena cursor),
    ``athena_query_results_bucket`` (bucket that backs the external table),
    ``labevents_codes`` and ``chartevents_codes``.

    Returns:
        None. Progress and failures are reported with ``print``; errors raised
        while creating or filling the table are caught and printed (best
        effort), so a failure does not abort the rest of the notebook.
    """
    cursor.execute("SHOW TABLES LIKE 'featurescodes'")
    if cursor.fetchone() is not None:
        print ("featurescodes table already exists.")
        return

    # Build every VALUES tuple up front so the table is filled by a single
    # INSERT instead of one Athena query (and one tiny S3 object) per code.
    rows = []
    for source_table, feature_groups in (('labevents', labevents_codes),
                                         ('chartevents', chartevents_codes)):
        for group in feature_groups:
            # Double embedded single quotes so the name stays a valid SQL literal.
            feature = str(group[0]).replace("'", "''")
            for code in group[1:]:
                # int() guarantees only a numeric literal reaches the statement.
                rows.append("('" + source_table + "','" + feature + "'," + str(int(code)) + ")")

    # Store the table under the results bucket of the *current* account/region
    # rather than a bucket name hard-coded for one specific account.
    location = 's3://' + athena_query_results_bucket + '/events'

    try:
        cursor.execute(
            "create external table default.featurescodes "
            "(mimiciiitable string, feature string, icd9code int) "
            "stored as PARQUET location '" + location + "'"
        )
        if rows:
            # Qualify the table with the same schema used in CREATE above.
            cursor.execute("insert into default.featurescodes values " + ", ".join(rows))
        print ("featurescodes table created!")
    except Exception as e:
        print ("Failed to create featurescodes table: " + str(e))
            

featurescodes table created!
