In [None]:
# Colab-only helpers: `auth` is used here to sign in, `drive` is used in the
# next cell to mount Google Drive.
from google.colab import auth, drive
# Authenticate the current Colab user.
# NOTE(review): presumably this supplies the credentials used by the BigQuery
# queries further down — confirm.
auth.authenticate_user()

In [None]:
# Mount Google Drive at /content/gdrive; the data directory configured below
# (mimicdir) lives under this mount point, so this must run first.
drive.mount('/content/gdrive')

In [None]:
import os
import random
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from google.cloud import bigquery
from scipy.stats import ks_2samp

# SettingWithCopyWarning has moved between pandas releases: the public home is
# pandas.errors, and the old pandas.core.common path no longer resolves on
# pandas 2.x. Try the public location first, then the legacy one, and tolerate
# a pandas build that does not define the warning at all.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        SettingWithCopyWarning = None

# Silence pandas' chained-assignment warning for the rest of the notebook
# (only when this pandas version actually has such a warning class).
if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Directory on the mounted Google Drive where query results are cached as
# gzip-compressed CSV files; created on first run, left alone if it exists.
mimicdir = '/content/gdrive/My Drive/hst953_hw1/mimic_data'
Path(mimicdir).mkdir(parents=True, exist_ok=True)

# Seed both the stdlib and the NumPy global generators.
random.seed(42)
np.random.seed(42)

In [None]:
# Change this to your own project id from BigQuery
project_id = 'CHANGE THIS'

# The literal placeholder shipped with the notebook. run_query refuses to run
# while project_id still equals it, so a forgotten edit fails immediately with
# a clear message instead of an opaque error from the BigQuery service.
_PROJECT_ID_PLACEHOLDER = 'CHANGE THIS'


def run_query(query):
    """Run a BigQuery query and return the result as a pandas DataFrame.

    The query is executed with the standard SQL dialect under the module-level
    ``project_id``.

    Parameters
    ----------
    query : str
        SQL text to execute.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.io.gbq.read_gbq`` returns for the query.

    Raises
    ------
    ValueError
        If ``project_id`` has not been changed from the notebook placeholder.
    """
    # project_id is read at call time, so editing and re-running the
    # assignment above takes effect without redefining this function.
    if project_id == _PROJECT_ID_PLACEHOLDER:
        raise ValueError(
            "project_id is still the placeholder value "
            f"{_PROJECT_ID_PLACEHOLDER!r}; set project_id to your own "
            "BigQuery project id before calling run_query()."
        )
    return pd.io.gbq.read_gbq(query, project_id=project_id, dialect="standard")


In [None]:
# Select every distinct (subject_id, hadm_id) pair that appears in the
# diagnoses_icd table; this is the base cohort the later cells label and split.
# NOTE(review): the cohort is drawn from diagnoses_icd, so an admission with no
# row in that table would not be included — confirm that is intended.
all_patients_query = """
    select distinct subject_id, hadm_id
    from `physionet-data.mimiciii_clinical.diagnoses_icd`;
"""
all_patients = run_query(all_patients_query)

In [None]:
# Select the admissions that carry a hypertension diagnosis, i.e. rows of
# diagnoses_icd whose icd9_code is one of '4010', '4011', '4019'.
# Only matching rows are returned; admissions without hypertension are not
# queried here and are identified afterwards by comparison with all_patients.
# NOTE(review): there is no DISTINCT, so an admission may appear more than once
# if several of these codes are recorded for it.
patients_with_hypertension_query = """
    select subject_id, hadm_id
    from `physionet-data.mimiciii_clinical.diagnoses_icd`
    where icd9_code in ('4010', '4011', '4019');
"""
patients_with_hypertension = run_query(patients_with_hypertension_query)

In [None]:
# The cohort query has no ORDER BY, so BigQuery may return its rows in any
# order. The train/test mask below is positional, so pin the row order first;
# otherwise the same seed can assign different admissions to train on each run.
all_patients = (
    all_patients
    .sort_values(['subject_id', 'hadm_id'])
    .reset_index(drop=True)
)

# Label each admission: 1 if its hadm_id appears among the hypertension
# admissions, 0 otherwise.
has_hypertension = all_patients['hadm_id'].isin(patients_with_hypertension['hadm_id'])
all_patients['hypertension'] = has_hypertension.astype(int)

# Train test split: each row is independently marked train=1 with probability
# 0.7, otherwise train=0. A dedicated generator seeded with 42 makes the draw
# identical every time this cell runs, rather than depending on how much of the
# global NumPy random stream earlier executions have already consumed.
# NOTE(review): the split is per (subject_id, hadm_id) row, so one subject's
# admissions can land on both sides — confirm that is acceptable.
split_rng = np.random.RandomState(42)
msk = split_rng.rand(len(all_patients)) < 0.7
all_patients['train'] = msk.astype(int)

# Cache the labelled cohort on Drive as a gzip-compressed CSV (no index column).
all_patients.to_csv(os.path.join(mimicdir, 'hypertension_patients.gz'), compression='gzip', index=False)

In [None]:
# Fetch the chart measurements for four item ids from chartevents, keeping the
# patient/admission keys, the chart timestamp, the item id and the numeric value.
# NOTE(review): the four item ids are presumably the vital signs of interest —
# confirm their meaning against the item dictionary.
chartevents_query = """
    select subject_id, hadm_id, charttime, itemid, valuenum
    from `physionet-data.mimiciii_clinical.chartevents`
    where itemid in (220045, 220210, 220277, 220181);
"""

# Run the query, then cache the result next to the cohort file as a
# gzip-compressed CSV without the DataFrame index.
chartevents_table = run_query(chartevents_query)
charts_csv_path = os.path.join(mimicdir, 'hypertension_charts.gz')
chartevents_table.to_csv(
    charts_csv_path,
    index=False,
    compression='gzip',
)