### ECG Patient Medications ###
Goals:
    - pull PatientIDs and medications from EDW
    - compare with medications from internal database
    - pull medications for all ECG patients from internal database

In [2]:
import os
import glob
import numpy as np

import pandas as pd
from ehr.edw import Epic
from ehr.edw import ExternalIdentity
from dotenv import load_dotenv

from ehr.ehr_db import EhrDb

%load_ext autoreload
%autoreload 2
seed = 123

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# SQL Alchemy to access local database
from sqlalchemy import create_engine

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Identity type of the patient identifiers stored in the MGH ECG files
mgh_idtype = ExternalIdentity.MGHMRN

# Data locations: the parquet files sit in a sub-directory of the ECG data root
data_root = os.path.normpath('/mnt/obi0/andreas/data/ecg')
parquet_dir = os.path.join(data_root, 'parquet')

# Read the database access credentials from the env file into os.environ
dotenv_file = os.path.normpath('/mnt/obi0/andreas/config/credentials.env')
load_dotenv(dotenv_path=dotenv_file)

In [4]:
def mask_url_password(url):
    """Return `url` with the password in `scheme://user:password@host/...` replaced by '***'.

    URLs without a `user:password@` part are returned unchanged.
    """
    scheme, sep, rest = url.partition('://')
    # rpartition: split on the LAST '@' so an '@' inside the password is handled
    credentials, at, location = rest.rpartition('@')
    user, colon, _password = credentials.partition(':')
    if not colon:
        return url
    return f'{scheme}{sep}{user}:***{at}{location}'


# Sanity check that the credentials file was loaded.
# The connection URL embeds the database password, so it is masked before
# printing: notebook outputs are saved with the file and must not hold secrets.
print(os.environ['EHR_DEV_USER'])
print(mask_url_password(os.environ['EHR_DEV_URL']))

andreas
postgresql+psycopg2://andreas:***@obi-cpu8:5432/ehr_dev_andreas


In [3]:
# Concatenate all MGH ECG files
# Name of the single parquet file that holds the concatenated ECG metadata
concatenated_parquet_file_name = 'MGH_RAW_FROM2001.parquet'
# Individual metadata files matching the pattern, in sorted order
file_list = glob.glob(os.path.join(parquet_dir, 'MGH_RAW_meta_2*.parquet'))
file_list.sort()
def concatenate_parquet(file_list):
    """Read every parquet file in `file_list` and stack the rows into one DataFrame.

    Parameters
    ----------
    file_list : list of str
        Paths of the parquet files; they are read in the given order.

    Returns
    -------
    pandas.DataFrame
        The rows of all files in file order, with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If `file_list` is empty (e.g. the glob pattern matched nothing).
    """
    if len(file_list) == 0:
        # pd.concat would raise an opaque "No objects to concatenate" here
        raise ValueError('No parquet files to concatenate: file_list is empty.')
    df_list = []
    for f, file in enumerate(file_list, start=1):
        print(f'Loading file {f} of {len(file_list)}: {os.path.basename(file)}')
        df_list.append(pd.read_parquet(file))
    # ignore_index=True already produces a clean RangeIndex, so an additional
    # reset_index (and the extra copy of the frame it makes) is not needed.
    return pd.concat(df_list, ignore_index=True)

# Load the data.
# The concatenated parquet file acts as a cache: it is read when it exists and
# is built from the individual files (and saved) only when it is missing, so
# the cell works on a fresh setup without un-commenting code.
concatenated_parquet_path = os.path.join(parquet_dir, concatenated_parquet_file_name)
if os.path.exists(concatenated_parquet_path):
    df = pd.read_parquet(concatenated_parquet_path)
else:
    df = concatenate_parquet(file_list)
    df.to_parquet(concatenated_parquet_path)
# dropna=False keeps the counts identical to len(<column>.unique())
print(f'Loaded {df["file"].nunique(dropna=False)} ECGs from {df["PatientID"].nunique(dropna=False)} mrns.')
# Rename PatientID column: it is really an mrn
df = df.rename(columns={'PatientID': 'mrn'})
df.head()

Loaded 2484952 ECGs from 543611 mrns.


Unnamed: 0,StmtText,VentricularRate,AtrialRate,PRInterval,QRSDuration,QTInterval,QTCorrected,PAxis,RAxis,TAxis,QRSCount,QOnset,QOffset,POnset,POffset,TOffset,mrn,TestDate,TestTime,WaveformType,WaveformStartTime,NumberofLeads,SampleType,SampleBase,SampleExponent,HighPassFilter,LowPassFilter,ACFilter,DataType,AcquisitionDevice,AcquisitionTime,AcquisitionDate,file,dir,year,n_samples,EMRN,study_filename,fdigit,Gender,DateofBirth,Race,HeightIN,WeightLBS,SystolicBP,DiastolicBP,QTcFrederica
0,,83,83,163,96,383,450,16,7,-1,14,109,132,70,99,201,3488925,2001-09-11,14:10:00,Median,0,8,CONTINUOUS_SAMPLES,240,0,5,150,NONE,RESTING,MAC,14:10:00,09-11-2001,MUSE_20191204_125204_04000.xml,/mnt/obi0/phi/ecg/import/MGH_RAW/2001/10,2001,2500,e0373ea83ec165,e0373ea83ec165_e3bf660f468c4f7d059461add579515...,e0373,,NaT,,,,,,
1,,46,46,156,100,496,434,9,26,70,7,106,131,67,97,230,1440449,2001-10-11,11:21:00,Median,0,8,CONTINUOUS_SAMPLES,250,0,5,40,NONE,RESTING,MAC,11:21:00,10-11-2001,MUSE_20191204_131933_52000.xml,/mnt/obi0/phi/ecg/import/MGH_RAW/2001/10,2001,2500,e20bcb2751455f,e20bcb2751455f_e20f2b94a4f887a3f283a9ad023e662...,e20bc,MALE,1933-01-01,CAUCASIAN,68.0,225.0,,,
2,,56,56,170,76,422,407,62,32,62,9,113,132,70,98,218,1178080,2001-08-28,10:25:00,Median,0,8,CONTINUOUS_SAMPLES,250,0,5,150,NONE,RESTING,MACVU,10:25:00,08-28-2001,MUSE_20191204_120242_57000.xml,/mnt/obi0/phi/ecg/import/MGH_RAW/2001/10,2001,2500,e20ebb26cdaaf8,e20ebb26cdaaf8_e3bec6a036ad25715669b662d508c1f...,e20eb,FEMALE,NaT,,,,,,
3,,88,88,146,79,350,423,74,73,63,15,111,130,76,100,195,1845538,2001-08-11,09:09:00,Median,0,8,CONTINUOUS_SAMPLES,240,0,5,150,NONE,RESTING,MAC,09:09:00,08-11-2001,MUSE_20191204_145941_78000.xml,/mnt/obi0/phi/ecg/import/MGH_RAW/2001/10,2001,2500,e207cc623fc2de,e207cc623fc2de_e3bec6a358a173c8efb4652c155e44a...,e207c,FEMALE,1932-08-10,CAUCASIAN,8.0,,,,
4,,84,84,142,92,367,434,76,84,47,15,111,133,77,105,199,3564102,2001-09-14,15:17:00,Median,0,8,CONTINUOUS_SAMPLES,240,0,5,150,NONE,RESTING,MAC,15:17:00,09-14-2001,MUSE_20191204_122418_85000.xml,/mnt/obi0/phi/ecg/import/MGH_RAW/2001/10,2001,2500,e036e2b243962b,e036e2b243962b_e3bf660f437dfb1c2dd89cbcf0b1224...,e036e,MALE,1959-11-01,CAUCASIAN,68.0,144.0,,,


In [4]:
# Draw a random sample of 50 ECG rows (numpy's global RNG is seeded first)
# and collect the distinct MRNs that occur in it.
np.random.seed(seed)
df_sample = df.sample(n=50)
df_sample = df_sample.reset_index(drop=True)
sample_mrn_list = list(df_sample['mrn'].unique())
# NOTE(review): this writes patient MRNs into the saved notebook output --
# confirm that is acceptable before sharing the notebook.
print(*sample_mrn_list)

1192672 3180919 3156781 2355298 3857623 4391017 4174157 0397404 2713193 3428659 6181632 5042389 1065343 4152044 4347496 3886088 3305759 2975045 4789811 3152674 3928257 4714971 1867601 4892118 5258087 0943606 1524145 4606501 3062569 3559573 4554298 4595376 2878405 2699748 1100646 2885024 5888389 6024049 3567414 2882968 2015846 4439755 4112426 4959090 4702955 4315017 6549510 1089061 1688637 6041816


In [11]:
# EDW client; the login is taken from the environment
epic = Epic(db='PHS',
            out_dir=data_root,
            edw_user=os.environ['EDW_USER'],
            edw_password=os.environ['EDW_PASSWORD'])

# Look up the sampled MGH MRNs in the EDW and keep the distinct PatientIDs
df_id = epic.patientids_from_external(external_identity=mgh_idtype,
                                      external_patient_list=sample_mrn_list)
patient_id_list = list(df_id['PatientID'].unique())
df_id.head(2)

Unnamed: 0,PatientID,BWHMRN,EMPI,MGHMRN,PMRN
0,Z10426758,17605189.0,102490750,1100646,10029694931
1,Z10465546,,100593871,3305759,10071910607


In [13]:
# Number of distinct PatientIDs found for the sampled MRNs
len(patient_id_list)

50

In [14]:
# Development database
# User and password are both taken from the EHR_DEV_* variables so that they
# belong to the same account; previously the user came from EHR_USER while the
# password came from EHR_DEV_PASSWORD.
db = EhrDb(user=os.environ['EHR_DEV_USER'],
           password=os.environ['EHR_DEV_PASSWORD'],
           host='obi-cpu8',
           port='5432',
           db='ehr_dev_andreas')

# EDW client, created with the same arguments as the one used for the
# PatientID lookup earlier in the notebook
epic = Epic(edw_user=os.environ['EDW_USER'],
            edw_password=os.environ['EDW_PASSWORD'],
            db='PHS',
            out_dir=data_root)

In [None]:
# Load some data into the test database
# NOTE(review): presumably 'medications': 20 is the number of ids sent per
# medications query -- confirm against EhrDb.import_epic.
chunk_sizes = {'medications': 20}
db.import_epic(epic=epic,
               query_ids=patient_id_list,
               chunk_sizes=chunk_sizes,
               name='test',
               description='import test',
               protocol='2019P002257')