# Building the SQLite database from the example CSV files

In [1]:
import os
import logging
import time

import torch

from FastEHR.database import Static
from FastEHR.database import Diagnoses
from FastEHR.database import Measurements

# Fix PyTorch's global RNG seed.
torch.manual_seed(1337)

# Emit INFO-level (and above) log records in the cell output.
logging.basicConfig(level=logging.INFO)

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
# Set device = "cpu" by hand if more informative debugging statements are needed.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}.")


# Shell escape: print the notebook's working directory. The data and database
# paths defined further down are built from os.getcwd(), so this shows what
# they will resolve against.
!pwd

# Enable the autoreload extension in mode 2 so edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Using device: cpu.
/home/ubuntu/Documents/GitHub/SurvivEHR/FastEHR/examples/1_build_database


# Paths to example dataset and location to save generated database

In [2]:
# All locations are resolved relative to the notebook's working directory:
#   path_to_directory -> folder holding the example CSV inputs
#   PATH_TO_DB        -> database file the tables are written to
path_to_directory = os.getcwd() + "/../data/"
PATH_TO_DB = path_to_directory + "_built/example_database.db"

# A database file cannot be created inside a directory that does not exist,
# so make sure the output folder is present (a fresh checkout may not have it).
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(os.path.dirname(PATH_TO_DB), exist_ok=True)
print(f"Saving databse to {PATH_TO_DB}")

# `load` is forwarded to every table builder below. Leave it False to re-build
# the database; the warning is a reminder for when it has been switched to True.
load = False
if load:
    logging.warning("Load is true, if you want to re-build database set to False")

Saving databse to /home/ubuntu/Documents/GitHub/SurvivEHR/FastEHR/examples/1_build_database/../data/_built/example_database.db


# Create static table

This is information that persists throughout a patient's lifetime.

In [3]:
# Location of the CSV with the static (baseline) records.
PATH_TO_STATIC = f"{path_to_directory}baseline/static_data.csv"

# Build the static table in the database at PATH_TO_DB from that CSV and print
# the resulting object's summary.
# NOTE(review): `load` is passed straight through; presumably load=True reuses
# an existing table instead of re-building it -- confirm against FastEHR.
static = Static(PATH_TO_DB, PATH_TO_STATIC, load=load)
print(static)

INFO:root:Removing previous static table (if exists)
INFO:root:Creating static_table
Building static table: 1it [00:00, 71.50it/s]
INFO:root:Creating indexes on static_table


Static table with 100 records.


# Create non-valued event table

This is a single table which contains all non-valued events, i.e. events that are recorded without an associated value (such as diagnoses).

In [4]:
# Location of the CSV with the diagnosis events (events that carry no value).
PATH_TO_DIAGNOSIS = f"{path_to_directory}diagnoses/diagnosis_data.csv"

# Build the diagnosis table in the same database file and print its summary.
# NOTE(review): `load` is passed straight through; presumably load=True reuses
# an existing table instead of re-building it -- confirm against FastEHR.
diagnosis = Diagnoses(PATH_TO_DB, PATH_TO_DIAGNOSIS, load=load)
print(diagnosis)

INFO:root:Creating diagnosis_table
Building diagnosis table: 1it [00:00,  5.37it/s]
INFO:root:Creating indexes on diagnosis_table


Diagnosis table with 121 records.


# Create valued event table

In [5]:
# Directory (note the trailing slash) containing the measurement / test /
# medication CSV files. The recorded log below shows a file from this folder
# being loaded into a table named measurement_<file stem>.
PATH_TO_DYNAMIC = f"{path_to_directory}timeseries/measurement_tests_medications/"

# Build the valued-event tables in the same database file and print the
# per-measurement record counts.
# NOTE(review): `load` is passed straight through; presumably load=True reuses
# existing tables instead of re-building them -- confirm against FastEHR.
measurements = Measurements(PATH_TO_DB, PATH_TO_DYNAMIC, load=load)
print(measurements)

INFO:root:Building table from file /home/ubuntu/Documents/GitHub/SurvivEHR/FastEHR/examples/1_build_database/../data/timeseries/measurement_tests_medications/25_Hydroxyvitamin_D2_level_92_.csv to table: measurement_25_Hydroxyvitamin_D2_level_92_
INFO:root:Used event_date_col EVENT_DATE, and event_value_col Value   : 0it [00:00, ?it/s]
INFO:root:Selected from available columns ['PRACTICE_PATIENT_ID', 'EVENT_DATE', 'Value']
Adding 25_Hydroxyvitamin_D2_level_92_                                 : 1it [00:00, 73.89it/s]

Measurement table:
Measurement & Count
25_Hydroxyvitamin_D2_level_92_:                    & 4
Total                                  & 4



