In [None]:
# Load the sparksql_magic IPython extension; the sparksql cell-magic cells
# further down in this notebook rely on it being loaded first.
%load_ext sparksql_magic

In [None]:
import os
import pandas as pd
import json

from pyspark.sql import SparkSession
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [None]:
# Directory that holds the .ndjson extracts read by the cells below
# (relative path, resolved against the kernel's working directory).
DATA_BUCKET = "../data_bucket/physionet_extract"

# os.listdir() returns entries in arbitrary order; sort them so the listing
# is stable from run to run and across file systems.
for item in sorted(os.listdir(DATA_BUCKET)):
    print(item)

In [None]:
# Obtain the Spark session for this notebook (an already-active session is
# reused by getOrCreate, otherwise a new one is started).
spark = (
    SparkSession.builder
    .appName("FhirDataApplication")
    .getOrCreate()
)

# Read JSON with Schema

In [None]:
# Load the Encounter schema definition (JSON) and turn it into a Spark
# StructType so the data can be read with an explicit schema.
# The encoding is pinned to UTF-8 (the JSON interchange encoding) instead of
# relying on the platform's default locale encoding.
with open("./schema/Encounter.json", encoding="utf-8") as f:
    schema_read = json.load(f)

# The file is no longer needed once parsed, so the schema is built outside the
# `with` block, after the handle has been closed.
encounter_schema = T.StructType.fromJson(schema_read)

In [None]:
# Name of the extract to load; it is also the base name of the NDJSON file,
# so the path is derived from it rather than repeating the literal.
resource = "MimicEncounter"
data_path = f"{DATA_BUCKET}/{resource}.ndjson"

# Read the file with the explicit Encounter schema (no schema inference) and
# expose the result to SQL cells as the temp view `df_Encounter`.
df_Encounter = spark.read.schema(encounter_schema).json(data_path)
df_Encounter.createOrReplaceTempView("df_Encounter")

In [None]:
%%sparksql
-- Row count of the schema-enforced Encounter view.
-- A global aggregate (no GROUP BY) always returns exactly one row, so no LIMIT is needed.
select count(*) from df_Encounter

# Read JSON with Inferred Schema

In [None]:
# Read the three encounter extracts without passing a schema, so Spark infers
# one from the NDJSON contents. All three files are read before any view is
# registered.
df_MimicEncounter, df_MimicEncounterED, df_MimicEncounterICU = [
    spark.read.json(f"{DATA_BUCKET}/{stem}.ndjson")
    for stem in ("MimicEncounter", "MimicEncounterED", "MimicEncounterICU")
]

# Expose each DataFrame to the SQL cells under a view named like its variable.
for view_name, frame in (
    ("df_MimicEncounter", df_MimicEncounter),
    ("df_MimicEncounterED", df_MimicEncounterED),
    ("df_MimicEncounterICU", df_MimicEncounterICU),
):
    frame.createOrReplaceTempView(view_name)

In [None]:
# Print the schema Spark inferred for the MimicEncounter NDJSON file
# (this DataFrame was read without an explicit schema).
df_MimicEncounter.printSchema()

In [None]:
%%sparksql
-- Peek at two raw rows of the schema-inferred MimicEncounter view.
SELECT *
FROM df_MimicEncounter
LIMIT 2

In [None]:
%%sparksql
-- Encounter Class of Each DF
-- https://hl7.org/fhir/R4/v3/ActEncounterCode/vs.html
-- Each branch carries its own src literal and is grouped by (src, code), so a row
-- can never repeat within or across branches. UNION ALL therefore returns the same
-- rows as UNION while skipping the extra de-duplication pass.
SELECT "df_MimicEncounter" src, class.code, count(1) cnt FROM df_MimicEncounter GROUP BY 1,2
UNION ALL
SELECT "df_MimicEncounterICU" src, class.code, count(1) cnt FROM df_MimicEncounterICU GROUP BY 1,2
UNION ALL
SELECT "df_MimicEncounterED" src, class.code, count(1) cnt FROM df_MimicEncounterED GROUP BY 1,2
ORDER BY 1,2

In [None]:
%%sparksql
-- Code System
-- AdmitSource: https://mimic.mit.edu/fhir/ValueSet-mimic-admit-source.html
-- DischargeDisposition: https://mimic.mit.edu/fhir/ValueSet-mimic-discharge-disposition.html
-- Lists the distinct codes found under each of the two hospitalization fields.
-- Both branches are already DISTINCT and use different key literals, so their
-- rows cannot overlap: UNION ALL yields the same result as UNION without a
-- second de-duplication pass.
SELECT DISTINCT "admitSource" key, explode(hospitalization.admitSource.coding.code) value FROM df_MimicEncounter
UNION ALL
SELECT DISTINCT "dischargeDisposition" key, explode(hospitalization.dischargeDisposition.coding.code) value FROM df_MimicEncounter
ORDER BY 1,2

In [None]:
%%sparksql -l 10
-- Distinct combinations of the first admitSource code and the first
-- dischargeDisposition code per encounter, ordered by admitSource.
SELECT DISTINCT
    hospitalization.admitSource.coding.code[0] AS admitSource,
    hospitalization.dischargeDisposition.coding.code[0] AS dischargeDisposition
FROM df_MimicEncounter
ORDER BY 1

In [None]:
%%sparksql -l 10
-- Distinct values obtained by exploding the type column of every encounter.
SELECT DISTINCT explode(type)
FROM df_MimicEncounter
ORDER BY 1