# Patient Administrative Outcomes Predictive Model

## 0. Set Up Environment

- Encrypt the disk with LUKS
- [Encrypt at the folder level with EncFS](https://help.ubuntu.com/community/FolderEncryption)
- Install Spark
- Setup Postgres

## 1. Initialize Spark Session

In [2]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
import pyspark.sql.types as T

import json

In [14]:
# Define PostgreSQL connection properties.
# The password is read from the FHIR_DB_PASSWORD environment variable when it
# is set, so the real secret never has to be typed into (or saved with) the
# notebook. The original literal is kept as the fallback, so existing runs
# behave exactly as before when the variable is absent.
import os

connection_url = "jdbc:postgresql://localhost:5432/my_fhir_project"
connection_config = {
    "user": "my_fhir_user",
    "password": os.environ.get("FHIR_DB_PASSWORD", "**********"),
    "driver": "org.postgresql.Driver",
}

In [None]:
# Create the Spark session for this notebook (getOrCreate returns the already
# running session if there is one). The PostgreSQL JDBC driver jar is put on
# the classpath through the spark.jars setting.
spark = (
    SparkSession.builder
    .appName("FhirDataApplication")
    .config("spark.jars", "/home/snowblade/Downloads/postgresql-42.7.4.jar")
    .getOrCreate()
)

# Render the session object as the cell output.
display(spark)

## 2. Data Processing

### 2.1 FHIR Integration | Medallion Architecture

#### 2.1.1. Bronze Layer

In [None]:
-- Bronze layer: one landing table each for patient, encounter, condition and
-- procedure data. Every table has a single jsonb column named "value".
CREATE SCHEMA IF NOT EXISTS bronze;

CREATE TABLE IF NOT EXISTS bronze.patient_data   (value jsonb);
CREATE TABLE IF NOT EXISTS bronze.encounter_data (value jsonb);
CREATE TABLE IF NOT EXISTS bronze.condition_data (value jsonb);
CREATE TABLE IF NOT EXISTS bronze.procedure_data (value jsonb);

In [None]:
# Read the gzipped patient NDJSON export as plain text: one row per line, in
# a single string column named "value".
bronze_patient_df = spark.read.text("/home/snowblade/visible/MimicPatient.ndjson.gz")

# mode="overwrite" on its own makes Spark DROP the target table and re-create
# it from the DataFrame schema (a text column), which throws away the jsonb
# column declared for bronze.patient_data. With truncate=true Spark empties
# the existing table instead, and stringtype=unspecified lets PostgreSQL cast
# the incoming strings to jsonb on insert.
(
    bronze_patient_df.write
    .option("truncate", "true")
    .option("stringtype", "unspecified")
    .jdbc(
        url=connection_url,
        table="bronze.patient_data",
        mode="overwrite",
        properties=connection_config,
    )
)


In [None]:
# Read both gzipped encounter NDJSON exports (regular and ED) as plain text,
# one row per line in a single string column named "value", and stack them.
bronze_encounter1_df = spark.read.text("/home/snowblade/visible/MimicEncounter.ndjson.gz")
bronze_encounter2_df = spark.read.text("/home/snowblade/visible/MimicEncounterED.ndjson.gz")
bronze_encounter_df = bronze_encounter1_df.union(bronze_encounter2_df)

# mode="overwrite" on its own makes Spark DROP the target table and re-create
# it from the DataFrame schema (a text column), which throws away the jsonb
# column declared for bronze.encounter_data. With truncate=true Spark empties
# the existing table instead, and stringtype=unspecified lets PostgreSQL cast
# the incoming strings to jsonb on insert.
(
    bronze_encounter_df.write
    .option("truncate", "true")
    .option("stringtype", "unspecified")
    .jdbc(
        url=connection_url,
        table="bronze.encounter_data",
        mode="overwrite",
        properties=connection_config,
    )
)


In [None]:
# Read both gzipped condition NDJSON exports (regular and ED) as plain text,
# one row per line in a single string column named "value", and stack them.
bronze_condition1_df = spark.read.text("/home/snowblade/visible/MimicCondition.ndjson.gz")
bronze_condition2_df = spark.read.text("/home/snowblade/visible/MimicConditionED.ndjson.gz")
bronze_condition_df = bronze_condition1_df.union(bronze_condition2_df)

# mode="overwrite" on its own makes Spark DROP the target table and re-create
# it from the DataFrame schema (a text column), which throws away the jsonb
# column declared for bronze.condition_data. With truncate=true Spark empties
# the existing table instead, and stringtype=unspecified lets PostgreSQL cast
# the incoming strings to jsonb on insert.
(
    bronze_condition_df.write
    .option("truncate", "true")
    .option("stringtype", "unspecified")
    .jdbc(
        url=connection_url,
        table="bronze.condition_data",
        mode="overwrite",
        properties=connection_config,
    )
)


In [None]:
# Read both gzipped procedure NDJSON exports (regular and ED) as plain text,
# one row per line in a single string column named "value", and stack them.
bronze_procedure1_df = spark.read.text("/home/snowblade/visible/MimicProcedure.ndjson.gz")
bronze_procedure2_df = spark.read.text("/home/snowblade/visible/MimicProcedureED.ndjson.gz")
bronze_procedure_df = bronze_procedure1_df.union(bronze_procedure2_df)

# mode="overwrite" on its own makes Spark DROP the target table and re-create
# it from the DataFrame schema (a text column), which throws away the jsonb
# column declared for bronze.procedure_data. With truncate=true Spark empties
# the existing table instead, and stringtype=unspecified lets PostgreSQL cast
# the incoming strings to jsonb on insert.
(
    bronze_procedure_df.write
    .option("truncate", "true")
    .option("stringtype", "unspecified")
    .jdbc(
        url=connection_url,
        table="bronze.procedure_data",
        mode="overwrite",
        properties=connection_config,
    )
)


In [None]:
-- Sanity check after loading: row count of each bronze table, one result row
-- per table, sorted by the table label in the first column.
SELECT 'bronze.patient_data', COUNT(*) AS count
FROM bronze.patient_data
UNION
SELECT 'bronze.encounter_data', COUNT(*) AS count
FROM bronze.encounter_data
UNION
SELECT 'bronze.condition_data', COUNT(*) AS count
FROM bronze.condition_data
UNION
SELECT 'bronze.procedure_data', COUNT(*) AS count
FROM bronze.procedure_data
ORDER BY 1;

### 2.2 Silver Layer