### Bronze Layer – Raw Data Ingestion

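This notebook ingests raw healthcare CSV datasets from a Databricks Volume
and stores them as Bronze Delta tables. No cleaning or business transformation
is applied; the only processing on read is header parsing and Spark's
column-type inference (`inferSchema`).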

In [0]:
# Source locations: every raw CSV lives under a single Databricks Volume directory.
RAW_VOLUME_PATH = "/Volumes/healthcare/bronze/raw_volume"

# Full paths to the individual input files, derived from the volume root above.
DIABETIC_DATA_PATH = RAW_VOLUME_PATH + "/diabetic_data.csv"
IDS_MAPPING_PATH = RAW_VOLUME_PATH + "/IDS_mapping.csv"

In [0]:
# Load diabetic_data.csv from the raw volume into a Spark DataFrame.
# header=True      -> the first row supplies the column names.
# inferSchema=True -> Spark derives each column's type from the file contents.
diabetic_df = spark.read.csv(
    DIABETIC_DATA_PATH,
    header=True,
    inferSchema=True,
)

# Quick inspection: the inferred column types, then the first five rows.
diabetic_df.printSchema()
diabetic_df.show(n=5)

root
 |-- encounter_id: integer (nullable = true)
 |-- patient_nbr: integer (nullable = true)
 |-- race: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- age: string (nullable = true)
 |-- weight: string (nullable = true)
 |-- admission_type_id: integer (nullable = true)
 |-- discharge_disposition_id: integer (nullable = true)
 |-- admission_source_id: integer (nullable = true)
 |-- time_in_hospital: integer (nullable = true)
 |-- payer_code: string (nullable = true)
 |-- medical_specialty: string (nullable = true)
 |-- num_lab_procedures: integer (nullable = true)
 |-- num_procedures: integer (nullable = true)
 |-- num_medications: integer (nullable = true)
 |-- number_outpatient: integer (nullable = true)
 |-- number_emergency: integer (nullable = true)
 |-- number_inpatient: integer (nullable = true)
 |-- diag_1: string (nullable = true)
 |-- diag_2: string (nullable = true)
 |-- diag_3: string (nullable = true)
 |-- number_diagnoses: integer (nullable = true)
 |-

In [0]:
# Load the ID-to-description lookup file (IDS_mapping.csv) from the raw volume.
# header=True      -> the first row supplies the column names.
# inferSchema=True -> Spark derives each column's type from the file contents.
# NOTE(review): a previous run inferred `admission_type_id` as string even though
# the previewed values are numeric, so the file may contain non-numeric rows in
# that column -- confirm before joining on it downstream.
ids_mapping_df = spark.read.csv(
    IDS_MAPPING_PATH,
    header=True,
    inferSchema=True,
)

# Quick inspection: the inferred column types, then the first five rows.
ids_mapping_df.printSchema()
ids_mapping_df.show(n=5)

root
 |-- admission_type_id: string (nullable = true)
 |-- description: string (nullable = true)

+-----------------+-------------+
|admission_type_id|  description|
+-----------------+-------------+
|                1|    Emergency|
|                2|       Urgent|
|                3|     Elective|
|                4|      Newborn|
|                5|Not Available|
+-----------------+-------------+
only showing top 5 rows


In [0]:
# Persist the encounters DataFrame as the Bronze table
# healthcare.bronze.patient_encounters_raw.
# mode="overwrite" replaces the table's existing contents on every run, so
# re-running this cell does not append duplicate rows.
diabetic_df.write.saveAsTable(
    "healthcare.bronze.patient_encounters_raw",
    format="delta",
    mode="overwrite",
)

In [0]:
# Persist the ID-mapping DataFrame as the Bronze table
# healthcare.bronze.code_mapping_raw.
# mode="overwrite" replaces the table's existing contents on every run, so
# re-running this cell does not append duplicate rows.
ids_mapping_df.write.saveAsTable(
    "healthcare.bronze.code_mapping_raw",
    format="delta",
    mode="overwrite",
)

In [0]:
%sql
-- Sanity check: how many rows landed in the patient-encounters Bronze table.
SELECT COUNT(*)
FROM healthcare.bronze.patient_encounters_raw;

COUNT(*)
101766


In [0]:
%sql
-- Sanity check: how many rows landed in the code-mapping Bronze table.
SELECT COUNT(*)
FROM healthcare.bronze.code_mapping_raw;

COUNT(*)
67


In [0]:
%sql
-- Eyeball a few ingested encounter rows. There is no ORDER BY, so which five
-- rows come back is not guaranteed.
SELECT * FROM healthcare.bronze.patient_encounters_raw LIMIT 5;

encounter_id,patient_nbr,race,gender,age,weight,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,payer_code,medical_specialty,num_lab_procedures,num_procedures,num_medications,number_outpatient,number_emergency,number_inpatient,diag_1,diag_2,diag_3,number_diagnoses,max_glu_serum,A1Cresult,metformin,repaglinide,nateglinide,chlorpropamide,glimepiride,acetohexamide,glipizide,glyburide,tolbutamide,pioglitazone,rosiglitazone,acarbose,miglitol,troglitazone,tolazamide,examide,citoglipton,insulin,glyburide-metformin,glipizide-metformin,glimepiride-pioglitazone,metformin-rosiglitazone,metformin-pioglitazone,change,diabetesMed,readmitted
2278392,8222157,Caucasian,Female,[0-10),?,6,25,1,1,?,Pediatrics-Endocrinology,41,0,1,0,0,0,250.83,?,?,1,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,NO
149190,55629189,Caucasian,Female,[10-20),?,1,1,7,3,?,?,59,0,18,0,0,0,276.0,250.01,255,9,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,No,Ch,Yes,>30
64410,86047875,AfricanAmerican,Female,[20-30),?,1,1,7,2,?,?,11,5,13,2,0,1,648.0,250,V27,6,,,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Yes,NO
500364,82442376,Caucasian,Male,[30-40),?,1,1,7,2,?,?,44,1,16,0,0,0,8.0,250.43,403,7,,,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,No,Up,No,No,No,No,No,Ch,Yes,NO
16680,42519267,Caucasian,Male,[40-50),?,1,1,7,1,?,?,51,0,8,0,0,0,197.0,157,250,5,,,No,No,No,No,No,No,Steady,No,No,No,No,No,No,No,No,No,No,Steady,No,No,No,No,No,Ch,Yes,NO


In [0]:
%sql
-- Eyeball a few ingested mapping rows. There is no ORDER BY, so which five
-- rows come back is not guaranteed.
SELECT * FROM healthcare.bronze.code_mapping_raw LIMIT 5;

admission_type_id,description
1,Emergency
2,Urgent
3,Elective
4,Newborn
5,Not Available
