In [1]:
import sys
sys.path.append('..')

from src.config import SnowflakeConfig, validate_config
from src.utils.snowflake_helper import SnowflakeHelper
# from src.data.dummy_data_generator import HospitalDataGenerator
from src.data.enhanced_dummy_data_generator import EnhancedHospitalDataGenerator

# Validate the project configuration before any Snowflake connection is opened.
# On success this prints a one-line confirmation.
# NOTE(review): how a failed validation is signalled (exception vs. return
# value) is not visible from this notebook — confirm in src.config.
validate_config()

✓ Configuration validated successfully


### Connect to Snowflake

In [2]:
# Read the connection parameters from the project configuration, hand them to
# the helper, and open the connection. `sf_helper` is reused by the later cells
# for loading and querying; `session` keeps whatever connect() returns.
config = SnowflakeConfig()
sf_helper = SnowflakeHelper(config.get_connection_params())
session = sf_helper.connect()

✓ Connected to Snowflake as harismad
  Role: "DEV_ROLE"
  Warehouse: "TEST_WAREHOUSE"
  Database: "TEST_DATABASE"
  Schema: "TEST_SCHEMA"


### Generate Dummy Data

In [3]:
# Create the generator; seed=42 is passed through to it.
generator = EnhancedHospitalDataGenerator(seed=42)

print("Generating all hospital data...")
hospital_data = generator.generate_all_data()

# Pull the four tables out of the returned mapping by key.
sop_data, schedule_data, facility_data, appointments_data = (
    hospital_data[table_key]
    for table_key in (
        "hospital_sop",
        "doctor_schedule",
        "hospital_facilities",
        "appointments",
    )
)

# Preview the first five rows of each table under its own heading.
sample_previews = (
    ("\n=== SOP Data Sample ===", sop_data),
    ("\n=== Doctor Schedule Sample ===", schedule_data),
    ("\n=== Facility Data Sample ===", facility_data),
    ("\n=== Appointments Data Sample ===", appointments_data),
)
for heading, frame in sample_previews:
    print(heading)
    print(frame.head(5))

Generating all hospital data...
Generating SOP data...
Generating doctor schedules...
Generating facility data...
Generating appointments...

=== SOP Data Sample ===
     SOP_ID  SOP_CATEGORY                             SOP_TITLE  \
0  SOP-0001  Patient Care            Triage Assessment Protocol   
1  SOP-0002  Patient Care             Patient Admission Process   
2  SOP-0003  Patient Care  Medication Administration Guidelines   
3  SOP-0004  Patient Care        Patient Transfer Between Units   
4  SOP-0005  Patient Care            Patient Discharge Planning   

                                         SOP_CONTENT DEPARTMENT  \
0  Perform systematic patient assessment using ES...  Emergency   
1  Verify patient identity using two identifiers....  Emergency   
2  Follow five rights: right patient, medication,...  Emergency   
3  Obtain transfer order from physician. Ensure a...  Emergency   
4  Initiate discharge planning within 24 hours of...  Emergency   

                LAST_UPDATED

### Load Data into Snowflake

In [4]:
# Push each generated frame into its Snowflake table, passing overwrite=True
# for every load. The load order matches the order of the pairs below.
print("\nLoading data to Snowflake...")

table_loads = (
    (sop_data, "hospital_sop"),
    (schedule_data, "doctor_schedule"),
    (facility_data, "hospital_facilities"),
    (appointments_data, "appointments"),
)
for frame, table_name in table_loads:
    sf_helper.load_data_to_table(frame, table_name, overwrite=True)

print("✓ All data loaded successfully!")


Loading data to Snowflake...
✓ Loaded 40 rows to hospital_sop
✓ Loaded 95 rows to doctor_schedule
✓ Loaded 40 rows to hospital_facilities
✓ Loaded 167 rows to appointments
✓ All data loaded successfully!


### Verify Data Load

In [5]:
# One COUNT(*) query per loaded table.
query_sop = "SELECT COUNT(*) as count FROM hospital_sop"
query_schedule = "SELECT COUNT(*) as count FROM doctor_schedule"
query_facility = "SELECT COUNT(*) as count FROM hospital_facilities"
query_appointments = "SELECT COUNT(*) as count FROM appointments"

# Label shown in the report, paired with the query that produces its number.
count_checks = (
    ("SOP", query_sop),
    ("Schedule", query_schedule),
    ("Facility", query_facility),
    ("Appointments", query_appointments),
)

print("\n=== Record Counts ===")
for label, count_query in count_checks:
    # The result is indexed by the upper-case 'COUNT' column; take its first row.
    row_count = sf_helper.execute_query(count_query)['COUNT'].iloc[0]
    print(f"{label} records: {row_count}")


=== Record Counts ===
SOP records: 40
Schedule records: 95
Facility records: 40
Appointments records: 167


### Sample Queries

In [6]:
# Spot-check queries against the loaded tables: each entry pairs the heading
# to print with the SQL to run. They execute in the order listed.
sample_queries = [
    (
        "\n=== Sample Query: Patient Care SOPs ===",
        """
SELECT SOP_TITLE, SOP_CATEGORY, DEPARTMENT
FROM hospital_sop
WHERE SOP_CATEGORY = 'Patient Care'
LIMIT 5
""",
    ),
    (
        "\n=== Sample Query: Doctor Schedules ===",
        """
SELECT DOCTOR_NAME, SPECIALIZATION, DAY_OF_WEEK,
       START_TIME, END_TIME, MAX_PATIENTS, BOOKED_PATIENTS
FROM doctor_schedule
WHERE DAY_OF_WEEK = 'Monday'
LIMIT 5
""",
    ),
    (
        "\n=== Sample Query: Upcoming Appointments ===",
        """
SELECT a.APPOINTMENT_ID, a.APPOINTMENT_DATE, a.APPOINTMENT_TIME,
       d.DOCTOR_NAME, d.SPECIALIZATION, a.STATUS
FROM appointments a
JOIN doctor_schedule d ON a.SCHEDULE_ID = d.SCHEDULE_ID
WHERE a.APPOINTMENT_DATE >= CURRENT_DATE()
  AND a.STATUS = 'SCHEDULED'
ORDER BY a.APPOINTMENT_DATE, a.APPOINTMENT_TIME
LIMIT 10
""",
    ),
    (
        "\n=== Sample Query: Facility Utilization ===",
        """
SELECT FACILITY_TYPE,
       COUNT(*) as total_facilities,
       SUM(CASE WHEN STATUS = 'OPERATIONAL' THEN 1 ELSE 0 END) as operational,
       ROUND(AVG(CURRENT_USAGE * 100.0 / CAPACITY), 2) as avg_utilization_pct
FROM hospital_facilities
WHERE CAPACITY > 0
GROUP BY FACILITY_TYPE
ORDER BY total_facilities DESC
""",
    ),
]

# `query` and `result` are rebound on every pass, so after the loop they hold
# the last query and its result.
for heading, query in sample_queries:
    print(heading)
    result = sf_helper.execute_query(query)
    print(result)


=== Sample Query: Patient Care SOPs ===
                              SOP_TITLE  SOP_CATEGORY DEPARTMENT
0            Triage Assessment Protocol  Patient Care  Emergency
1             Patient Admission Process  Patient Care  Emergency
2  Medication Administration Guidelines  Patient Care  Emergency
3        Patient Transfer Between Units  Patient Care  Emergency
4            Patient Discharge Planning  Patient Care  Emergency

=== Sample Query: Doctor Schedules ===
          DOCTOR_NAME        SPECIALIZATION DAY_OF_WEEK START_TIME  END_TIME  \
0  Dr. Siti Rahmawati  General Practitioner      Monday   09:00:00  12:00:00   
1    Dr. Citra Kusuma          Cardiologist      Monday   13:00:00  17:00:00   
2   Dr. Ahmad Santoso          Pediatrician      Monday   13:00:00  16:00:00   
3    Dr. Nina Puspita         Dermatologist      Monday   08:00:00  11:00:00   
4   Dr. Indah Lestari          Gynecologist      Monday   08:00:00  12:00:00   

   MAX_PATIENTS  BOOKED_PATIENTS  
0            