# Explore Vila Mariana Patients & Treatments

This notebook lists all patients in the bronze layer of the Vila Mariana DB, lets you select which patients to fetch treatments for, and displays those treatments. To look at different patients, edit `selected_patient_ids` in the selection cell and re-run that cell and the ones after it.

In [1]:
import json
from pathlib import Path

import duckdb
import pandas as pd

# Path to the Vila Mariana DB (relative to the notebook's working directory)
db_path = Path('../database/embryoscope_vila_mariana.db')

# duckdb.connect() creates a brand-new, empty database file when the path does
# not exist, which would make every query below fail with confusing
# "table not found" errors. Fail early with a clear message instead.
if not db_path.exists():
    raise FileNotFoundError(f'DuckDB database not found: {db_path.resolve()}')

# This notebook only runs SELECT queries, so open the file read-only to rule
# out accidental writes to the bronze data.
con = duckdb.connect(str(db_path), read_only=True)

In [2]:
# Load every patient row from the bronze layer: the patient ID plus its raw
# JSON payload (the payload is parsed in the next step).
patients_df = con.execute(
    "SELECT PatientIDx, raw_json FROM bronze.raw_patients"
).fetchdf()
# Best-effort lookup of the patient name inside the raw JSON payload
def extract_name(raw_json):
    """Return the ``PatientName`` field of a raw patient JSON payload.

    Args:
        raw_json: JSON text (``str``/``bytes``) that is expected to decode to
            a JSON object. Any other value is tolerated and yields ``None``.

    Returns:
        The value stored under the ``'PatientName'`` key, or ``None`` when the
        input is missing, is not valid JSON, does not decode to an object, or
        has no such key.
    """
    try:
        payload = json.loads(raw_json)
    except (TypeError, ValueError, RecursionError):
        # TypeError: input is not str/bytes (e.g. None or NaN from pandas).
        # ValueError: malformed JSON (json.JSONDecodeError subclasses it).
        # RecursionError: pathologically nested payload.
        return None
    # Valid JSON that is not an object (list, number, string, ...) has no
    # fields to look up.
    if not isinstance(payload, dict):
        return None
    return payload.get('PatientName')
# Derive the name column from the raw payload, then keep only the two columns
# that are useful when browsing the patient list.
patients_df = patients_df.assign(
    PatientName=patients_df['raw_json'].apply(extract_name)
)[['PatientIDx', 'PatientName']]
display(patients_df)

Unnamed: 0,PatientIDx,PatientName
0,NEXTGEN_43622.7870662732,
1,PC10T4L72760_43623.4196205208,
2,NEXTGEN_43622.6655321528,
3,PC10T4L77647_43623.5757282639,
4,PC10T4L790165_43625.4810629630,
...,...,...
2646,PC1R85KM_45837.3836713889,
2647,PC1R85KM_45808.6028175694,
2648,PC1R85KM_45852.7453160185,
2649,PC1R85KM_45853.3479147569,


In [4]:
# === Patient selection ===
# By default this takes the PatientIDx values of the last 10 rows of
# patients_df. Replace the right-hand side with an explicit list of IDs to
# fetch treatments for specific patients.
selected_patient_ids = patients_df['PatientIDx'].iloc[-10:].tolist()
print('Selected patients:', selected_patient_ids)

Selected patients: ['PC1R85KM_45849.5107237963', 'PC1R85KM_45850.3306940162', 'PC1R85KM_45851.4561169792', 'PC1R85KM_45511.6521238194', 'PC1R85KM_45395.4435693634', 'PC1R85KM_45837.3836713889', 'PC1R85KM_45808.6028175694', 'PC1R85KM_45852.7453160185', 'PC1R85KM_45853.3479147569', 'PC1R85KM_45853.4072312847']


In [5]:
# Look up the treatments that belong to the selected patients
if not selected_patient_ids:
    print('No patients selected.')
else:
    # One '?' per selected ID: the IDs are bound as query parameters rather
    # than being interpolated into the SQL text.
    placeholders = ','.join('?' * len(selected_patient_ids))
    query = f"SELECT * FROM bronze.raw_treatments WHERE PatientIDx IN ({placeholders})"
    treatments_df = con.execute(query, selected_patient_ids).fetchdf()
    display(treatments_df)
    print(f'Found {len(treatments_df)} treatments.')

Unnamed: 0,PatientIDx,TreatmentName,raw_json,_extraction_timestamp,_run_id,_location,_row_hash


Found 0 treatments.


In [None]:
# Close the DuckDB connection created in the first code cell.
# Run this last: to query again afterwards, re-run the connection cell first.
con.close()