# Explore Embryoscope Clinic Data

This notebook helps you explore the silver layer for each clinic individually.

In [22]:
import duckdb
import pandas as pd
from pathlib import Path

pd.set_option('display.max_columns', None)

# List all clinic DBs
db_dir = Path('../../database')
clinic_dbs = sorted([p for p in db_dir.glob('embryoscope_*.db') if 'test' not in str(p) and 'huntington_data_lake' not in str(p)])
# clinic_dbs = sorted([p for p in db_dir.glob('embryoscope_*') if 'test' in str(p) and 'huntington_data_lake' not in str(p)])
clinic_dbs

[WindowsPath('../../database/embryoscope_belo_horizonte.db'),
 WindowsPath('../../database/embryoscope_brasilia.db'),
 WindowsPath('../../database/embryoscope_ibirapuera.db'),
 WindowsPath('../../database/embryoscope_vila_mariana.db')]

In [23]:
# Select a clinic DB to explore
clinic_idx = -1  # Change this index to select a different clinic
db_path = clinic_dbs[clinic_idx]
print(f'Exploring: {db_path}')

Exploring: ..\..\database\embryoscope_vila_mariana.db


In [24]:
# Connect to the selected DB
con = duckdb.connect(str(db_path))


In [25]:
# List all schemas in the database
schemas = con.execute("SELECT schema_name FROM information_schema.schemata").fetchdf()
schemas


Unnamed: 0,schema_name
0,bronze
1,main
2,silver
3,information_schema
4,main
5,pg_catalog
6,main


In [26]:
# List all tables in the silver schema
use_schema = 'bronze'
# use_schema = 'silver'
tables = con.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{use_schema}'").fetchdf()
tables

Unnamed: 0,table_name
0,raw_embryo_data
1,raw_idascore
2,raw_patients
3,raw_treatments


In [27]:
# Show schema and sample for each table in silver
for table in tables['table_name']:
    print(f'\n=== {table} ===')
    schema = con.execute(f'PRAGMA table_info({use_schema}.{table})').fetchdf()
    display(schema)
    df = con.execute(f'SELECT * FROM {use_schema}.{table} LIMIT 5').fetchdf()
    display(df)
    lines = con.execute(f'SELECT COUNT(*) FROM {use_schema}.{table}').fetchone()
    print(f'Records: {lines}')
con.close()


=== raw_embryo_data ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,EmbryoID,VARCHAR,False,,False
1,1,PatientIDx,VARCHAR,False,,False
2,2,TreatmentName,VARCHAR,False,,False
3,3,raw_json,VARCHAR,False,,False
4,4,_extraction_timestamp,TIMESTAMP,False,,False
5,5,_run_id,VARCHAR,False,,False
6,6,_location,VARCHAR,False,,False
7,7,_row_hash,VARCHAR,False,,False


Unnamed: 0,EmbryoID,PatientIDx,TreatmentName,raw_json,_extraction_timestamp,_run_id,_location,_row_hash
0,D2024.04.16_S02839_I3253_P-1,PC1R85KM_45398.3645717940,2024 - 511,"{""EmbryoID"": ""D2024.04.16_S02839_I3253_P-1"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,6f39c4de195dc9f94e1de9ab34aa95a8
1,D2024.04.16_S02839_I3253_P-2,PC1R85KM_45398.3645717940,2024 - 511,"{""EmbryoID"": ""D2024.04.16_S02839_I3253_P-2"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,cb7be316338f2604ca242fd7e5afbc52
2,D2023.10.17_S02518_I3253_P-1,PC10T4L7_44652.7123803819,2023 - 1683,"{""EmbryoID"": ""D2023.10.17_S02518_I3253_P-1"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,b0ead98ca843663654fa4f1518e3bedb
3,D2023.10.17_S02518_I3253_P-2,PC10T4L7_44652.7123803819,2023 - 1683,"{""EmbryoID"": ""D2023.10.17_S02518_I3253_P-2"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,3968937a557fe6a097d4ce2b45b62003
4,D2023.10.17_S02518_I3253_P-3,PC10T4L7_44652.7123803819,2023 - 1683,"{""EmbryoID"": ""D2023.10.17_S02518_I3253_P-3"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,46f64d129ef909d24650cf5af50af951


Records: (30752,)

=== raw_idascore ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,EmbryoID,VARCHAR,False,,False
1,1,raw_json,VARCHAR,False,,False
2,2,_extraction_timestamp,TIMESTAMP,False,,False
3,3,_run_id,VARCHAR,False,,False
4,4,_location,VARCHAR,False,,False
5,5,_row_hash,VARCHAR,False,,False


Unnamed: 0,EmbryoID,raw_json,_extraction_timestamp,_run_id,_location,_row_hash
0,D2019.06.19_S00028_I3253_P-1,"{""EmbryoID"": ""D2019.06.19_S00028_I3253_P-1"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,212cbf29a4c410bb654943824050d577
1,D2019.06.19_S00028_I3253_P-2,"{""EmbryoID"": ""D2019.06.19_S00028_I3253_P-2"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,87d5649e334849ef00382e85bf0d479c
2,D2019.06.19_S00028_I3253_P-3,"{""EmbryoID"": ""D2019.06.19_S00028_I3253_P-3"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,04ba8020f5111c86dca22000e6e8736e
3,D2019.06.19_S00028_I3253_P-4,"{""EmbryoID"": ""D2019.06.19_S00028_I3253_P-4"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,3951e556b9a198c43620aba73ee02db4
4,D2019.07.03_S00054_I3253_P-2,"{""EmbryoID"": ""D2019.07.03_S00054_I3253_P-2"", ""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,39ca4e212b20cebf31f0e24049f6c851


Records: (9539,)

=== raw_patients ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PatientIDx,VARCHAR,False,,False
1,1,raw_json,VARCHAR,False,,False
2,2,_extraction_timestamp,TIMESTAMP,False,,False
3,3,_run_id,VARCHAR,False,,False
4,4,_location,VARCHAR,False,,False
5,5,_row_hash,VARCHAR,False,,False


Unnamed: 0,PatientIDx,raw_json,_extraction_timestamp,_run_id,_location,_row_hash
0,NEXTGEN_43622.7870662732,"{""PatientIDx"": ""NEXTGEN_43622.7870662732"", ""Pa...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,ce7eeef44db2e45a9ee560d50c47571b
1,PC10T4L72760_43623.4196205208,"{""PatientIDx"": ""PC10T4L72760_43623.4196205208""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,0e80d8bb37d49d69dcc2db6fdd119e1f
2,NEXTGEN_43622.6655321528,"{""PatientIDx"": ""NEXTGEN_43622.6655321528"", ""Pa...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,b2ee556e26653f7395cf5fa49254e673
3,PC10T4L77647_43623.5757282639,"{""PatientIDx"": ""PC10T4L77647_43623.5757282639""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,989bc98e74cf36c4b2b782b2023b1914
4,PC10T4L790165_43625.4810629630,"{""PatientIDx"": ""PC10T4L790165_43625.4810629630...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,ae933c1906afa46920a048bd89f3004f


Records: (2654,)

=== raw_treatments ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PatientIDx,VARCHAR,False,,False
1,1,TreatmentName,VARCHAR,False,,False
2,2,raw_json,VARCHAR,False,,False
3,3,_extraction_timestamp,TIMESTAMP,False,,False
4,4,_run_id,VARCHAR,False,,False
5,5,_location,VARCHAR,False,,False
6,6,_row_hash,VARCHAR,False,,False


Unnamed: 0,PatientIDx,TreatmentName,raw_json,_extraction_timestamp,_run_id,_location,_row_hash
0,NEXTGEN_43622.7870662732,1icsi,"{""PatientIDx"": ""NEXTGEN_43622.7870662732"", ""Tr...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,547067ba7f7c02d100f6c2017b566648
1,NEXTGEN_43622.7870662732,2022 - 12,"{""PatientIDx"": ""NEXTGEN_43622.7870662732"", ""Tr...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,29e103fd7f34cd3e376b53ffc7f779c3
2,NEXTGEN_43622.7870662732,Test August,"{""PatientIDx"": ""NEXTGEN_43622.7870662732"", ""Tr...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,229cf5bb2472fdb0fc43e68b7081934f
3,PC10T4L72760_43623.4196205208,1,"{""PatientIDx"": ""PC10T4L72760_43623.4196205208""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,a93a119ac31ba224d63babb8dad94fa9
4,PC10T4L72760_43623.4196205208,2,"{""PatientIDx"": ""PC10T4L72760_43623.4196205208""...",2025-07-15 19:21:41.460065,5a1a0509-7c67-43d2-a882-27ef8fcbf469,Vila Mariana,3285ffa98d446612099c62bf105f9cf9


Records: (3530,)


In [28]:
# df['raw_json'].values