# Explore Embryoscope Clinic Data

This notebook helps you explore the silver layer for each clinic individually.

In [50]:
import duckdb
import pandas as pd
from pathlib import Path

pd.set_option('display.max_columns', None)

# List all clinic DBs
db_dir = Path('../../database')
clinic_dbs = sorted([p for p in db_dir.glob('embryoscope_*.db') if 'test' not in str(p) and 'huntington_data_lake' not in str(p)])
# clinic_dbs = sorted([p for p in db_dir.glob('embryoscope_*') if 'test' in str(p) and 'huntington_data_lake' not in str(p)])
clinic_dbs

[WindowsPath('../../database/embryoscope_belo_horizonte.db'),
 WindowsPath('../../database/embryoscope_brasilia.db'),
 WindowsPath('../../database/embryoscope_ibirapuera.db'),
 WindowsPath('../../database/embryoscope_vila_mariana.db')]

In [51]:
# Select a clinic DB to explore
clinic_idx = 0  # Change this index to select a different clinic
db_path = clinic_dbs[clinic_idx]
print(f'Exploring: {db_path}')

Exploring: ..\..\database\embryoscope_belo_horizonte.db


In [52]:
# Connect to the selected DB
con = duckdb.connect(str(db_path))


In [53]:
# List all schemas in the database
schemas = con.execute("SELECT schema_name FROM information_schema.schemata").fetchdf()
schemas


Unnamed: 0,schema_name
0,bronze
1,main
2,silver
3,information_schema
4,main
5,pg_catalog
6,main


In [54]:
# List all tables in the silver schema
use_schema = 'bronze'
use_schema = 'silver'
tables = con.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{use_schema}'").fetchdf()
tables

Unnamed: 0,table_name
0,embryo_data
1,idascore
2,patients
3,treatments


In [55]:
# Show schema and sample for each table in silver
for table in tables['table_name']:
    print(f'\n=== {table} ===')
    schema = con.execute(f'PRAGMA table_info({use_schema}.{table})').fetchdf()
    display(schema)
    df = con.execute(f'SELECT * FROM {use_schema}.{table} LIMIT 5').fetchdf()
    display(df)
    lines = con.execute(f'SELECT COUNT(*) FROM {use_schema}.{table}').fetchone()
    print(f'Records: {lines}')
con.close()


=== embryo_data ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,EmbryoID,VARCHAR,False,,False
1,1,PatientIDx,VARCHAR,False,,False
2,2,TreatmentName,VARCHAR,False,,False
3,3,KIDDate,TIMESTAMP_NS,False,,False
4,4,KIDScore,VARCHAR,False,,False
...,...,...,...,...,...,...
157,157,WellNumber,BIGINT,False,,False
158,158,_extraction_timestamp,TIMESTAMP_NS,False,,False
159,159,_location,VARCHAR,False,,False
160,160,_row_hash,VARCHAR,False,,False


Unnamed: 0,EmbryoID,PatientIDx,TreatmentName,KIDDate,KIDScore,KIDUser,KIDVersion,Description,EmbryoDescriptionID,EmbryoFate,FertilizationTime,InstrumentNumber,Name_BlastomereSize,Name_Comment,Name_DynamicScore,Name_Ellipse,Name_Fragmentation,Name_ICM,Name_IrregularDivision,Name_Line,Name_MultiNucleation,Name_Nuclei,Name_PN,Name_TE,Name_USRVAR_1_RC,Name_USRVAR_2_FD,Name_USRVAR_3_D1-3,Name_USRVAR_4_D2+,Name_USRVAR_5_PULSING,Name_ZScore,Name_t2,Name_t3,Name_t4,Name_t5,Name_t6,Name_t7,Name_t8,Name_t9,Name_tB,Name_tDead,Name_tEB,Name_tHB,Name_tM,Name_tPB2,Name_tPNa,Name_tPNf,Name_tSB,Name_tSC,Position,Time_BlastomereSize,Time_Comment,Time_DynamicScore,Time_Ellipse,Time_Fragmentation,Time_ICM,Time_IrregularDivision,Time_Line,Time_MultiNucleation,Time_Nuclei,Time_PN,Time_TE,Time_USRVAR_1_RC,Time_USRVAR_2_FD,Time_USRVAR_3_D1-3,Time_USRVAR_4_D2+,Time_USRVAR_5_PULSING,Time_ZScore,Time_t2,Time_t3,Time_t4,Time_t5,Time_t6,Time_t7,Time_t8,Time_t9,Time_tB,Time_tDead,Time_tEB,Time_tHB,Time_tM,Time_tPB2,Time_tPNa,Time_tPNf,Time_tSB,Time_tSC,Timestamp_BlastomereSize,Timestamp_Comment,Timestamp_DynamicScore,Timestamp_Ellipse,Timestamp_Fragmentation,Timestamp_ICM,Timestamp_IrregularDivision,Timestamp_Line,Timestamp_MultiNucleation,Timestamp_Nuclei,Timestamp_PN,Timestamp_TE,Timestamp_USRVAR_1_RC,Timestamp_USRVAR_2_FD,Timestamp_USRVAR_3_D1-3,Timestamp_USRVAR_4_D2+,Timestamp_USRVAR_5_PULSING,Timestamp_ZScore,Timestamp_t2,Timestamp_t3,Timestamp_t4,Timestamp_t5,Timestamp_t6,Timestamp_t7,Timestamp_t8,Timestamp_t9,Timestamp_tB,Timestamp_tDead,Timestamp_tEB,Timestamp_tHB,Timestamp_tM,Timestamp_tPB2,Timestamp_tPNa,Timestamp_tPNf,Timestamp_tSB,Timestamp_tSC,Value_BlastomereSize,Value_Comment,Value_DynamicScore,Value_Ellipse,Value_Fragmentation,Value_ICM,Value_IrregularDivision,Value_Line,Value_MultiNucleation,Value_Nuclei,Value_PN,Value_TE,Value_USRVAR_1_RC,Value_USRVAR_2_FD,Value_USRVAR_3_D1-3,Value_USRVAR_4_D2+,Value_USRVAR_5_PULSING,Value_ZScore,Value_t2,Value_t3,Value_t4,Value_t5,Value_t6,Value_t7,Value_t8,Value_t9,Value_tB,Value_tDead,Value_tEB,Value_tHB,Value_tM,Value_tPB2,Value_tPNa,Value_tPNf,Value_tSB,Value_tSC,WellNumber,_extraction_timestamp,_location,_row_hash,_run_id
0,D2025.03.03_S02823_I3254_P-1,PC1PPEMH_45719.4711884144,03/03/2025,2025-03-12,1.7,CAMILA,KIDScoreD5 v3.3,,AA1,Freeze,2025-03-03 11:45:00,3254,BlastomereSize,,,,Fragmentation,,,,MultiNucleation,,PN,,,,,,,,t2,t3,t4,t5,,,t8,,tB,,,,tM,tPB2,tPNa,tPNf,tSB,tSC,11,37.8,,,,37.8,,,,37.8,,23.2,,,,,,,,28.0,39.7,40.1,52.2,,,56.7,,143.3,,,,114.1,4.8,11.8,25.8,120.5,98.0,2025.03.05 01:34:26,,,,2025.03.05 01:34:26,,,,2025.03.05 01:34:26,,2025.03.04 10:58:07,,,,,,,,2025.03.04 15:47:12,2025.03.05 03:29:47,2025.03.05 03:50:45,2025.03.05 15:55:48,,,2025.03.05 20:27:31,,2025.03.09 11:01:48,,,,2025.03.08 05:50:49,2025.03.03 16:35:52,2025.03.03 23:35:46,2025.03.04 13:30:55,2025.03.08 12:16:21,2025.03.07 13:44:49,Even,,,,5.0,,,,2.0,,2.0,,,,,,,,45720.6577872338,45721.145688044,45721.1602539583,45721.6637506134,,,45721.8524431366,,45725.4595886227,,,,45724.2436306481,45719.6915769907,45719.9831809954,45720.5631392361,45724.5113556019,45723.5727975347,1,2025-07-16 09:52:05.198485,Belo Horizonte,d39b3d5856f5cc375f856abc257b4572,0c6e13e8-5703-46f6-a6b8-f48d7c4e1ebc
1,D2025.03.03_S02823_I3254_P-2,PC1PPEMH_45719.4711884144,03/03/2025,2025-03-12,,CAMILA,KIDScoreD5 v3.3,,AA2,Avoid,2025-03-03 11:45:00,3254,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,11,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2025-07-16 09:52:05.198485,Belo Horizonte,e80ebcc094403684e96b097e29ff80ff,0c6e13e8-5703-46f6-a6b8-f48d7c4e1ebc
2,D2021.12.03_S01224_I3254_P-1,PC10T4JJ_44533.3382186921,03/12/2021,2021-12-14,8.1,CAMILA,KIDScoreD5 v3.1,,AA1,Freeze,2021-12-03 10:50:00,3254,BlastomereSize,,,,Fragmentation,ICM,,,MultiNucleation,,PN,TE,,,,,,,t2,,t4,t5,,,t8,,tB,,,,tM,tPB2,tPNa,tPNf,tSB,tSC,3,32.8,,,,32.8,116.4,,,32.8,,17.3,116.4,,,,,,,25.1,,36.8,48.6,,,51.4,,106.5,,,,94.6,4.7,9.3,21.8,103.3,90.0,2021.12.04 19:38:57,,,,2021.12.04 19:38:57,2021.12.08 07:16:14,,,2021.12.04 19:38:57,,2021.12.04 04:05:02,2021.12.08 07:16:14,,,,,,,2021.12.04 11:54:30,,2021.12.04 23:36:20,2021.12.05 11:28:20,,,2021.12.05 14:13:30,,2021.12.07 21:18:39,,,,2021.12.07 09:27:07,2021.12.03 15:33:03,2021.12.03 20:09:05,2021.12.04 08:36:31,2021.12.07 18:09:26,2021.12.07 04:51:10,Even,,,,5.0,A,,,0.0,,2.0,A,,,,,,,44534.4961837731,,44534.9835733681,44535.4780179514,,,44535.5927097338,,44537.8879623843,,,,44537.3938358681,44533.6479518634,44533.8396497338,44534.3586975116,44537.7565515741,44537.2022052778,1,2025-07-16 09:52:05.198485,Belo Horizonte,03203b35a5dd6adef832ebc5e5f2a8b4,0c6e13e8-5703-46f6-a6b8-f48d7c4e1ebc
3,D2021.12.03_S01224_I3254_P-2,PC10T4JJ_44533.3382186921,03/12/2021,2021-12-14,,CAMILA,KIDScoreD5 v3.1,,AA2,Avoid,2021-12-03 10:50:00,3254,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2025-07-16 09:52:05.198485,Belo Horizonte,f0363b503521552319e17ade9c44f866,0c6e13e8-5703-46f6-a6b8-f48d7c4e1ebc
4,D2020.06.22_S00493_I3254_P-1,PC10T4JJ32217_44004.3629659606,22/06/2020,2020-07-20,,THAIS,KIDScoreD5 v3,,AA1,Avoid,2020-06-22 11:00:00,3254,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,2025-07-16 09:52:05.198485,Belo Horizonte,e5de4be0b208d8901747be36b0b50ddc,0c6e13e8-5703-46f6-a6b8-f48d7c4e1ebc


Records: (22594,)

=== idascore ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,EmbryoID,VARCHAR,False,,False
1,1,IDAScore,VARCHAR,False,,False
2,2,IDATime,VARCHAR,False,,False
3,3,IDAVersion,VARCHAR,False,,False
4,4,IDATimestamp,VARCHAR,False,,False
5,5,_extraction_timestamp,TIMESTAMP,False,,False
6,6,_location,VARCHAR,False,,False
7,7,_run_id,VARCHAR,False,,False
8,8,_row_hash,VARCHAR,False,,False


Unnamed: 0,EmbryoID,IDAScore,IDATime,IDAVersion,IDATimestamp,_extraction_timestamp,_location,_run_id,_row_hash


Records: (0,)

=== patients ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PatientIDx,VARCHAR,False,,False
1,1,PatientID,VARCHAR,False,,False
2,2,FirstName,VARCHAR,False,,False
3,3,LastName,VARCHAR,False,,False
4,4,DateOfBirth,TIMESTAMP_NS,False,,False
5,5,_extraction_timestamp,TIMESTAMP_NS,False,,False
6,6,_location,VARCHAR,False,,False
7,7,_run_id,VARCHAR,False,,False
8,8,_row_hash,VARCHAR,False,,False


Unnamed: 0,PatientIDx,PatientID,FirstName,LastName,DateOfBirth,_extraction_timestamp,_location,_run_id,_row_hash
0,PC10T4JJ1843_43630.3958359954,40.438,"SIQUEIRA, LARA MARIA A B",25/10/1986,1986-10-01,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,6c2db7f1da7799605c9ab9caa9df079c
1,PC10T4JJ54678_43630.6286568634,44283.0,"CALDEIRA, JULIANA IMACULADA FERREIRA",01/07/1982,1982-07-01,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,c0f87912fced367224b8e1e71c6fe305
2,PC10T4JJ55173_43629.4403575000,54986.0,Isabel Cristina da,Silva,1979-07-01,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,3870e802141f50a3efaeba0569c1fdba
3,PC10T4JJ54678_43631.4704660995,50968.0,"PIRES, FABIANNE GOMES GASPAR BRANDAO",16/11/1979,1998-11-01,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,9229f5e309b4c8c01e78b25213a124f8
4,PC10T4JJ54678_43631.4759475810,53102.0,"RODRIGUES, ENARA FERREIRA",01/06/1985,1985-06-01,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,b0a545cf6c293abe1bf05887097db3d3


Records: (2258,)

=== treatments ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PatientIDx,VARCHAR,False,,False
1,1,TreatmentName,VARCHAR,False,,False
2,2,_extraction_timestamp,VARCHAR,False,,False
3,3,_location,VARCHAR,False,,False
4,4,_run_id,VARCHAR,False,,False
5,5,_row_hash,VARCHAR,False,,False


Unnamed: 0,PatientIDx,TreatmentName,_extraction_timestamp,_location,_run_id,_row_hash
0,PC10T4JJ1843_43630.3958359954,2019-500,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,ac943a8f1bd4f7d70dc4a4d0e45fb355
1,PC10T4JJ55173_43629.4403575000,13/06/2019,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,8b3b79801bddb6ad1d30a33a4411d75a
2,PC10T4JJ54678_43630.6286568634,14/06/2019,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,72a51e5f717c872fd7b3bc16f01bd68f
3,PC10T4JJ54678_43631.4704660995,15/06/2019,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,89f22af8bbd584fed1b9413fcfaf1ad3
4,PC10T4JJ54678_43631.4759475810,15/06/2019,2025-07-16 09:49:20.941048,Belo Horizonte,e3be923f-dce1-4393-bf89-0574d3a56b74,a56b4f5349514199647f1d8c220137a9


Records: (2926,)


In [56]:
# df['raw_json'].values