# Explore Embryoscope Clinic Data

This notebook helps you explore the silver layer for each clinic individually.

In [8]:
import duckdb
import pandas as pd
from pathlib import Path

pd.set_option('display.max_columns', None)

# List all clinic DBs
db_dir = Path('../../database')
clinic_dbs = sorted([p for p in db_dir.glob('embryoscope_*.db') if 'test' not in str(p) and 'huntington_data_lake' not in str(p)])
# clinic_dbs = sorted([p for p in db_dir.glob('embryoscope_*') if 'test' in str(p) and 'huntington_data_lake' not in str(p)])
clinic_dbs

[WindowsPath('../../database/embryoscope_belo_horizonte.db'),
 WindowsPath('../../database/embryoscope_brasilia.db'),
 WindowsPath('../../database/embryoscope_ibirapuera.db'),
 WindowsPath('../../database/embryoscope_vila_mariana.db')]

In [9]:
# Select a clinic DB to explore
clinic_idx = -1  # Change this index to select a different clinic
db_path = clinic_dbs[clinic_idx]
print(f'Exploring: {db_path}')

Exploring: ..\..\database\embryoscope_vila_mariana.db


In [10]:
# Connect to the selected DB
con = duckdb.connect(str(db_path))


In [11]:
# List all schemas in the database
schemas = con.execute("SELECT schema_name FROM information_schema.schemata").fetchdf()
schemas


Unnamed: 0,schema_name
0,bronze
1,main
2,silver
3,information_schema
4,main
5,pg_catalog
6,main


In [12]:
# List all tables in the silver schema
use_schema = 'bronze'
use_schema = 'silver'
tables = con.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{use_schema}'").fetchdf()
tables

Unnamed: 0,table_name
0,embryo_data
1,idascore
2,patients
3,treatments


In [13]:
# Show schema and sample for each table in silver
for table in tables['table_name']:
    print(f'\n=== {table} ===')
    schema = con.execute(f'PRAGMA table_info({use_schema}.{table})').fetchdf()
    display(schema)
    df = con.execute(f'SELECT * FROM {use_schema}.{table} LIMIT 5').fetchdf()
    display(df)
    lines = con.execute(f'SELECT COUNT(*) FROM {use_schema}.{table}').fetchone()
    print(f'Records: {lines}')
con.close()


=== embryo_data ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,EmbryoID,VARCHAR,False,,False
1,1,PatientIDx,VARCHAR,False,,False
2,2,TreatmentName,VARCHAR,False,,False
3,3,KIDDate,TIMESTAMP_NS,False,,False
4,4,KIDScore,VARCHAR,False,,False
...,...,...,...,...,...,...
183,183,_extraction_timestamp,TIMESTAMP_NS,False,,False
184,184,_location,VARCHAR,False,,False
185,185,_row_hash,VARCHAR,False,,False
186,186,_run_id,VARCHAR,False,,False


Unnamed: 0,EmbryoID,PatientIDx,TreatmentName,KIDDate,KIDScore,KIDUser,KIDVersion,Description,EmbryoDescriptionID,EmbryoFate,FertilizationMethod,FertilizationTime,InstrumentNumber,Name_BlastExpandLast,Name_Comment,Name_DynamicScore,Name_EVEN2,Name_EVEN4,Name_EVEN8,Name_Ellipse,Name_FRAG2,Name_FRAG2CAT,Name_FRAG4,Name_FRAG8,Name_ICM,Name_Line,Name_MN2Type,Name_MorphologicalGrade,Name_MorphologicalGradeD5,Name_Nuclei2,Name_Nuclei4,Name_Nuclei8,Name_PN,Name_Pulsing,Name_Strings,Name_TE,Name_Text,Name_ZScore,Name_t2,Name_t3,Name_t4,Name_t5,Name_t6,Name_t7,Name_t8,Name_t9,Name_tB,Name_tEB,Name_tHB,Name_tM,Name_tPB2,Name_tPNa,Name_tPNf,Name_tSB,Name_tSC,Position,Time_BlastExpandLast,Time_Comment,Time_DynamicScore,Time_EVEN2,Time_EVEN4,Time_EVEN8,Time_Ellipse,Time_FRAG2,Time_FRAG2CAT,Time_FRAG4,Time_FRAG8,Time_ICM,Time_Line,Time_MN2Type,Time_MorphologicalGrade,Time_MorphologicalGradeD5,Time_Nuclei2,Time_Nuclei4,Time_Nuclei8,Time_PN,Time_Pulsing,Time_Strings,Time_TE,Time_Text,Time_ZScore,Time_t2,Time_t3,Time_t4,Time_t5,Time_t6,Time_t7,Time_t8,Time_t9,Time_tB,Time_tEB,Time_tHB,Time_tM,Time_tPB2,Time_tPNa,Time_tPNf,Time_tSB,Time_tSC,Timestamp_BlastExpandLast,Timestamp_Comment,Timestamp_DynamicScore,Timestamp_EVEN2,Timestamp_EVEN4,Timestamp_EVEN8,Timestamp_Ellipse,Timestamp_FRAG2,Timestamp_FRAG2CAT,Timestamp_FRAG4,Timestamp_FRAG8,Timestamp_ICM,Timestamp_Line,Timestamp_MN2Type,Timestamp_MorphologicalGrade,Timestamp_MorphologicalGradeD5,Timestamp_Nuclei2,Timestamp_Nuclei4,Timestamp_Nuclei8,Timestamp_PN,Timestamp_Pulsing,Timestamp_Strings,Timestamp_TE,Timestamp_Text,Timestamp_ZScore,Timestamp_t2,Timestamp_t3,Timestamp_t4,Timestamp_t5,Timestamp_t6,Timestamp_t7,Timestamp_t8,Timestamp_t9,Timestamp_tB,Timestamp_tEB,Timestamp_tHB,Timestamp_tM,Timestamp_tPB2,Timestamp_tPNa,Timestamp_tPNf,Timestamp_tSB,Timestamp_tSC,Value_BlastExpandLast,Value_Comment,Value_DynamicScore,Value_EVEN2,Value_EVEN4,Value_EVEN8,Value_Ellipse,Value_FRAG2,Value_FRAG2CAT,Value_FRAG4,Value_FRAG8,Value_ICM,Value_Line,Value_MN2Type,Value_MorphologicalGrade,Value_MorphologicalGradeD5,Value_Nuclei2,Value_Nuclei4,Value_Nuclei8,Value_PN,Value_Pulsing,Value_Strings,Value_TE,Value_Text,Value_ZScore,Value_t2,Value_t3,Value_t4,Value_t5,Value_t6,Value_t7,Value_t8,Value_t9,Value_tB,Value_tEB,Value_tHB,Value_tM,Value_tPB2,Value_tPNa,Value_tPNf,Value_tSB,Value_tSC,WellNumber,_extraction_timestamp,_location,_row_hash,_run_id,embryo_number
0,D2019.06.10_S00009_I3253_P-1,PC10T4L741253_43626.4449943171,05.06.2019,2020-02-13,,ADMIN,KIDScoreD5 v3,,AA1,Unknown,,2019-06-10 09:00:00,3253,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,t8,,,,,,tPB2,tPNa,tPNf,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,,,28.2,28.2,28.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019.06.10 10:58:10,,,,,,2019.06.11 13:14:24,2019.06.11 13:14:24,2019.06.11 13:14:24,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43626.4570676273,,,,,,43627.5516752546,43627.5516752546,43627.5516752546,,,1,2025-07-15 19:21:41.460065,Vila Mariana,278c0046f8670f26452f4e510ac40220,5a1a0509-7c67-43d2-a882-27ef8fcbf469,1
1,D2019.06.10_S00010_I3253_P-1,PC10T4L741253_43626.4518250926,05.06.2019,2020-02-13,,ADMIN,KIDScoreD5 v3,,AA1,Freeze,,2019-06-10 09:00:00,3253,,,,,,,,,,,,ICM,,,,,,,,,,,TE,,,t2,t3,t4,t5,t6,t7,t8,t9,tB,,,tM,,,,tSB,tSC,12,,,,,,,,,,,,7.3,,,,,,,,,,,75.2,,,3.8,3.9,4.3,4.6,4.8,5.0,5.2,5.5,7.0,,,6.2,,,,6.6,5.9,,,,,,,,,,,,2019.06.10 16:18:36,,,,,,,,,,,2019.06.13 12:11:48,,,2019.06.10 12:45:47,2019.06.10 12:56:25,2019.06.10 13:17:42,2019.06.10 13:38:59,2019.06.10 13:49:38,2019.06.10 14:00:16,2019.06.10 14:10:55,2019.06.10 14:32:12,2019.06.10 15:57:19,,,2019.06.10 15:14:46,,,,2019.06.10 15:36:03,2019.06.10 14:53:29,,,,,,,,,,,,B,,,,,,,,,,,C,,,43626.5317982986,43626.5391877778,43626.5539690393,43626.5687487269,43626.5761389352,43626.5835281597,43626.5909172801,43626.6056953472,43626.6648143519,,,43626.6352557523,,,,43626.650035625,43626.6204755903,1,2025-07-15 19:21:41.460065,Vila Mariana,ca9bf840e7dafb28bd86956b8ff3f460,5a1a0509-7c67-43d2-a882-27ef8fcbf469,2
2,D2019.06.10_S00010_I3253_P-2,PC10T4L741253_43626.4518250926,05.06.2019,2020-02-13,,ADMIN,KIDScoreD5 v3,,AA2,Freeze,,2019-06-10 09:00:00,3253,,,,,,,,,,,,,,,,,,,,PN,,,TE,,,t2,t3,t4,t5,,,,,tB,,,,,,,,,12,,,,,,,,,,,,,,,,,,,,2.5,,,15.3,,,4.5,5.9,7.0,8.7,,,,,12.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,2019.06.10 11:31:21,,,2019.06.11 00:17:30,,,2019.06.10 13:28:25,2019.06.10 14:53:32,2019.06.10 15:57:23,2019.06.10 17:43:48,,,,,2019.06.10 21:27:15,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,B,,,43626.5614030093,43626.6205191782,43626.6648577546,43626.7387525347,,,,,43626.8939260532,,,,,,,,,2,2025-07-15 19:21:41.460065,Vila Mariana,90439dee85f60cf0e42331d689db7bfc,5a1a0509-7c67-43d2-a882-27ef8fcbf469,3
3,D2019.06.10_S00009_I3253_P-9,PC10T4L741253_43626.4449943171,05.06.2019,2020-02-13,,ADMIN,KIDScoreD5 v3,,AA9,Unknown,,2019-06-10 09:00:00,3253,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,2025-07-15 19:21:41.460065,Vila Mariana,510ee700e35951039965a017de9d4c6c,5a1a0509-7c67-43d2-a882-27ef8fcbf469,4
4,D2019.06.10_S00009_I3253_P-10,PC10T4L741253_43626.4449943171,05.06.2019,2020-02-13,,ADMIN,KIDScoreD5 v3,,AA10,Unknown,,2019-06-10 09:00:00,3253,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10,2025-07-15 19:21:41.460065,Vila Mariana,132021d44be3c7f46d6e474ce66147b0,5a1a0509-7c67-43d2-a882-27ef8fcbf469,5


Records: (30946,)

=== idascore ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,EmbryoID,VARCHAR,False,,False
1,1,IDAScore,VARCHAR,False,,False
2,2,IDATime,VARCHAR,False,,False
3,3,IDAVersion,VARCHAR,False,,False
4,4,IDATimestamp,VARCHAR,False,,False
5,5,_extraction_timestamp,VARCHAR,False,,False
6,6,_location,VARCHAR,False,,False
7,7,_run_id,VARCHAR,False,,False
8,8,_row_hash,VARCHAR,False,,False


Unnamed: 0,EmbryoID,IDAScore,IDATime,IDAVersion,IDATimestamp,_extraction_timestamp,_location,_run_id,_row_hash
0,D2019.06.19_S00028_I3253_P-1,1.03,115,2.0.4,2024.04.30 07:55:37,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,212cbf29a4c410bb654943824050d577
1,D2019.06.19_S00028_I3253_P-2,1.05,115,2.0.4,2024.04.30 07:55:35,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,87d5649e334849ef00382e85bf0d479c
2,D2019.06.19_S00028_I3253_P-3,1.06,115,2.0.4,2024.04.30 07:55:33,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,04ba8020f5111c86dca22000e6e8736e
3,D2019.06.19_S00028_I3253_P-4,1.01,115,2.0.4,2024.04.30 07:56:06,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,3951e556b9a198c43620aba73ee02db4
4,D2019.07.03_S00054_I3253_P-2,3.93,120,2.0.4,2024.05.28 15:20:00,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,39ca4e212b20cebf31f0e24049f6c851


Records: (9562,)

=== patients ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PatientIDx,VARCHAR,False,,False
1,1,PatientID,BIGINT,False,,False
2,2,FirstName,VARCHAR,False,,False
3,3,LastName,VARCHAR,False,,False
4,4,_extraction_timestamp,TIMESTAMP_NS,False,,False
5,5,_location,VARCHAR,False,,False
6,6,_run_id,VARCHAR,False,,False
7,7,_row_hash,VARCHAR,False,,False
8,8,DateOfBirth,TIMESTAMP_NS,False,,False


Unnamed: 0,PatientIDx,PatientID,FirstName,LastName,_extraction_timestamp,_location,_run_id,_row_hash,DateOfBirth
0,NEXTGEN_43622.7870662732,1111,first,test,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,ce7eeef44db2e45a9ee560d50c47571b,NaT
1,PC10T4L72760_43623.4196205208,9999,Jane,Doe,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,0e80d8bb37d49d69dcc2db6fdd119e1f,NaT
2,PC10T4L790165_43625.4810629630,62399,Amanda Alves,Pollone,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,ae933c1906afa46920a048bd89f3004f,1983-12-01
3,PC10T4L741253_43626.4449943171,67549,"Silva, Vanessa",L.,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,37ce5a215c60a9ead635c2350594dd11,1980-05-01
4,PC10T4L741253_43626.4518250926,68786,"Silveira,",Gislaine,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,bdfdc5f751721a992f783e59f671a302,1978-11-01


Records: (2654,)

=== treatments ===


Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,PatientIDx,VARCHAR,False,,False
1,1,TreatmentName,VARCHAR,False,,False
2,2,_extraction_timestamp,VARCHAR,False,,False
3,3,_location,VARCHAR,False,,False
4,4,_run_id,VARCHAR,False,,False
5,5,_row_hash,VARCHAR,False,,False


Unnamed: 0,PatientIDx,TreatmentName,_extraction_timestamp,_location,_run_id,_row_hash
0,NEXTGEN_43622.7870662732,1icsi,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,547067ba7f7c02d100f6c2017b566648
1,NEXTGEN_43622.7870662732,2022 - 12,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,29e103fd7f34cd3e376b53ffc7f779c3
2,NEXTGEN_43622.7870662732,Test August,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,229cf5bb2472fdb0fc43e68b7081934f
3,PC10T4L72760_43623.4196205208,1,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,a93a119ac31ba224d63babb8dad94fa9
4,PC10T4L72760_43623.4196205208,2,2025-07-15 19:21:41.460065,Vila Mariana,5a1a0509-7c67-43d2-a882-27ef8fcbf469,3285ffa98d446612099c62bf105f9cf9


Records: (3541,)


In [14]:
# df['raw_json'].values