In [6]:
#!/usr/bin/env python3
"""
Patient Timeline Creator
Creates a comprehensive timeline for patients by combining data from multiple tables.
"""

import pandas as pd
import duckdb as db
import os
from datetime import datetime
from typing import Dict, List, Tuple, Optional

# Notebook display configuration: a value of None removes pandas' cap, so
# displayed frames are never truncated by column or row count.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [43]:

def get_database_connection(path_to_db='../../database/huntington_data_lake.duckdb'):
    """Open a read-only DuckDB connection and print where it points.

    Args:
        path_to_db: Path to the DuckDB database file. Defaults to the
            huntington_data_lake database; a relative path is resolved
            against the current working directory.

    Returns:
        The connection object returned by ``db.connect``. The caller is
        responsible for closing it.
    """
    # read_only=True: this notebook only queries the database.
    conn = db.connect(path_to_db, read_only=True)

    print(f"Connected to database: {path_to_db}")
    print(f"Database file exists: {os.path.exists(path_to_db)}")

    return conn

def extract_timeline_data_for_patient(conn, test_prontuario):
    """Fetch every row of gold.all_patients_timeline for one patient.

    Args:
        conn: Open database connection exposing ``execute(query, params)``.
        test_prontuario: Value matched against the ``prontuario`` column.

    Returns:
        The result of ``fetchdf()`` on the executed query (a DataFrame).
    """
    # Bind the patient id as a prepared-statement parameter instead of
    # formatting it into the SQL text, so the value can never be parsed as SQL.
    query = """
        SELECT * FROM gold.all_patients_timeline
        WHERE prontuario = ?
    """
    return conn.execute(query, [test_prontuario]).fetchdf()

def explore_consultas(conn, test_prontuario):
    """Count FIV/FET-related reference values in gold.recent_patients_timeline.

    The aggregation runs over all patients: the per-patient filter is
    currently disabled, so ``test_prontuario`` is accepted only to keep the
    call signature stable and is not used by the query.

    Args:
        conn: Open database connection exposing ``execute(query)``.
        test_prontuario: Unused while the per-patient filter is disabled.

    Returns:
        The result of ``fetchdf()``: one row per ``reference_value``
        containing 'FIV' or 'FET', with its row count, most frequent first.
    """
    # Plain (non-f) string: the patient id used to be interpolated into a SQL
    # "--" comment, where a value containing a newline could escape the
    # comment and inject SQL.
    # To re-enable the per-patient filter, append "AND prontuario = ?" to the
    # WHERE clause and pass [test_prontuario] as the second argument to
    # execute(). The parentheses around the OR keep that AND applying to both
    # LIKE conditions.
    query = """

        SELECT
            reference_value,
            COUNT(1) 
        FROM gold.recent_patients_timeline
        WHERE (reference_value LIKE '%FIV%' OR reference_value LIKE '%FET%')
        GROUP BY reference_value
        ORDER BY COUNT(1) DESC
    """
    return conn.execute(query).fetchdf()


In [44]:

def main(test_prontuario):
    """Run both timeline queries for one patient over a single connection.

    Args:
        test_prontuario: Patient identifier forwarded to both query helpers.

    Returns:
        A ``(timeline_df, agg_consultas_df)`` tuple: the output of
        ``extract_timeline_data_for_patient`` followed by the output of
        ``explore_consultas``.
    """
    print(f"Creating timeline for patient: {test_prontuario}")

    connection = get_database_connection()
    try:
        patient_timeline = extract_timeline_data_for_patient(connection, test_prontuario)
        consultas_summary = explore_consultas(connection, test_prontuario)
    finally:
        # Release the database handle whether or not the queries succeeded.
        connection.close()

    return patient_timeline, consultas_summary

In [45]:
if __name__ == "__main__":
    # Patient used for this run. Only one assignment is live; the previous
    # uncommented `patient_id = 876_950` was immediately overwritten, so it is
    # now listed with the other alternatives below.
    # NOTE(review): these comments pair record numbers with what look like
    # patient first names; confirm that is acceptable before sharing.
    patient_id = 175_583 # Renata
    # patient_id = 876_950
    # patient_id = 825_890 # Claudia
    # patient_id = 220_783 # hairy case: 26 attempts
    # patient_id = 182_925 # complete case
    timeline_df, agg_consultas_df = main(patient_id)


Creating timeline for patient: 175583
Connected to database: ../../database/huntington_data_lake.duckdb
Database file exists: True


In [46]:
timeline_df

Unnamed: 0,prontuario,event_id,event_date,reference,reference_value,tentativa,flag_date_estimated,additional_info
0,175583.0,1359557,2025-03-27,extrato_atendimentos,Consulta DOE: Receptora,,False,{}
1,175583.0,1330568,2025-02-14,extrato_atendimentos,Consulta Psicologia,,False,{}
2,175583.0,1321302,2025-01-23,extrato_atendimentos,Consulta DOE: Receptora,,False,{}
3,175583.0,1300888,2024-12-11,extrato_atendimentos,Consulta DOE: Receptora,,False,{}
4,175583.0,1293473,2024-11-28,extrato_atendimentos,Consulta de Reavaliação de Ciclo,,False,{}
5,175583.0,30324,2024-11-14,tratamentos,Ciclo a Fresco FIV,4.0,False,"{""Unidade"": ""1"", ""ResultadoTratamento"": ""No tr..."
6,175583.0,1288113,2024-11-14,extrato_atendimentos,Coleta Seminal: FIV,,False,{}
7,175583.0,1287384,2024-11-14,extrato_atendimentos,Aspiração Folicular: FIV,,False,{}
8,175583.0,1286377,2024-11-11,extrato_atendimentos,5º US Ciclo,,False,{}
9,175583.0,1286062,2024-11-09,extrato_atendimentos,4º US Ciclo,,False,{}


In [47]:
agg_consultas_df

Unnamed: 0,reference_value,count(1)
0,Ciclo a Fresco FIV,2816
1,Aspiração Folicular: FIV,2619
2,Coleta Seminal: FIV,2048
3,FET: Ciclo Próprio,904
4,Descongelamentos Seminal: FIV,267
5,FET: Excedente Próprio,202
6,FET: Ciclo Receptora,99
7,FET: Excedente Receptora,56
8,FET: Embriodoação,12
9,US Monitorização da ovulação FIV/Preservação/I...,6
