In [0]:
# Create one database per layer (bronze, silver, gold) in hive_metastore.
# Each database is located at the root of the abfss container that has the
# same name as the layer. IF NOT EXISTS makes the cell safe to re-run.
for layer in ("bronze", "silver", "gold"):
    create_database_sql = f"""
CREATE DATABASE IF NOT EXISTS hive_metastore.healthcare_{layer}
LOCATION 'abfss://{layer}@mystoacc.dfs.core.windows.net/'
"""
    spark.sql(create_database_sql)

print("Databases criados!")

In [0]:
# Make the bronze database current so the unqualified table names used in
# the statements below resolve inside hive_metastore.healthcare_bronze.
spark.sql("USE hive_metastore.healthcare_bronze")

# Dimensions: (table name, folder under the bronze container). Every
# dimension is stored in a folder named after the table itself.
dimensions = [
    (dim_name, dim_name)
    for dim_name in (
        "dim_data",
        "dim_clinica",
        "dim_medico",
        "dim_diagnostico",
        "dim_exame",
        "dim_paciente",
    )
]

# NOTE(review): these statements declare no columns, so they presumably rely
# on Delta data already being present at each LOCATION - confirm that the
# ingestion step runs before this cell.
print("Criando dimensoes...")
for dim_table, dim_folder in dimensions:
    register_dimension_sql = f"""
    CREATE TABLE IF NOT EXISTS {dim_table}
    USING DELTA
    LOCATION 'abfss://bronze@mystoacc.dfs.core.windows.net/{dim_folder}'
    """
    spark.sql(register_dimension_sql)
    print(f"Tabela {dim_table} criada")

# Fact and PII tables sit in sub-folders (facts/, pii/) of the container, so
# each entry carries an explicit folder. Groups are processed in order and
# each one prints its banner before its tables are registered.
foldered_groups = [
    ("\nCriando fato...", [("fato_consultas", "facts/fato_consultas")]),
    (
        "\nCriando tabelas PII...",
        [
            ("pii_paciente_identidade", "pii/paciente_identidade"),
            ("pii_medico_identidade", "pii/medico_identidade"),
        ],
    ),
]
for banner, group_tables in foldered_groups:
    print(banner)
    for foldered_table, folder in group_tables:
        spark.sql(
            "\n"
            f"CREATE TABLE IF NOT EXISTS {foldered_table}\n"
            "USING DELTA\n"
            f"LOCATION 'abfss://bronze@mystoacc.dfs.core.windows.net/{folder}'\n"
        )
        print(f"Tabela {foldered_table} criada")

# Control table: the only table in this cell whose columns are declared
# explicitly in the DDL.
print("\nCriando tabela de controle...")
control_table_ddl = """
CREATE TABLE IF NOT EXISTS pipeline_execution_log (
    execution_id STRING,
    pipeline_name STRING,
    start_time TIMESTAMP,
    end_time TIMESTAMP,
    status STRING,
    records_processed INT,
    error_message STRING,
    run_date DATE,
    executed_by STRING
)
USING DELTA
LOCATION 'abfss://bronze@mystoacc.dfs.core.windows.net/control/pipeline_execution_log'
"""
spark.sql(control_table_ddl)
print("Tabela pipeline_execution_log criada")

print("\nTodas as tabelas foram criadas com sucesso")

# Validate: list what is now registered in the bronze database.
bronze_tables = spark.sql("SHOW TABLES IN hive_metastore.healthcare_bronze")
display(bronze_tables)

In [0]:
# Select the silver schema with its catalog spelled out. An unqualified
# "USE healthcare_silver" is resolved against the session's current catalog,
# so it only succeeds when an earlier cell has already switched the session
# to hive_metastore; qualifying it lets this cell run on a fresh session.
spark.sql("USE hive_metastore.healthcare_silver")

# NOTE(review): the statements below declare no columns, so they presumably
# rely on Delta data already being present at each LOCATION - confirm that
# the silver pipeline has written these paths before running this cell.

# Register dimensions: each one is stored in a folder named after the table.
dimensions = ["dim_data", "dim_clinica", "dim_medico", "dim_diagnostico", "dim_exame", "dim_paciente"]

for table in dimensions:
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {table}
    USING DELTA
    LOCATION 'abfss://silver@mystoacc.dfs.core.windows.net/{table}'
    """)
    print(f"✓ Tabela {table} registrada")

# Register the fact table and the two PII tables. They follow the same
# "folder name == table name" layout, so one loop replaces three
# copy-pasted statements.
for table in ["fato_consultas", "paciente_identidade", "medico_identidade"]:
    spark.sql(
        "\n"
        f"CREATE TABLE IF NOT EXISTS {table}\n"
        "USING DELTA\n"
        f"LOCATION 'abfss://silver@mystoacc.dfs.core.windows.net/{table}'\n"
    )
    print(f"✓ Tabela {table} registrada")

# Verify: name the schema explicitly so the listing does not depend on which
# schema happens to be current.
print("\n" + "="*60)
print("Tabelas registradas:")
display(spark.sql("SHOW TABLES IN hive_metastore.healthcare_silver"))

In [0]:
# Select the gold schema with its catalog spelled out. An unqualified
# "USE healthcare_gold" is resolved against the session's current catalog,
# so it only succeeds when an earlier cell has already switched the session
# to hive_metastore; qualifying it lets this cell run on a fresh session.
spark.sql("USE hive_metastore.healthcare_gold")

# Metric aggregations (gold_01)
metricas = [
    "agg_consultas_por_periodo",
    "agg_consultas_por_medico",
    "agg_consultas_por_clinica",
    "agg_consultas_por_diagnostico",
    "agg_performance_exames",
    "agg_resumo_especialidade",
    "agg_perfil_pacientes"
]

# Reports (gold_02)
relatorios = [
    "rpt_top_medicos_receita",
    "rpt_pacientes_alto_gasto",
    "rpt_medicos_por_clinica",
    "rpt_historico_pacientes",
    "rpt_medicos_pacientes_cidade",
    "rpt_contato_pacientes_vip"
]

# Both groups are registered the same way (folder name == table name under
# the gold container), so a single loop handles them, metrics first.
# NOTE(review): no columns are declared, so this presumably relies on Delta
# data already being present at each LOCATION - confirm the gold jobs have
# written these paths before running this cell.
for table in metricas + relatorios:
    spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {table}
    USING DELTA
    LOCATION 'abfss://gold@mystoacc.dfs.core.windows.net/{table}'
    """)
    print(f"✓ Tabela {table} registrada")

# Verify: name the schema explicitly so the listing does not depend on which
# schema happens to be current.
print("\n" + "="*60)
print("Tabelas registradas na Gold:")
display(spark.sql("SHOW TABLES IN hive_metastore.healthcare_gold"))

In [0]:
%sql
-- All rows of the bronze pipeline execution log, latest end_time first.
SELECT *
FROM hive_metastore.healthcare_bronze.pipeline_execution_log
ORDER BY end_time DESC

In [0]:
# %sql
# -- On Databricks
# USE CATALOG hive_metastore;
# USE healthcare_bronze;

# DELETE FROM dim_paciente;
# DELETE FROM dim_medico;
# DELETE FROM dim_clinica;
# DELETE FROM dim_data;
# DELETE FROM dim_diagnostico;
# DELETE FROM dim_exame;

In [0]:
# %sql SHOW DATABASES IN hive_metastore;

In [0]:
# %sql
# select * from hive_metastore.healthcare_silver.fato_consultas