In [0]:
# Databricks notebook source
# Daily pipeline-execution report.
#
# Reads the "report_date" widget, aggregates pipeline_execution_log for that
# date (overall summary, per-pipeline breakdown, failure details) and hands the
# result back to the caller as a JSON string via dbutils.notebook.exit.
import json
from datetime import datetime

REPORT_DATE_FORMAT = "%Y-%m-%d"


def _normalize_report_date(raw_value):
    """Return *raw_value* as a canonical ``YYYY-MM-DD`` string.

    An empty or whitespace-only value falls back to the current date taken
    from ``datetime.now()``. Any other value must parse with
    ``REPORT_DATE_FORMAT``; because the result is re-formatted from the parsed
    date, the string interpolated into the SQL text below can only contain
    digits and dashes.

    Raises:
        ValueError: if *raw_value* is not a valid ``YYYY-MM-DD`` date.
    """
    candidate = (raw_value or "").strip()
    if not candidate:
        # No date supplied: use today.
        # NOTE(review): datetime.now() is naive (driver-local clock) — confirm
        # this matches the timezone used to populate run_date.
        return datetime.now().strftime(REPORT_DATE_FORMAT)
    try:
        parsed = datetime.strptime(candidate, REPORT_DATE_FORMAT)
    except ValueError as err:
        raise ValueError(
            f"report_date must be in YYYY-MM-DD format, got: {candidate!r}"
        ) from err
    return parsed.strftime(REPORT_DATE_FORMAT)


# Read the date parameter
dbutils.widgets.text("report_date", "")
report_date = _normalize_report_date(dbutils.widgets.get("report_date"))

print(f"Gerando relatorio para: {report_date}")

# Query the execution logs
spark.sql("USE CATALOG hive_metastore")
spark.sql("USE healthcare_bronze")

# COALESCE keeps the sums at 0 (instead of NULL/None) when no row matches the
# date, so the JSON payload always carries numbers.
df_summary = spark.sql(f"""
SELECT
    COUNT(*) as total_execucoes,
    COALESCE(SUM(CASE WHEN status = 'SUCCESS' THEN 1 ELSE 0 END), 0) as sucessos,
    COALESCE(SUM(CASE WHEN status = 'FAILED' THEN 1 ELSE 0 END), 0) as falhas,
    COALESCE(SUM(records_processed), 0) as total_registros,
    COUNT(DISTINCT pipeline_name) as pipelines_distintas
FROM pipeline_execution_log
WHERE run_date = '{report_date}'
""")

# Convert to dict (an aggregate without GROUP BY always yields exactly one row)
summary = df_summary.first().asDict()

# Per-pipeline breakdown
df_breakdown = spark.sql(f"""
SELECT
    pipeline_name,
    COUNT(*) as total_execucoes,
    SUM(CASE WHEN status = 'SUCCESS' THEN 1 ELSE 0 END) as sucessos,
    SUM(CASE WHEN status = 'FAILED' THEN 1 ELSE 0 END) as falhas,
    COALESCE(SUM(records_processed), 0) as registros_processados
FROM pipeline_execution_log
WHERE run_date = '{report_date}'
GROUP BY pipeline_name
ORDER BY pipeline_name
""")

# Format the breakdown as text with HTML line breaks (one "<br>" after each entry)
breakdown_text = "".join(
    f"• {row.pipeline_name}: {row.total_execucoes} exec ({row.sucessos} sucessos, {row.falhas} falhas) - {row.registros_processados} registros<br>"
    for row in df_breakdown.collect()
)

if not breakdown_text:
    breakdown_text = "Nenhuma execucao registrada."

# Failure details.
# The formatted value gets its own alias so that ORDER BY start_time sorts on
# the underlying timestamp column rather than on the 'dd/MM/yyyy ...' string.
df_failures = spark.sql(f"""
SELECT
    pipeline_name,
    DATE_FORMAT(start_time, 'dd/MM/yyyy HH:mm:ss') as start_time_fmt,
    error_message
FROM pipeline_execution_log
WHERE run_date = '{report_date}'
AND status = 'FAILED'
ORDER BY start_time DESC
""")

# Keep the output key "start_time" so the payload shape is unchanged.
failures_list = [
    {
        "pipeline_name": row.pipeline_name,
        "start_time": row.start_time_fmt,
        "error_message": row.error_message,
    }
    for row in df_failures.collect()
]

# Build the output payload for ADF
output = {
    "report_date": report_date,
    "total_execucoes": summary["total_execucoes"],
    "sucessos": summary["sucessos"],
    "falhas": summary["falhas"],
    "total_registros": summary["total_registros"],
    "pipelines_distintas": summary["pipelines_distintas"],
    "breakdown": breakdown_text,
    "failures": failures_list,
}

result = json.dumps(output)

print("Relatorio gerado:")
print(result)

# Return the JSON string to ADF
dbutils.notebook.exit(result)