# Diagnóstico Delta Lake

Este notebook verifica:
1. Se os arquivos Parquet existem
2. Se o Spark consegue ler os arquivos
3. Se as tabelas Delta foram criadas

In [None]:
# Configure the Spark session (reuse the one from the earlier cell of
# delta_lake_setup.ipynb). If that notebook has not been run yet, execute
# cells 1-3 of delta_lake_setup.ipynb first.

import os
os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-17-openjdk-amd64'

from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip

# MinIO (S3-compatible) connection settings. Each value can be overridden by
# an environment variable of the same name; the literals are kept only as
# backward-compatible defaults.
# NOTE(review): credentials should not live in source control - move them to
# the environment / a secret store and drop the literal defaults.
MINIO_ENDPOINT = os.environ.get("MINIO_ENDPOINT", "ch8ai-minio.l6zv5a.easypanel.host")
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "admin")
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "1q2w3e4r")
BUCKET_NAME = os.environ.get("BUCKET_NAME", "govbr")

# Maven coordinates needed for s3a:// access.
# NOTE(review): hadoop-aws has to match the Hadoop version bundled with the
# installed PySpark - confirm 3.3.4 is correct for this environment.
s3a_packages = "org.apache.hadoop:hadoop-aws:3.3.4,com.amazonaws:aws-java-sdk-bundle:1.12.262"
# Kept for reference / backward compatibility: configure_spark_with_delta_pip
# derives the Delta artifact from the installed delta-spark package, so this
# coordinate is not passed to Spark explicitly.
delta_package = "io.delta:delta-spark_2.13:4.0.0"

# "spark.jars.packages" is intentionally NOT set here:
# configure_spark_with_delta_pip sets that key itself and would overwrite any
# value configured on the builder, silently dropping the S3A jars.
builder = SparkSession.builder \
    .appName("Diagn√≥stico Delta") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .config("spark.hadoop.fs.s3a.endpoint", f"https://{MINIO_ENDPOINT}") \
    .config("spark.hadoop.fs.s3a.access.key", MINIO_ACCESS_KEY) \
    .config("spark.hadoop.fs.s3a.secret.key", MINIO_SECRET_KEY) \
    .config("spark.hadoop.fs.s3a.path.style.access", "true") \
    .config("spark.hadoop.fs.s3a.connection.ssl.enabled", "true") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") \
    .config("spark.master", "local[*]")

# Extra (non-Delta) packages must go through `extra_packages` so they end up
# in the final "spark.jars.packages" value next to the Delta artifact.
spark = configure_spark_with_delta_pip(
    builder,
    extra_packages=s3a_packages.split(","),
).getOrCreate()
print("‚úÖ Spark Session criada!")

In [None]:
# Check that each expected Parquet file can be read by Spark.
s3_base = f"s3a://{BUCKET_NAME}"

# Logical dataset name -> full s3a:// path of its data file. Every dataset
# follows the same "<prefix>/dt=20251114/data.parquet" layout.
parquet_paths = {
    label: f"{s3_base}/{prefix}/dt=20251114/data.parquet"
    for label, prefix in (
        ("bronze_municipios", "bronze/ibge/municipios"),
        ("bronze_estados", "bronze/ibge/estados"),
        ("bronze_bpc", "bronze/portal_transparencia/bpc_municipios"),
        ("prata_dim_municipios", "prata/dim_municipios"),
        ("prata_dim_estados", "prata/dim_estados"),
        ("prata_fato_bpc", "prata/fato_bpc"),
        ("ouro_dim_municipios", "ouro/dim_municipios_enriquecida"),
        ("ouro_dim_estados", "ouro/dim_estados_enriquecida"),
        ("ouro_fato_bpc", "ouro/fato_bpc_enriquecido"),
    )
}

# Section banner (blank line, rule, title, rule).
print("\n" + "=" * 80, "VERIFICANDO ARQUIVOS PARQUET", "=" * 80, sep="\n")

for label, location in parquet_paths.items():
    try:
        # Reading and counting the rows exercises the file end to end.
        row_total = spark.read.parquet(location).count()
        print(f"‚úÖ {label}: {row_total} registros - {location}")
    except Exception as err:
        # Report and keep going so one bad file does not hide the others;
        # the message is cut to 100 characters to keep the report compact.
        print(f"‚ùå {label}: ERRO - {str(err)[:100]}")
        print(f"   Path: {location}")

In [None]:
# List the tables returned by SHOW TABLES for the current session.
print("\n" + "=" * 80, "TABELAS DELTA EXISTENTES", "=" * 80, sep="\n")

try:
    listed = spark.sql("SHOW TABLES").collect()
    if not listed:
        print("‚ö†Ô∏è  Nenhuma tabela encontrada")
    # No-op when the listing is empty.
    for entry in listed:
        print(f"üìä {entry.tableName}")
except Exception as err:
    # Diagnostic cell: report the failure instead of aborting the notebook.
    print(f"‚ùå Erro ao listar tabelas: {err}")

In [None]:
# Smoke test: read one Parquet file, write it out as Delta, register a table
# on top of that location and query it back.
print("\n" + "=" * 80, "TESTE: CRIAR TABELA DELTA", "=" * 80, sep="\n")

test_parquet = f"{s3_base}/bronze/ibge/estados/dt=20251114/data.parquet"
test_delta = f"{s3_base}/delta/test/test_estados"
test_table = "test_estados"

try:
    # Step 1: load the source Parquet file and count its rows.
    print(f"\n1. Lendo Parquet: {test_parquet}")
    source_df = spark.read.parquet(test_parquet)
    print(f"   ‚úÖ Lido: {source_df.count()} registros")

    # Step 2: write the data in Delta format, replacing any previous run.
    print(f"\n2. Escrevendo Delta: {test_delta}")
    delta_writer = source_df.write.format("delta").mode("overwrite")
    delta_writer.save(test_delta)
    print("   ‚úÖ Delta escrito")

    # Step 3: register a table that points at the Delta location.
    print(f"\n3. Criando tabela: {test_table}")
    create_stmt = f"""
        CREATE TABLE IF NOT EXISTS {test_table}
        USING DELTA
        LOCATION '{test_delta}'
    """
    spark.sql(create_stmt)
    print("   ‚úÖ Tabela criada")

    # Step 4: query the table through SQL to confirm it is usable.
    print("\n4. Verificando tabela:")
    count_row = spark.sql(f"SELECT COUNT(*) as cnt FROM {test_table}").collect()[0]
    print(f"   ‚úÖ Tabela funciona: {count_row['cnt']} registros")

    # Step 5: show a small sample of the rows.
    print("\n5. Mostrando dados:")
    sample = spark.sql(f"SELECT * FROM {test_table} LIMIT 3")
    sample.show(truncate=False)

except Exception as err:
    # Print the short message first, then the full traceback for debugging.
    print(f"\n‚ùå Erro: {err}")
    import traceback
    traceback.print_exc()