In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import datetime
from pyspark.sql.functions import current_timestamp, lit

# Log the start of the bronze identity-ingestion notebook.
# The accented character is written as a proper Unicode "ã" (it was previously
# stored as a mis-decoded two-character sequence).
print(" Notebook Bronze - Ingestão das identidades iniciada")

In [0]:
# ---------------------------------------------------------------------------
# Configuration: storage locations and file-selection rule used by this cell.
# ---------------------------------------------------------------------------
LANDING_ROOT = "abfss://landing@mystoacc.dfs.core.windows.net"
BRONZE_PII_ROOT = "abfss://bronze@mystoacc.dfs.core.windows.net/pii"
# Only landing files whose name ends with this suffix are picked up.
PII_FILE_SUFFIX = "_identidade.csv"


def table_name_from_file(file_name):
    """Return ``file_name`` without its trailing ``.csv`` extension.

    Only the final extension is removed, so a name that happens to contain
    ``.csv`` earlier in the string (e.g. ``a.csv_identidade.csv``) keeps that
    part intact and ``f"{table_name}.csv"`` still points at the real file.

    Args:
        file_name: Base name of a file, e.g. ``"clientes_identidade.csv"``.

    Returns:
        The name without the ``.csv`` extension, or ``file_name`` unchanged
        when it does not end in ``.csv``.
    """
    stem, dot, extension = file_name.rpartition(".")
    if dot and extension == "csv":
        return stem
    return file_name


def move_landing_file(source_path, dest_path):
    """Move a file by copying it to ``dest_path`` and then deleting the source.

    Any exception raised by ``dbutils.fs.cp`` or ``dbutils.fs.rm`` propagates
    to the caller; if the delete fails the copy is left in place.
    """
    dbutils.fs.cp(source_path, dest_path)
    dbutils.fs.rm(source_path)


# Date stamp (driver-local clock) used both as a column value and as the
# folder name under processed/ and failed/.
processed_date = datetime.now().strftime("%Y-%m-%d")
failed_files = []
processed_files = []

# List only the PII files that actually exist in the landing container.
try:
    files = dbutils.fs.ls(f"{LANDING_ROOT}/")
    pii_files = [table_name_from_file(f.name) for f in files
                 if f.name.endswith(PII_FILE_SUFFIX)]

    print(f"Arquivos PII encontrados na landing: {pii_files}")

    # NOTE(review): truthiness is used instead of len() so this check does not
    # depend on whether the notebook's wildcard import from
    # pyspark.sql.functions shadowed any Python builtin - confirm against the
    # installed PySpark version.
    if not pii_files:
        print("Nenhum arquivo PII encontrado. Finalizando.")

except Exception as e:
    # A listing failure is treated as "nothing to process" so the summary at
    # the end of the cell is still printed.
    print(f"Erro ao listar landing: {e}")
    pii_files = []

# Process only the files that were found.
for table_name in pii_files:
    source = f"{LANDING_ROOT}/{table_name}.csv"
    # Tracks whether the Delta append already happened, so that a later
    # failure (while archiving the CSV) can be reported differently from a
    # failure to ingest.
    written_to_bronze = False

    try:
        print(f"\nProcessando {table_name}...")

        # Read the CSV from landing, letting Spark infer column types.
        df = spark.read.format("csv") \
            .option("header", "true") \
            .option("inferSchema", "true") \
            .load(source)

        # Add ingestion metadata columns.
        df_ct = df.withColumn("ingestion_timestamp", current_timestamp()) \
                  .withColumn("ingestion_date", lit(processed_date)) \
                  .withColumn("is_pii", lit(True))

        # Append to the Delta table under bronze/pii, allowing new columns.
        df_ct.write.format("delta") \
            .mode("append") \
            .option("mergeSchema", "true") \
            .save(f"{BRONZE_PII_ROOT}/{table_name}")
        written_to_bronze = True

        print(f"tabela PII: {table_name} salva na bronze/pii/")

        # Archive the source file so it is not picked up again.
        dest = f"{LANDING_ROOT}/processed/pii/{processed_date}/{table_name}.csv"
        move_landing_file(source, dest)

        print(f"arquivo: {table_name} movido para processed/pii/")
        processed_files.append(table_name)

    except Exception as e:
        print(f"ERRO: {str(e)}")
        failed_files.append(table_name)

        if written_to_bronze:
            # The rows are already in the Delta table; only the archive step
            # failed. Re-dropping this file into landing would append the
            # same rows a second time, so make that explicit in the log.
            print(f"AVISO: {table_name} ja foi gravado na bronze/pii/; "
                  "reprocessar este arquivo duplicara os dados")

        # Best effort: get the file out of landing so the next run does not
        # pick it up again.
        dest = f"{LANDING_ROOT}/failed/pii/{processed_date}/{table_name}.csv"
        try:
            move_landing_file(source, dest)
            print(f"Arquivo: {table_name} movido para failed/pii/")
        except Exception as move_error:
            # Report why the move failed instead of silently swallowing it.
            print(f"Nao foi possivel mover arquivo: {move_error}")

print(f"\narquivos processados: {processed_files}")
print(f"arquivos com falha: {failed_files}")