In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from datetime import datetime
from pyspark.sql.functions import current_timestamp, lit


# Start-up banner for the Bronze ingestion notebook. The accented "ã" is
# written as a real UTF-8 character; it had been garbled into "√£" by a
# wrong-encoding round trip (UTF-8 bytes decoded as Mac Roman).
print(" Notebook Bronze - Ingestão iniciada")

In [0]:
# Bronze ingestion: for every `dim_*.csv` file found at the root of the
# landing container, read it as CSV, append it (plus ingestion metadata
# columns) to a Delta location in the bronze container, then move the source
# file under `processed/`. Any failure moves the file under `failed/` instead.

# Storage roots, defined once so the container/account is changed in one place.
LANDING_ROOT = "abfss://landing@mystoacc.dfs.core.windows.net"
BRONZE_ROOT = "abfss://bronze@mystoacc.dfs.core.windows.net"
CSV_SUFFIX = ".csv"

# Run date, used both as the `ingestion_date` column value and as a path
# segment of the archive folders.
processed_date = datetime.now().strftime("%Y-%m-%d")
failed_files = []
processed_files = []


def csv_table_name(file_name):
    """Return ``file_name`` without its trailing ``.csv`` extension.

    Only the final suffix is removed. A plain ``str.replace('.csv', '')``
    would also delete ``.csv`` occurrences in the middle of the name
    (``dim_a.csv.bak.csv`` -> ``dim_a.bak``), producing a table name whose
    rebuilt path ``<name>.csv`` no longer matches the file in landing.

    Args:
        file_name: Base name of a file, e.g. ``"dim_cliente.csv"``.

    Returns:
        The name without the trailing ``.csv``; the input unchanged when it
        does not end with ``.csv``.
    """
    if not file_name.endswith(CSV_SUFFIX):
        return file_name
    # rsplit with maxsplit=1 cuts at the right-most ".csv", which is the
    # suffix because of the endswith() guard above.
    return file_name.rsplit(CSV_SUFFIX, 1)[0]


def move_file(source, dest):
    """Move ``source`` to ``dest`` as a copy followed by a delete.

    Args:
        source: Full path of the file to move.
        dest: Full destination path.

    Raises:
        Whatever ``dbutils.fs.cp`` / ``dbutils.fs.rm`` raise; when the copy
        fails the source is not deleted.
    """
    dbutils.fs.cp(source, dest)
    dbutils.fs.rm(source)


# --- Discover dimension files in landing ------------------------------------
try:
    files = dbutils.fs.ls(f"{LANDING_ROOT}/")
    dimension_files = [
        csv_table_name(f.name)
        for f in files
        if f.name.startswith("dim_") and f.name.endswith(CSV_SUFFIX)
    ]

    print(f"Arquivos encontrados na landing: {dimension_files}")

    if not dimension_files:
        print("Nenhum arquivo de dimensao encontrado. Finalizando.")

except Exception as e:
    # A listing failure is reported and treated as "nothing to process" so
    # the summary at the end is still printed.
    print(f"Erro ao listar landing: {e}")
    dimension_files = []

# --- Ingest each file --------------------------------------------------------
for table_name in dimension_files:
    source = f"{LANDING_ROOT}/{table_name}.csv"
    try:
        print(f"\nProcessando {table_name}...")

        # Read the CSV from landing (header row, inferred column types).
        df = spark.read.format("csv") \
            .option("header", "true") \
            .option("inferSchema", "true") \
            .load(source)

        # Tag every row with when it was ingested.
        df_ct = df.withColumn("ingestion_timestamp", current_timestamp()) \
                  .withColumn("ingestion_date", lit(processed_date))

        # Append to the Delta location in bronze; mergeSchema lets new
        # columns in the CSV be added to the existing table schema.
        df_ct.write.format("delta") \
            .mode("append") \
            .option("mergeSchema", "true") \
            .save(f"{BRONZE_ROOT}/{table_name}")

        print(f"tabela: {table_name} salva na bronze")

        dest = f"{LANDING_ROOT}/processed/{table_name}/{processed_date}/{table_name}.csv"
        move_file(source, dest)

        print(f"arquivo: {table_name} movido para processed")
        processed_files.append(table_name)

    except Exception as e:
        # NOTE(review): this branch also runs when the Delta append succeeded
        # and only the move to `processed/` failed, so a file under `failed/`
        # may already be in bronze — confirm before re-ingesting failed files.
        print(f"ERRO: {str(e)}")
        failed_files.append(table_name)

        dest = f"{LANDING_ROOT}/failed/{processed_date}/{table_name}.csv"
        try:
            move_file(source, dest)
            print(f"Arquivo: {table_name} movido para failed")
        except Exception as move_error:
            # Best effort: keep processing the remaining files, but report
            # which file could not be moved and why instead of hiding it.
            print(f"Nao foi possivel mover arquivo {table_name}: {move_error}")

print(f"\narquivos processados: {processed_files}")
print(f"arquivos com falha: {failed_files}")