# In [0]:
from pyspark.sql.functions import current_timestamp

# Unity Catalog coordinates: the catalog, the landing schema/volume that holds
# the CSV files, and the bronze schema that receives the Delta tables.
catalog_name = "workspace"
landing_schema = "landing_db"
landing_volume = "landing_files"
bronze_schema = "bronze_db"

# Base path of the CSV files inside the Volume:
# /Volumes/<catalog>/<schema>/<volume>
volume_path_base = "/".join(
    ("", "Volumes", catalog_name, landing_schema, landing_volume)
)

# Make sure the destination schema exists before any table is written.
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {catalog_name}.{bronze_schema}")

# Tables to ingest, in processing order. Each name doubles as the sub-folder
# under the landing volume and as the bronze table name.
tables = [
    "companies",
    "company_reviews",
    "employment_types",
    "industries",
    "job_benefits",
    "job_skills",
    "jobs",
    "locations",
    "salary_ranges",
    "skills",
]

# Load each landing CSV folder into a Delta table of the bronze schema,
# one table per entry in `tables`.
for table_name in tables:
    print(f"Processando para camada Bronze: {table_name}")

    # Source folder (per the original note, where the previous script saved
    # the CSVs) and the fully qualified name of the target table.
    landing_csv_path = f"{volume_path_base}/{table_name}"
    full_table_name = f"{catalog_name}.{bronze_schema}.{table_name}"

    # Read the CSV files from the Volume: the first row is the header and
    # column types are inferred from the data.
    df = spark.read.load(
        landing_csv_path,
        format="csv",
        header="true",
        inferSchema="true",
    )

    # Add the ingestion timestamp as a metadata column.
    df_with_metadata = df.withColumn("ingestion_date", current_timestamp())

    # Save as a managed Delta table in the bronze schema, replacing both the
    # existing data and the existing schema.
    df_with_metadata.write.saveAsTable(
        full_table_name,
        format="delta",
        mode="overwrite",
        overwriteSchema="true",
    )

    print(f"Tabela {full_table_name} salva com sucesso no Catalog.")

print("\nProcesso da camada Bronze finalizado.")