In [0]:
from pyspark.sql.functions import col, regexp_replace, split, to_timestamp
from pyspark.sql.types import IntegerType, DecimalType

# Unity Catalog schemas: the Bronze source and the Silver destination.
bronze_schema = "workspace.bronze_db"
silver_schema = "workspace.silver_db"

# Session-level setting: switch Spark's time parser policy to LEGACY.
spark.sql("set spark.sql.legacy.timeParserPolicy=LEGACY")

# Create the destination schema up front if it does not exist yet.
spark.sql("CREATE SCHEMA IF NOT EXISTS " + silver_schema)

# 1. 'salary_ranges' table: derive numeric min/max/avg salary columns from
#    the textual range description.
try:
    print("Processando 'salary_ranges'...")
    df_salary_bronze = spark.read.table(f"{bronze_schema}.salary_ranges")

    # Remove every '$', 'k' and 'K' from the description, then split what is
    # left on '-' so that element 0 is the lower bound and element 1 the upper.
    salary_bounds = split(
        regexp_replace(col("range_description"), r"[\$kK]", ""),
        "-",
    )
    # Each bound is cast to an integer and multiplied by 1000.
    lower_bound = salary_bounds[0].cast(IntegerType()) * 1000
    upper_bound = salary_bounds[1].cast(IntegerType()) * 1000

    # Keep only the identifier, the original text and the derived columns.
    df_salary_silver = df_salary_bronze.select(
        "id",
        "range_description",
        lower_bound.alias("min_salary"),
        upper_bound.alias("max_salary"),
        ((lower_bound + upper_bound) / 2).alias("avg_salary"),
    )

    # Replace the Silver table, including its schema.
    salary_writer = df_salary_silver.write.format("delta").mode("overwrite")
    salary_writer = salary_writer.option("overwriteSchema", "true")
    salary_writer.saveAsTable(f"{silver_schema}.salary_ranges")
    print("'salary_ranges' processada com sucesso.")
except Exception as err:
    # Best-effort: report the failure and let the remaining tables run.
    print(f"Erro ao processar 'salary_ranges': {err}")

# 2. 'jobs' table: convert 'listing_date' to a timestamp, keep everything else.
try:
    print("Processando 'jobs'...")
    df_jobs_bronze = spark.read.table(f"{bronze_schema}.jobs")

    # No explicit format is passed, so Spark's default timestamp parsing applies.
    df_jobs_silver = df_jobs_bronze.withColumn(
        "listing_date",
        to_timestamp("listing_date"),
    )

    # Replace the Silver table, including its schema.
    jobs_writer = df_jobs_silver.write.format("delta").mode("overwrite")
    jobs_writer = jobs_writer.option("overwriteSchema", "true")
    jobs_writer.saveAsTable(f"{silver_schema}.jobs")
    print("'jobs' processada com sucesso.")
except Exception as err:
    # Best-effort: report the failure and let the remaining tables run.
    print(f"Erro ao processar 'jobs': {err}")

# 3. 'companies' table: cast 'company_rating' to DECIMAL(3, 1), keep everything else.
try:
    print("Processando 'companies'...")
    df_companies_bronze = spark.read.table(f"{bronze_schema}.companies")

    # DecimalType(3, 1): three digits in total, one of them after the decimal point.
    rating_as_decimal = col("company_rating").cast(DecimalType(3, 1))
    df_companies_silver = df_companies_bronze.withColumn("company_rating", rating_as_decimal)

    # Replace the Silver table, including its schema.
    companies_writer = df_companies_silver.write.format("delta").mode("overwrite")
    companies_writer = companies_writer.option("overwriteSchema", "true")
    companies_writer.saveAsTable(f"{silver_schema}.companies")
    print("'companies' processada com sucesso.")
except Exception as err:
    # Best-effort: report the failure and let the remaining tables run.
    print(f"Erro ao processar 'companies': {err}")

# 4. Remaining tables: copied from Bronze to Silver without any transformation.
simple_tables = ["company_reviews", "employment_types", "industries", "job_benefits", "job_skills", "locations", "skills"]
for table_name in simple_tables:
    try:
        print(f"Processando '{table_name}'...")
        df_bronze = spark.read.table(f"{bronze_schema}.{table_name}")
        # overwriteSchema is set here like on the other Silver writes in this
        # script: without it, a Bronze schema change makes the overwrite fail
        # on schema mismatch and the Silver copy is left stale.
        df_bronze.write.format("delta").mode("overwrite").option("overwriteSchema", "true").saveAsTable(f"{silver_schema}.{table_name}")
        print(f"'{table_name}' movida para Silver com sucesso.")
    except Exception as e:
        # Best-effort: report the failure and continue with the next table.
        print(f"Erro ao processar '{table_name}': {e}")

# Printed unconditionally, even if some tables above reported errors.
print("\nProcesso da camada Silver finalizado.")