In [0]:
%sql
-- Make sure the ibge.silver schema exists; IF NOT EXISTS makes this a no-op on re-runs.
CREATE SCHEMA IF NOT EXISTS ibge.silver

In [0]:
# Import das funções
from pyspark.sql.functions import col, trim
from pyspark.sql.types import IntegerType, DoubleType

In [0]:
# Load the Bronze-layer table that serves as input for the Silver transformation.
tabela_bronze = "ibge.bronze.emprego"
emprego_df = spark.table(tabela_bronze)

In [0]:
# Remove the leftover header record: the row whose "NC" value is the column
# caption text instead of data.
#
# The comparison is deliberately null-safe. A plain `col("NC") != "<caption>"`
# evaluates to NULL when NC is NULL, and filter() discards rows whose predicate
# is NULL, so rows with a missing NC would be dropped as an unintended side
# effect of header removal. With eqNullSafe, only the header row is excluded;
# rows with NULL NC are kept and should be handled by an explicit data-quality
# rule if they are not wanted.
emprego_linha_df = emprego_df.filter(
    ~col("NC").eqNullSafe("Nível Territorial (Código)")
)

# Standardization + casting, driven by the three lookup structures below.

# Bronze column name -> standardized column name.
colunas_renomeadas = {
    "NC": "cod_nivel_territorial",
    "NN": "nivel_territorial",
    "MN": "unid_medida",
    "V": "valor",
    "D1C": "cod_municipio",
    "D1N": "municipio",
    "D2N": "variavel",
    "D3N": "ano",
}

# Standardized column name -> numeric Spark type it is cast to.
tipos_numericos = {
    "cod_municipio": IntegerType(),
    "ano": IntegerType(),
    "cod_nivel_territorial": IntegerType(),
    "valor": DoubleType(),
}

# Text columns that get leading/trailing whitespace stripped.
colunas_texto = ["municipio", "variavel", "nivel_territorial", "unid_medida"]

# Start from the header-free frame and apply each stage in turn:
# rename first, so the cast and trim stages can refer to the new names.
emprego_final_df = emprego_linha_df

for nome_origem, nome_destino in colunas_renomeadas.items():
    emprego_final_df = emprego_final_df.withColumnRenamed(nome_origem, nome_destino)

for nome_coluna, tipo_destino in tipos_numericos.items():
    emprego_final_df = emprego_final_df.withColumn(
        nome_coluna, col(nome_coluna).cast(tipo_destino)
    )

for nome_coluna in colunas_texto:
    emprego_final_df = emprego_final_df.withColumn(nome_coluna, trim(col(nome_coluna)))

In [0]:
# Destination is addressed by table name (no explicit storage path is given).
tabela_silver = "ibge.silver.emprego"

# Configure the Delta writer, then persist. "overwrite" replaces the existing
# table contents on each run, and overwriteSchema=true allows the stored schema
# to be replaced as well when the DataFrame's columns or types have changed.
escritor_silver = (
    emprego_final_df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
escritor_silver.saveAsTable(tabela_silver)