### Setup e Variáveis

In [None]:
from notebooks.core.bemol_lakestorage import BemolLakeStorage
from core.bemol_controller import BemolController
from core.bemol_landing_reader import BemolLandingReader
from core.bemol_logger import BemolLogger
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

In [None]:
# Logger for this notebook, created with the "bronze_users" name.
logger = BemolLogger("bronze_users")

# Spark session configured for Delta Lake: the Delta package, its SQL
# extension and the Delta catalog are applied in this order.
_delta_options = (
    ("spark.jars.packages", "io.delta:delta-core_2.12:2.4.0"),
    ("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension"),
    ("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog"),
)
_session_builder = SparkSession.builder.appName("IngestaoBronzeUsuarios")
for _option_key, _option_value in _delta_options:
    _session_builder = _session_builder.config(_option_key, _option_value)
spark = _session_builder.getOrCreate()

# Landing-layer reader; it receives the logger created above.
landing = BemolLandingReader(logger)

# Lake storage helper (used below for the bronze write and the monitoring
# export); it receives the Spark session and the logger.
lakehouse = BemolLakeStorage(spark, logger)

In [None]:
# API endpoint the users are read from.
url_users = "https://fakestoreapi.com/users"

# Destination of the bronze users data.
destination_users = "../data/bronze/users"

# Backward-compatible alias: despite its name, this variable has always held
# the *users* bronze path (not products). Kept so existing references to
# `destination_products` continue to work.
destination_products = destination_users

# Path where the monitoring data is saved.
destination_path_monitor = "../data/monitoring/"

### Leitura

In [None]:
# Read the users from the API through the landing reader's `read_api` method.
# NOTE(review): the result is used as a Spark DataFrame further down
# (dropna/withColumn); `read_api` itself is defined outside this notebook,
# so its exact return type and error handling should be confirmed there.
df_users = landing.read_api(spark, url_users)

### Transformações

In [None]:
# Discard rows whose "id" is null, then drop the "password" column.
df_users_bronze = df_users.dropna(subset=["id"]).drop("password")

# Flat column name -> dotted path of the nested field it is extracted from.
_nested_field_paths = (
    ("first_name", "name.firstname"),
    ("last_name", "name.lastname"),
    ("city", "address.city"),
    ("number", "address.number"),
    ("street", "address.street"),
    ("zip_code", "address.zipcode"),
    ("lat", "address.geolocation.lat"),
    ("long", "address.geolocation.long"),
)
for _flat_name, _nested_path in _nested_field_paths:
    df_users_bronze = df_users_bronze.withColumn(_flat_name, col(_nested_path))

# The original nested columns (and "__v") are no longer needed once flattened.
df_users_bronze = df_users_bronze.drop("name", "address", "__v")

### Escrita

In [None]:
# Add the control column via BemolController.control_field for the "bronze" layer.
# NOTE(review): which column(s) are added is defined in BemolController,
# outside this notebook — confirm there.
df_users_bronze = BemolController.control_field(df_users_bronze, layer="bronze")

In [None]:
# Write the data to the Bronze layer through `write_bronze`, registering it
# under the table name "bronze_users".
# NOTE(review): per the original note this writes Delta format with
# overwrite as the default mode — confirm in BemolLakeStorage.write_bronze.
lakehouse.write_bronze(df_users_bronze, destination_products, table_name="bronze_users")

In [None]:
# Export the monitoring data to the monitoring path through `export_delta`.
# NOTE(review): per the original note this writes Delta format with
# overwrite as the default mode — confirm in the monitor's export_delta.
lakehouse.monitor.export_delta(spark, destination_path_monitor)