In [0]:
%sql
-- Make sure the two schemas written to by this notebook exist before any
-- table is created. SCHEMA is used here as a synonym of DATABASE.
CREATE SCHEMA IF NOT EXISTS bronze;
CREATE SCHEMA IF NOT EXISTS silver;

In [0]:
from pyspark.sql.functions import current_timestamp

volume_base_path = "/Volumes/workspace/bronze/archive/"

# Source CSV file name -> target table name (nine files, nine tables).
mapeamento_arquivos = {
    "olist_customers_dataset.csv": "bronze.ft_consumidores",
    "olist_geolocation_dataset.csv": "bronze.ft_geolocalizacao",
    "olist_order_items_dataset.csv": "bronze.ft_itens_pedidos",
    "olist_order_payments_dataset.csv": "bronze.ft_pagamentos_pedidos",
    "olist_order_reviews_dataset.csv": "bronze.ft_avaliacoes_pedidos",
    "olist_orders_dataset.csv": "bronze.ft_pedidos",
    "olist_products_dataset.csv": "bronze.ft_produtos",
    "olist_sellers_dataset.csv": "bronze.ft_vendedores",
    "product_category_name_translation.csv": "bronze.dm_categoria_produtos_traducao"
}


def _ingest_csv_to_bronze(file_path, nome_tabela):
    """Load one CSV file and overwrite the target table with its contents.

    The file is read with a header row and inferred column types, an
    ``ingestion_timestamp`` column is appended, and the result replaces the
    table ``nome_tabela`` (including its schema).

    Args:
        file_path: Full path of the CSV file to read.
        nome_tabela: Name of the table to overwrite.
    """
    # NOTE(review): if any of these CSVs hold quoted fields with embedded
    # newlines, the reader would also need the "multiLine" option — confirm
    # by comparing row counts against the source files.
    df = (
        spark.read.format("csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(file_path)
    )

    # Stamp every row with the moment it was ingested.
    df_bronze = df.withColumn("ingestion_timestamp", current_timestamp())

    # Full refresh: replace both the data and the schema of the table.
    (
        df_bronze.write.mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(nome_tabela)
    )


# Ingest every mapped file; the full path is the volume base plus file name.
for nome_arquivo, nome_tabela in mapeamento_arquivos.items():
    _ingest_csv_to_bronze(f"{volume_base_path}{nome_arquivo}", nome_tabela)

In [0]:
# Declare the text widgets that hold the start and end dates of the
# requested period: (widget name, default value, label shown in the UI).
for widget_name, default_value, widget_label in (
    ("data_inicio", "11-01-2025", "Data Início (MM-DD-AAAA)"),
    ("data_fim", "11-10-2025", "Data Fim (MM-DD-AAAA)"),
):
    dbutils.widgets.text(widget_name, default_value, widget_label)

In [0]:
import requests
from pyspark.sql.functions import current_timestamp

# Fetch the dollar quotations for the period selected in the notebook widgets
# and overwrite the table bronze.dm_cotacao_dolar with them.

# Period boundaries, taken as-is from the widgets and placed into the URL.
data_inicio = dbutils.widgets.get("data_inicio")
data_fim = dbutils.widgets.get("data_fim")

url = f"https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?@dataInicial='{data_inicio}'&@dataFinalCotacao='{data_fim}'&$select=dataHoraCotacao,cotacaoCompra&$format=json"

# Without a timeout requests waits indefinitely on an unresponsive server,
# which would leave the notebook/job hanging.
response = requests.get(url, timeout=30)
# Surface HTTP errors (4xx/5xx) here instead of failing later with an
# unrelated JSON decoding error or silently finding no "value" key.
response.raise_for_status()
data = response.json()
cotacoes = data.get('value', [])

if cotacoes:
    df_api = spark.createDataFrame(cotacoes)
    # Stamp every row with the moment it was ingested.
    df_cotacao_bronze = df_api.withColumn("ingestion_timestamp", current_timestamp())
    # Full refresh: replace both the data and the schema of the table.
    df_cotacao_bronze.write.mode("overwrite") \
        .option("overwriteSchema", "true") \
        .saveAsTable("bronze.dm_cotacao_dolar")
else:
    # Nothing is written in this case, so the existing table (if any) is left
    # untouched; say so explicitly rather than finishing silently.
    print(
        f"No quotations returned for {data_inicio} to {data_fim}; "
        "bronze.dm_cotacao_dolar was not updated."
    )