In [0]:
# Make sure both medallion-layer databases exist before any table is written.
# IF NOT EXISTS keeps this cell safe to re-run.
for comando_sql in (
    "CREATE DATABASE IF NOT EXISTS bronze",
    "CREATE DATABASE IF NOT EXISTS silver",
):
    spark.sql(comando_sql)

In [0]:
from pyspark.sql.functions import current_timestamp, col, lit
from pyspark.sql.types import *
import requests
from datetime import datetime

# Folder prefix for the source CSV files; file names are appended directly to
# this string, which is why it ends with a trailing slash.
base_path = "/Volumes/rocketlab/bronze/csv_files/"

In [0]:
def criar_tabela_bronze(arquivo_csv, nome_tabela):
    """Load one CSV file from ``base_path`` into a table.

    The file is read with a header row and schema inference, an
    ``ingestion_timestamp`` column is added, and the result is written with
    ``overwrite`` mode, so re-running replaces the table instead of
    appending to it.

    Args:
        arquivo_csv: CSV file name, appended to ``base_path``.
        nome_tabela: Target table name handed to ``saveAsTable``
            (for example ``"bronze.ft_pedidos"``).

    Returns:
        The DataFrame that was written, including ``ingestion_timestamp``.
    """
    caminho_arquivo = base_path + arquivo_csv

    df = spark.read.csv(caminho_arquivo, header=True, inferSchema=True)
    df = df.withColumn("ingestion_timestamp", current_timestamp())

    df.write.mode("overwrite").saveAsTable(nome_tabela)

    # No df.count() here: its result was never used, and calling it ran an
    # additional Spark action on every ingested file for nothing.
    return df


In [0]:
# Source CSV file name -> destination table in the bronze database.
tabelas = {
    "olist_customers_dataset.csv": "bronze.ft_consumidores",
    "olist_geolocation_dataset.csv": "bronze.ft_geolocalizacao",
    "olist_order_items_dataset.csv": "bronze.ft_itens_pedidos",
    "olist_order_payments_dataset.csv": "bronze.ft_pagamentos_pedidos",
    "olist_order_reviews_dataset.csv": "bronze.ft_avaliacoes_pedidos",
    "olist_orders_dataset.csv": "bronze.ft_pedidos",
    "olist_products_dataset.csv": "bronze.ft_produtos",
    "olist_sellers_dataset.csv": "bronze.ft_vendedores",
    "product_category_name_translation.csv": "bronze.dm_categoria_produtos_traducao",
}

# Ingest every mapped file; each call overwrites its target table.
for arquivo, tabela in tabelas.items():
    criar_tabela_bronze(arquivo_csv=arquivo, nome_tabela=tabela)

In [0]:
# Start and end of the exchange-rate query period, written as MM-DD-YYYY
# strings (here: the whole of 2017). They are interpolated into the API URL.
data_inicio_formatada = "01-01-2017"
data_fim_formatada = "12-31-2017"

In [0]:
# OData request for the dollar quotation over the configured period; only the
# quotation timestamp and the buy rate are selected, returned as JSON.
url = f"https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/CotacaoDolarPeriodo(dataInicial=@dataInicial,dataFinalCotacao=@dataFinalCotacao)?@dataInicial='{data_inicio_formatada}'&@dataFinalCotacao='{data_fim_formatada}'&$select=dataHoraCotacao,cotacaoCompra&$format=json"

# Without a timeout, requests waits indefinitely and an unresponsive API
# would hang the notebook; 30 seconds bounds connect and read waits.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    dados = response.json()
    cotacoes = dados['value']

    if cotacoes:
        df_cotacao = spark.createDataFrame(cotacoes)

        df_cotacao = df_cotacao.withColumn("ingestion_timestamp", current_timestamp())

        df_cotacao.write.mode("overwrite").saveAsTable("bronze.dm_cotacao_dolar")

        print(f"Tabela bronze.dm_cotacao_dolar criada com {df_cotacao.count()} registros")
        display(df_cotacao.limit(5))
    else:
        # createDataFrame cannot infer a schema from an empty list, so an
        # empty result is reported instead of failing with a schema error.
        print("Nenhuma cotação retornada pela API; tabela bronze.dm_cotacao_dolar não foi atualizada")
else:
    print(f"Erro ao buscar dados da API: {response.status_code}")

In [0]:
# List the tables currently registered in the bronze database and show them.
tabelas_bronze = spark.sql("SHOW TABLES IN bronze")
display(tabelas_bronze)