# Coleta de Dados da API PTAX (Banco Central) com PySpark



In [2]:
import requests
import json

# PTAX endpoint (CotacaoDolarDia) for a single quotation date.
# The date parameter is written month-first (MM-DD-YYYY): '11-06-2025'
# yields the quotation dated 2025-11-06.
url = (
    "https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/"
    "CotacaoDolarDia(dataCotacao=@dataCotacao)"
    "?@dataCotacao='11-06-2025'"
    "&$top=100"
    "&$format=json"
)

def requisicao_api(link, timeout=30):
    """Request ``link`` with HTTP GET and save the JSON body to ``resultado.json``.

    Args:
        link: Full URL to request.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Defaults to 30.

    Returns:
        None. On HTTP 200 the decoded payload is written to ``resultado.json``
        (UTF-8, 4-space indentation, non-ASCII characters kept as-is). On any
        other status the code and the response text are printed and no file
        is written.

    Raises:
        requests.exceptions.RequestException: If the request itself fails
            (including when ``timeout`` is exceeded).
    """
    # Without a timeout, requests.get can block forever on a stalled connection.
    resposta = requests.get(link, timeout=timeout)

    if resposta.status_code != 200:
        print("Erro na requisição:", resposta.status_code)
        print(resposta.text)
        return

    dados = resposta.json()
    print("Status Code:", resposta.status_code)
    with open("resultado.json", "w", encoding="utf-8") as arquivo:
        json.dump(dados, arquivo, ensure_ascii=False, indent=4)

# Run the request; on HTTP 200 this writes resultado.json to disk
requisicao_api(url)


# Read the saved payload back from disk
with open('resultado.json', mode='r', encoding='utf-8') as f:
    dados = json.loads(f.read())

# The quotation records are listed under the 'value' key of the payload
registros = dados['value']
print('Exemplo de registros:', registros[0:3])

Status Code: 200
Exemplo de registros: [{'cotacaoCompra': 5.3445, 'cotacaoVenda': 5.3451, 'dataHoraCotacao': '2025-11-06 13:02:49.425'}]


In [3]:
# Instalar PySpark
!apt-get update -qq > /dev/null
!apt-get install openjdk-11-jdk -qq > /dev/null
!pip install -q pyspark

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


In [4]:
# Configure the Java environment and start Spark
import os
os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64'

# Add the JDK's bin directory to PATH only if it is not already there, so that
# re-running this cell does not keep appending duplicate entries.
java_bin = os.path.join(os.environ['JAVA_HOME'], 'bin')
path_atual = os.environ.get('PATH', '')
if java_bin not in path_atual.split(os.pathsep):
    os.environ['PATH'] = os.pathsep.join(
        parte for parte in (path_atual, java_bin) if parte
    )

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# getOrCreate() reuses an already running session when the cell is re-run
spark = SparkSession.builder.appName('API_PTAX_PySpark').getOrCreate()

# Build a DataFrame from the list of quotation dicts loaded from the API
df = spark.createDataFrame(registros)
df.printSchema()
df.show(5)

root
 |-- cotacaoCompra: double (nullable = true)
 |-- cotacaoVenda: double (nullable = true)
 |-- dataHoraCotacao: string (nullable = true)

+-------------+------------+--------------------+
|cotacaoCompra|cotacaoVenda|     dataHoraCotacao|
+-------------+------------+--------------------+
|       5.3445|      5.3451|2025-11-06 13:02:...|
+-------------+------------+--------------------+



In [16]:
# Five transformations, each building on the previous DataFrame

# 1) Keep the three columns, with the timestamp first
df_sel = df.select('dataHoraCotacao', 'cotacaoCompra', 'cotacaoVenda')

# 2) Cast both quotation columns to double
df_tipos = (
    df_sel
    .withColumn('cotacaoCompra', F.col('cotacaoCompra').cast('double'))
    .withColumn('cotacaoVenda', F.col('cotacaoVenda').cast('double'))
)

# 3) Add the selling-minus-buying difference, rounded to 4 decimal places
df_diferenca = df_tipos.withColumn(
    'dif_venda_compra',
    F.round(F.col('cotacaoVenda') - F.col('cotacaoCompra'), 4),
)

# 4) Keep only rows whose buying quotation is above 5.0
df_filtrado = df_diferenca.where(F.col('cotacaoCompra') > 5.0)

# 5) Sort by dataHoraCotacao in descending order
df_final = df_filtrado.sort(F.desc('dataHoraCotacao'))

df_final.show(n=10, truncate=False)

+-----------------------+-------------+------------+----------------+
|dataHoraCotacao        |cotacaoCompra|cotacaoVenda|dif_venda_compra|
+-----------------------+-------------+------------+----------------+
|2025-11-06 13:02:49.425|5.3445       |5.3451      |6.0E-4          |
+-----------------------+-------------+------------+----------------+



In [8]:
# ------------------------------------------------------------
# ACTIONS: these calls execute the transformation plan
# ------------------------------------------------------------

# Action 1: display the first records (show() defaults to 20 rows)
print("Exibindo registros processados:")
df_final.show(n=20, truncate=False)



Exibindo registros processados:
+-----------------------+-------------+------------+----------------+
|dataHoraCotacao        |cotacaoCompra|cotacaoVenda|dif_venda_compra|
+-----------------------+-------------+------------+----------------+
|2025-11-06 13:02:49.425|5.3445       |5.3451      |6.0E-4          |
+-----------------------+-------------+------------+----------------+



In [9]:
# Action 2: count the total number of rows in the final DataFrame
total_registros = df_final.count()
print(f"Total de registros no DataFrame final: {total_registros}")

Total de registros no DataFrame final: 1


In [11]:
# Action 3: descriptive statistics for the quotation columns
print("\nEstat√≠sticas das colunas num√©ricas:")
df_final.describe(
    "cotacaoCompra",
    "cotacaoVenda",
    "dif_venda_compra",
).show()


Estat√≠sticas das colunas num√©ricas:
+-------+-------------+------------+----------------+
|summary|cotacaoCompra|cotacaoVenda|dif_venda_compra|
+-------+-------------+------------+----------------+
|  count|            1|           1|               1|
|   mean|       5.3445|      5.3451|          6.0E-4|
| stddev|         NULL|        NULL|            NULL|
|    min|       5.3445|      5.3451|          6.0E-4|
|    max|       5.3445|      5.3451|          6.0E-4|
+-------+-------------+------------+----------------+



In [12]:
# Action 4: average of each quotation column, one aliased aggregate per column
df_final.select(
    *[
        F.avg(coluna).alias(apelido)
        for coluna, apelido in (
            ("cotacaoCompra", "media_compra"),
            ("cotacaoVenda", "media_venda"),
            ("dif_venda_compra", "media_dif_venda_compra"),
        )
    ]
).show()

+------------+-----------+----------------------+
|media_compra|media_venda|media_dif_venda_compra|
+------------+-----------+----------------------+
|      5.3445|     5.3451|                6.0E-4|
+------------+-----------+----------------------+



In [13]:
# Action 5: show the record with the highest selling quotation
print("\nRegistro com maior cota√ß√£o de venda:")
(
    df_final
    .sort(F.desc("cotacaoVenda"))
    .limit(1)
    .show(truncate=False)
)


Registro com maior cota√ß√£o de venda:
+-----------------------+-------------+------------+----------------+
|dataHoraCotacao        |cotacaoCompra|cotacaoVenda|dif_venda_compra|
+-----------------------+-------------+------------+----------------+
|2025-11-06 13:02:49.425|5.3445       |5.3451      |6.0E-4          |
+-----------------------+-------------+------------+----------------+

