Consultar o valor do Dolar por dia com API

In [1]:
import pandas as pd
import requests
from datetime import date, datetime
import time
import duckdb

# opcional: instalar yfinance se necessário
# !pip install yfinance
import yfinance as yf

In [2]:
# ---------------------------
# Helpers de LOG
# ---------------------------
def now():
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

class Step:
    def __init__(self, name):
        self.name = name
        self.t0 = time.time()
        print(f"[{now()}] [{self.name}] INÍCIO")
    def ok(self, extra="OK"):
        dt = time.time() - self.t0
        print(f"[{now()}] [{self.name}] FIM — {extra} ({dt:.2f}s)")
    def fail(self, err):
        dt = time.time() - self.t0
        print(f"[{now()}] [{self.name}] FALHA — {err} ({dt:.2f}s)")

In [3]:

# =============================================================================
# 1) PTAX (BCB) — fechamento diário
# =============================================================================
s = Step("PTAX/BCB - Coleta")
try:
    ini = "04-01-2025"
    fim = date.today().strftime("%m-%d-%Y")
    url = (
      "https://olinda.bcb.gov.br/olinda/servico/PTAX/versao/v1/odata/"
      f"CotacaoDolarPeriodo(dataInicial=@ini,dataFinalCotacao=@fim)?"
      f"@ini='{ini}'&@fim='{fim}'&$select=cotacaoCompra,dataHoraCotacao&$top=10000&$format=json"
    )

    data = requests.get(url, timeout=30).json()["value"]
    df = pd.DataFrame(data)

    # total bruto (todas as cotações do dia)
    print(f"[{now()}] [PTAX] Registros brutos: {len(df)}")

    # última cotação do dia (fechamento)
    df["data"] = pd.to_datetime(df["dataHoraCotacao"]).dt.date
    df_daily = df.sort_values("dataHoraCotacao").groupby("data").tail(1)
    df_daily = df_daily[["data","cotacaoCompra"]].reset_index(drop=True)

    # formata data para dd/mm/yyyy (exibição)
    df_daily["data"] = pd.to_datetime(df_daily["data"]).dt.strftime("%d/%m/%Y")

    # janela de datas (convertendo para comparar corretamente)
    min_ptax = pd.to_datetime(df_daily["data"], dayfirst=True).min().date()
    max_ptax = pd.to_datetime(df_daily["data"], dayfirst=True).max().date()
    print(f"[{now()}] [PTAX] Fechamentos diários: {len(df_daily)} | Janela: {min_ptax} → {max_ptax}")

    s.ok()
except Exception as e:
    s.fail(e)
    raise

[2025-10-25 13:57:01] [PTAX/BCB - Coleta] INÍCIO
[2025-10-25 13:57:02] [PTAX] Registros brutos: 146
[2025-10-25 13:57:02] [PTAX] Fechamentos diários: 145 | Janela: 2025-04-01 → 2025-10-24
[2025-10-25 13:57:02] [PTAX/BCB - Coleta] FIM — OK (1.19s)


In [4]:
# =============================================================================
# 2) PTAX — carga no DuckDB
# =============================================================================
s = Step("PTAX/BCB - Carga DuckDB")
try:
    DB_PATH = "dados_dolar.duckdb"
    con = duckdb.connect(DB_PATH)

    con.execute("""
    CREATE TABLE IF NOT EXISTS dolar_diario (
      data DATE,
      cotacao_compra DOUBLE
    );
    """)

    con.register("df_stage", df_daily)
    con.execute("""
    MERGE INTO dolar_diario AS t
    USING (
      SELECT
        STRPTIME(data, '%d/%m/%Y')::DATE AS data,
        CAST(cotacaoCompra AS DOUBLE)     AS cotacao_compra
      FROM df_stage
    ) AS s
    ON t.data = s.data
    WHEN MATCHED THEN UPDATE SET
      cotacao_compra = s.cotacao_compra
    WHEN NOT MATCHED THEN INSERT (data, cotacao_compra)
    VALUES (s.data, s.cotacao_compra);
    """)

    total_ptax_db = con.execute("SELECT COUNT(*) FROM dolar_diario;").fetchone()[0]
    print(f"[{now()}] [PTAX] Linhas na tabela dolar_diario: {total_ptax_db}")

    con.execute("""
    COPY dolar_diario TO './exports/dolar_diario.csv' (HEADER, DELIMITER ',');
    """)
    print(f"[{now()}] [PTAX] Export CSV -> ./exports/dolar_diario.csv")

    con.unregister("df_stage")
    con.close()
    s.ok("Carga concluída")
except Exception as e:
    s.fail(e)
    raise

[2025-10-25 13:58:21] [PTAX/BCB - Carga DuckDB] INÍCIO
[2025-10-25 13:58:21] [PTAX] Linhas na tabela dolar_diario: 145
[2025-10-25 13:58:21] [PTAX] Export CSV -> ./exports/dolar_diario.csv
[2025-10-25 13:58:21] [PTAX/BCB - Carga DuckDB] FIM — Carga concluída (0.19s)


In [None]:
# =============================================================================
# 3) Yahoo Finance — OHLC diário USDBRL=X
# =============================================================================
s = Step("Yahoo OHLC - Coleta")
try:
    ANO = date.today().year
    inicio = pd.Timestamp(ANO, 4, 1)
    fim_desejado = pd.Timestamp(ANO, 10, 31)
    fim = min(fim_desejado, pd.Timestamp.today().normalize())

    ticker = "USDBRL=X"

    df_yf = yf.download(
      ticker,
      start=inicio,
      end=fim + pd.Timedelta(days=1),  # inclui o último dia
      interval="1d",
      auto_adjust=False,
      progress=False,
    )
    if df_yf.empty:
        raise SystemExit("Nenhum dado retornado pelo Yahoo Finance para o período.")

    ohlc = (
      df_yf.reset_index()[["Date", "Open", "High", "Low", "Close"]]
        .rename(columns={
          "Date": "data",
          "Open": "abertura",
          "High": "alta",
          "Low": "baixa",
          "Close": "fechamento",
        })
    )
    ohlc[["abertura","fechamento","alta","baixa"]] = (
      ohlc[["abertura","fechamento","alta","baixa"]].round(4)
    )

    min_ohlc = pd.to_datetime(ohlc["data"]).min().date()
    max_ohlc = pd.to_datetime(ohlc["data"]).max().date()
    print(f"[{now()}] [YF] Linhas OHLC: {len(ohlc)} | Janela: {min_ohlc} → {max_ohlc}")

    s.ok()
except Exception as e:
    s.fail(e)
    raise

[2025-10-25 14:00:46] [Yahoo OHLC - Coleta] INÍCIO
[2025-10-25 14:00:47] [YF] Linhas OHLC: 147 | Janela: 2025-04-01 → 2025-10-24
[2025-10-25 14:00:47] [Yahoo OHLC - Coleta] FIM — OK (0.52s)


Price,data,abertura,alta,baixa,fechamento
Ticker,Unnamed: 1_level_1,USDBRL=X,USDBRL=X,USDBRL=X,USDBRL=X
0,2025-04-01,5.6984,5.7484,5.6815,5.6984
1,2025-04-02,5.6813,5.7120,5.6583,5.6813
2,2025-04-03,5.6637,5.7476,5.5867,5.6637
3,2025-04-04,5.6306,5.8221,5.6199,5.6306
4,2025-04-07,5.8400,5.9255,5.7994,5.8400
...,...,...,...,...,...
142,2025-10-20,5.4061,5.4370,5.3661,5.4061
143,2025-10-21,5.3687,5.4034,5.3634,5.3687
144,2025-10-22,5.3867,5.3995,5.3768,5.3867
145,2025-10-23,5.3994,5.4035,5.3727,5.3994


In [13]:
# =============================================================================
# 4) Yahoo OHLC — carga no DuckDB
# =============================================================================
s = Step("Yahoo OHLC - Carga DuckDB")
try:
    DB_PATH = "dados_dolar.duckdb"
    TABELA = "dolar_ohlc"
    con = duckdb.connect(DB_PATH)

    con.execute(f"""
    CREATE TABLE IF NOT EXISTS {TABELA} (
     data DATE PRIMARY KEY,
     abertura  DOUBLE,
     fechamento DOUBLE,
     alta    DOUBLE,
     baixa   DOUBLE
    );
    """)

    print( ohlc.columns.tolist())                       # no pandas
    print(con.execute("SELECT * FROM dolar_ohlc LIMIT 0").fetchdf().columns.tolist())  # no DuckDB

    con.register("df_stage", ohlc)
    # UPSERT simples usando a PK(data)
    con.execute(f"""
    INSERT OR REPLACE INTO {TABELA}
    SELECT CAST(data AS DATE), abertura, fechamento, alta, baixa
    FROM df_stage;
    """)

    total_ohlc_db = con.execute(f"SELECT COUNT(*) FROM {TABELA};").fetchone()[0]
    print(f"[{now()}] [YF] Linhas na tabela {TABELA}: {total_ohlc_db}")

    con.execute("""
    COPY dolar_ohlc TO './exports/dolar_ohlc.csv' (HEADER, DELIMITER ',');
    """)
    print(f"[{now()}] [YF] Export CSV -> ./exports/dolar_ohlc.csv")

    con.unregister("df_stage")
    con.close()

    # também exporta o DF direto (útil pra auditoria)
    ohlc.to_csv('./exports/dolar_ohlc_yahoo.csv', index=False)
    print(f"[{now()}] [YF] Export CSV (via pandas) -> ./exports/dolar_ohlc_yahoo.csv")

    s.ok("Carga concluída")
except Exception as e:
    s.fail(e)
    raise

[2025-10-25 14:05:41] [Yahoo OHLC - Carga DuckDB] INÍCIO
[('data', ''), ('abertura', 'USDBRL=X'), ('alta', 'USDBRL=X'), ('baixa', 'USDBRL=X'), ('fechamento', 'USDBRL=X')]
['data', 'abertura', 'fechamento', 'alta', 'baixa']
[2025-10-25 14:05:41] [Yahoo OHLC - Carga DuckDB] FALHA — Binder Error: Referenced column "data" not found in FROM clause!
Candidate bindings: "('data'", "('alta'", "('baixa'", "('abertura'"

LINE 3:     SELECT CAST(data AS DATE), abertura, fechamento, alta, baixa
                        ^ (0.01s)


BinderException: Binder Error: Referenced column "data" not found in FROM clause!
Candidate bindings: "('data'", "('alta'", "('baixa'", "('abertura'"

LINE 3:     SELECT CAST(data AS DATE), abertura, fechamento, alta, baixa
                        ^

In [8]:
# =============================================================================
# 5) Resumo final (logs de status + contagens)
# =============================================================================
print("\nResumo – Séries (PTAX & Yahoo)")
print("--------------------------------")
print(f"[{now()}] PTAX diário: {len(df_daily)} | Janela: {min_ptax} → {max_ptax}")
print(f"[{now()}] OHLC linhas: {len(ohlc)} | Janela: {min_ohlc} → {max_ohlc}")


Resumo – Séries (PTAX & Yahoo)
--------------------------------
[2025-10-25 14:00:03] PTAX diário: 145 | Janela: 2025-04-01 → 2025-10-24
[2025-10-25 14:00:03] OHLC linhas: 147 | Janela: 2025-04-01 → 2025-10-24
