## 1) Coletando as séries do Sistema Gerenciador de Séries Temporais / BCB

In [34]:
import time
import math
import requests  # cliente HTTP
import pandas as pd
from datetime import datetime
from pathlib import Path

URL_BASE = "https://api.bcb.gov.br/dados/serie/bcdata.sgs.{codigo}/dados"

def baixar_sgs(codigo: int, data_inicial: str, data_final: str, tentativas_max: int = 5) -> pd.DataFrame:
    """
    Baixa UMA série do SGS em JSON e devolve DataFrame com colunas: ['dt', f'sgs_{codigo}'].
    data_inicial/data_final no formato 'dd/mm/aaaa'. Backoff simples em caso de erro HTTP.
    """
    parametros = {"formato": "json", "dataInicial": data_inicial, "dataFinal": data_final}
    for tentativa in range(tentativas_max):
        try:
            resp = requests.get(URL_BASE.format(codigo=codigo), params=parametros, timeout=30)
            resp.raise_for_status()
            dados = resp.json()
            df = pd.DataFrame(dados)
            if df.empty:
                return pd.DataFrame(columns=["dt", f"sgs_{codigo}"])
            # Converte data (pode vir dd/mm/aaaa ou mm/aaaa)
            def parsear_data(s):
                for fmt in ("%d/%m/%Y", "%m/%Y"):
                    try:
                        return datetime.strptime(s, fmt)
                    except ValueError:
                        pass
                return pd.NaT
            df["dt"] = df["data"].apply(parsear_data)
            df[f"sgs_{codigo}"] = pd.to_numeric(df["valor"].str.replace(",", "."), errors="coerce")
            saida = df[["dt", f"sgs_{codigo}"]].dropna(subset=["dt"]).copy()
            # normaliza para frequência mensal (MS)
            saida = (
                saida.set_index("dt")
                     .sort_index()
                     .asfreq("MS")  # Month Start
                     .ffill()       # preenche mês faltante com último valor
            )
            saida.reset_index(inplace=True)
            return saida
        except requests.RequestException as e:
            espera = 2 ** tentativa
            time.sleep(espera)
            if tentativa == tentativas_max - 1:
                raise e

def baixar_varias(codigos: list[int], data_inicial: str, data_final: str) -> pd.DataFrame:
    """
    Faz merge (outer) de várias séries por 'dt'. Retorna DataFrame com dt + colunas sgs_XXXX.
    """
    df_todas = None
    for codigo in codigos:
        df_serie = baixar_sgs(codigo, data_inicial, data_final)
        df_todas = df_serie if df_todas is None else df_todas.merge(df_serie, on="dt", how="outer")
    # index mensal e ordenação
    return df_todas.set_index("dt").sort_index().asfreq("MS")

# --- seus conjuntos de códigos ---
CODIGOS_LIVRES = [20635, 20662, 20634, 20718, 20740, 20717, 20787, 20809, 20786]
CODIGOS_ICC_LIVRES = [25355, 25356, 25354]  # opcional

CODIGOS_DIRECIONADOS = [20686, 20698, 20685, 20757, 20768, 20756, 20826, 20837, 20825]
CODIGOS_ICC_DIRECIONADOS = [25358, 25359, 25357]  # opcional

INCLUIR_ICC = True
CODIGOS_LIVRES_FINAIS = CODIGOS_LIVRES + (CODIGOS_ICC_LIVRES if INCLUIR_ICC else [])
CODIGOS_DIRECIONADOS_FINAIS = CODIGOS_DIRECIONADOS + (CODIGOS_ICC_DIRECIONADOS if INCLUIR_ICC else [])

# --- datas-alvo (ajuste conforme necessário) ---
DATA_INICIAL = "01/01/2011"
DATA_FINAL = datetime.today().strftime("%d/%m/%Y")

# --- baixa e organiza ---
df_livres = baixar_varias(CODIGOS_LIVRES_FINAIS, DATA_INICIAL, DATA_FINAL)
df_direcionados = baixar_varias(CODIGOS_DIRECIONADOS_FINAIS, DATA_INICIAL, DATA_FINAL)


conjunto_dados = df_livres.join(df_direcionados, how="outer")

conjunto_dados


Unnamed: 0_level_0,sgs_20635,sgs_20662,sgs_20634,sgs_20718,sgs_20740,sgs_20717,sgs_20787,sgs_20809,sgs_20786,sgs_25355,...,sgs_20685,sgs_20757,sgs_20768,sgs_20756,sgs_20826,sgs_20837,sgs_20825,sgs_25358,sgs_25359,sgs_25357
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-03-01,111138,81342,192480,27.05,47.59,37.92,15.52,35.19,25.93,,...,23174,9.57,9.28,9.46,3.51,3.88,3.64,,,
2011-04-01,104922,76370,181292,27.48,48.73,38.71,15.89,36.20,26.62,,...,22430,10.17,8.92,9.70,4.15,3.73,3.99,,,
2011-05-01,115169,86981,202150,27.20,48.46,38.40,15.73,36.09,26.46,,...,25679,10.40,9.46,10.05,4.30,3.73,4.09,,,
2011-06-01,115286,85337,200623,27.14,48.29,38.28,15.62,35.97,26.34,,...,29253,10.01,9.15,9.69,3.94,3.68,3.85,,,
2011-07-01,109709,84000,193708,27.51,48.62,38.67,15.82,36.11,26.54,,...,28639,9.62,9.73,9.66,3.48,3.72,3.57,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-01,259516,301404,560920,23.47,56.77,43.86,9.75,42.30,29.68,20.60,...,59003,18.32,11.37,12.92,5.09,4.47,4.61,10.97,9.54,10.01
2025-04-01,263452,309881,573332,24.21,58.01,45.01,10.65,43.75,31.02,20.88,...,70925,15.85,11.14,12.21,4.35,4.47,4.44,11.14,9.57,10.09
2025-05-01,271402,307525,578928,24.17,58.70,45.64,10.76,44.71,31.87,20.93,...,68547,14.67,11.27,12.04,4.50,4.46,4.47,11.21,9.60,10.14
2025-06-01,264523,304322,568844,24.29,58.42,45.51,10.91,44.40,31.73,21.34,...,67715,14.08,11.09,11.77,4.59,4.39,4.44,11.32,9.63,10.20


## Transposição da base: As séries serão linhas e as colunas as datas (mês/ano)

In [35]:
# 1) Garantir ordenação por data e formato amigável do rótulo de coluna (datas)
conjunto_dados = conjunto_dados.sort_index()
rotulos_datas = [dt.strftime("%Y-%m") for dt in conjunto_dados.index]

# 2) Transpor: linhas = séries, colunas = datas
tabela_series = conjunto_dados.T.copy()
tabela_series.columns = rotulos_datas

# 3) (Opcional) Renomear o índice (nome das linhas) para algo mais intuitivo
tabela_series.index.name = "Série"

tabela_series

Unnamed: 0_level_0,2011-03,2011-04,2011-05,2011-06,2011-07,2011-08,2011-09,2011-10,2011-11,2011-12,...,2024-10,2024-11,2024-12,2025-01,2025-02,2025-03,2025-04,2025-05,2025-06,2025-07
Série,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
sgs_20635,111138.0,104922.0,115169.0,115286.0,109709.0,124513.0,114845.0,108814.0,118129.0,132764.0,...,257922.0,253835.0,310403.0,238004.0,237764.0,259516.0,263452.0,271402.0,264523.0,253797.0
sgs_20662,81342.0,76370.0,86981.0,85337.0,84000.0,91737.0,88129.0,86639.0,90974.0,96629.0,...,309554.0,302438.0,316382.0,302714.0,292589.0,301404.0,309881.0,307525.0,304322.0,317637.0
sgs_20634,192480.0,181292.0,202150.0,200623.0,193708.0,216250.0,202974.0,195452.0,209103.0,229394.0,...,567476.0,556273.0,626785.0,540718.0,530276.0,560920.0,573332.0,578928.0,568844.0,571434.0
sgs_20718,27.05,27.48,27.2,27.14,27.51,26.27,26.4,25.99,25.69,24.27,...,21.38,21.66,21.73,24.13,23.78,23.47,24.21,24.17,24.29,25.02
sgs_20740,47.59,48.73,48.46,48.29,48.62,47.92,48.42,49.57,48.22,45.84,...,52.39,53.16,53.11,54.28,56.64,56.77,58.01,58.7,58.42,57.65
sgs_20717,37.92,38.71,38.4,38.28,38.67,37.7,37.99,38.39,37.56,35.54,...,40.17,40.86,40.74,42.46,43.85,43.86,45.01,45.64,45.51,45.4
sgs_20787,15.52,15.89,15.73,15.62,15.82,15.06,15.8,15.76,15.73,14.31,...,9.82,9.62,8.56,10.61,10.22,9.75,10.65,10.76,10.91,11.61
sgs_20809,35.19,36.2,36.09,35.97,36.11,35.99,37.1,38.47,37.5,35.15,...,40.25,40.54,39.25,39.9,42.35,42.3,43.75,44.71,44.4,43.64
sgs_20786,25.93,26.62,26.46,26.34,26.54,26.11,27.01,27.7,27.2,25.2,...,28.26,28.46,27.16,28.42,29.84,29.68,31.02,31.87,31.73,31.61
sgs_25355,,,,,,,,,,,...,19.45,19.48,19.35,20.13,20.38,20.6,20.88,20.93,21.34,21.38


### Renomeando as linhas para tornar os códigos identificáveis com seus respectivos indicadores

In [36]:
mapa_series = {
    # --------- RECURSOS LIVRES (RL) ---------
    "sgs_20635": "concessoes_RL_PJ_20635",
    "sgs_20662": "concessoes_RL_PF_20662",
    "sgs_20634": "concessoes_RL_TOT_20634",

    "sgs_20718": "taxa_RL_PJ_20718",
    "sgs_20740": "taxa_RL_PF_20740",
    "sgs_20717": "taxa_RL_TOT_20717",

    "sgs_20787": "spread_RL_PJ_20787",
    "sgs_20809": "spread_RL_PF_20809",
    "sgs_20786": "spread_RL_TOT_20786",

    "sgs_25355": "icc_RL_PJ_25355",
    "sgs_25356": "icc_RL_PF_25356",
    "sgs_25354": "icc_RL_TOT_25354",

    # --------- RECURSOS DIRECIONADOS (RD) ---------
    "sgs_20686": "concessoes_RD_PJ_20686",
    "sgs_20698": "concessoes_RD_PF_20698",
    "sgs_20685": "concessoes_RD_TOT_20685",

    "sgs_20757": "taxa_RD_PJ_20757",
    "sgs_20768": "taxa_RD_PF_20768",
    "sgs_20756": "taxa_RD_TOT_20756",

    "sgs_20826": "spread_RD_PJ_20826",
    "sgs_20837": "spread_RD_PF_20837",
    "sgs_20825": "spread_RD_TOT_20825",

    "sgs_25358": "icc_RD_PJ_25358",
    "sgs_25359": "icc_RD_PF_25359",
    "sgs_25357": "icc_RD_TOT_25357",
}

# renomeia o índice (linhas) usando o dicionário
tabela_series = tabela_series.rename(index=mapa_series)

tabela_series

Unnamed: 0_level_0,2011-03,2011-04,2011-05,2011-06,2011-07,2011-08,2011-09,2011-10,2011-11,2011-12,...,2024-10,2024-11,2024-12,2025-01,2025-02,2025-03,2025-04,2025-05,2025-06,2025-07
Série,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
concessoes_RL_PJ_20635,111138.0,104922.0,115169.0,115286.0,109709.0,124513.0,114845.0,108814.0,118129.0,132764.0,...,257922.0,253835.0,310403.0,238004.0,237764.0,259516.0,263452.0,271402.0,264523.0,253797.0
concessoes_RL_PF_20662,81342.0,76370.0,86981.0,85337.0,84000.0,91737.0,88129.0,86639.0,90974.0,96629.0,...,309554.0,302438.0,316382.0,302714.0,292589.0,301404.0,309881.0,307525.0,304322.0,317637.0
concessoes_RL_TOT_20634,192480.0,181292.0,202150.0,200623.0,193708.0,216250.0,202974.0,195452.0,209103.0,229394.0,...,567476.0,556273.0,626785.0,540718.0,530276.0,560920.0,573332.0,578928.0,568844.0,571434.0
taxa_RL_PJ_20718,27.05,27.48,27.2,27.14,27.51,26.27,26.4,25.99,25.69,24.27,...,21.38,21.66,21.73,24.13,23.78,23.47,24.21,24.17,24.29,25.02
taxa_RL_PF_20740,47.59,48.73,48.46,48.29,48.62,47.92,48.42,49.57,48.22,45.84,...,52.39,53.16,53.11,54.28,56.64,56.77,58.01,58.7,58.42,57.65
taxa_RL_TOT_20717,37.92,38.71,38.4,38.28,38.67,37.7,37.99,38.39,37.56,35.54,...,40.17,40.86,40.74,42.46,43.85,43.86,45.01,45.64,45.51,45.4
spread_RL_PJ_20787,15.52,15.89,15.73,15.62,15.82,15.06,15.8,15.76,15.73,14.31,...,9.82,9.62,8.56,10.61,10.22,9.75,10.65,10.76,10.91,11.61
spread_RL_PF_20809,35.19,36.2,36.09,35.97,36.11,35.99,37.1,38.47,37.5,35.15,...,40.25,40.54,39.25,39.9,42.35,42.3,43.75,44.71,44.4,43.64
spread_RL_TOT_20786,25.93,26.62,26.46,26.34,26.54,26.11,27.01,27.7,27.2,25.2,...,28.26,28.46,27.16,28.42,29.84,29.68,31.02,31.87,31.73,31.61
icc_RL_PJ_25355,,,,,,,,,,,...,19.45,19.48,19.35,20.13,20.38,20.6,20.88,20.93,21.34,21.38


## Importando o CSV da Taxa Selic

In [37]:
# 1) Ler CSV da Selic e obter a meta vigente mensal (% a.a.)
caminho_csv = Path("meta-para-a-taxa-selic.csv")

# ler CSV
try:
    df_selic = pd.read_csv(caminho_csv, sep=';', encoding='utf-8-sig')
except UnicodeDecodeError:
    df_selic = pd.read_csv(caminho_csv, sep=';', encoding='latin-1')

# renomear
df_selic = df_selic.rename(columns={
    "DateTime": "data_evento",
    "Meta para a taxa Selic": "selic_meta_aa"
})

df_selic
#

Unnamed: 0,data_evento,selic_meta_aa
0,2015-09-21,1425
1,2015-09-22,1425
2,2015-09-23,1425
3,2015-09-24,1425
4,2015-09-25,1425
...,...,...
3649,2025-09-17,15
3650,2025-09-18,15
3651,2025-09-19,15
3652,2025-09-20,15


In [39]:
# parse ISO, filtra nulos
df_selic["data_evento"] = pd.to_datetime(df_selic["data_evento"], format="%Y-%m-%d", errors="coerce")
df_selic = df_selic.dropna(subset=["data_evento"])

# reduzir p/ ano-mês e ancorar no 1º dia do mês (sem "MS" no to_timestamp)
df_selic["data_evento"] = df_selic["data_evento"].dt.to_period("M").dt.to_timestamp()

# ordena e remove duplicatas mensais (fica com o último registro do mês)
df_selic = (df_selic.sort_values("data_evento")
                     .drop_duplicates(subset=["data_evento"], keep="last")
                     .reset_index(drop=True))

# normalizando igual fizemos com a tabela do sgs
df_selic["selic_meta_aa"] = pd.to_numeric(
    df_selic["selic_meta_aa"].astype(str).str.replace(",", "."),
    errors="coerce"
)

df_selic


Unnamed: 0,data_evento,selic_meta_aa
0,2015-09-01,14.25
1,2015-10-01,14.25
2,2015-11-01,14.25
3,2015-12-01,14.25
4,2016-01-01,14.25
...,...,...
116,2025-05-01,14.25
117,2025-06-01,14.75
118,2025-07-01,15.00
119,2025-08-01,15.00
