In [3]:
import duckdb, os
# Root directory of the snapshot explored below; each sub-directory is listed as one “table”.
ROOT = "data/bronze/snapshots/bronze-2020-2024-v1/camara"

# DuckDB connection shared by every query in this cell (no database file is passed).
con = duckdb.connect()
# Helpful: preview the first rows of a parquet file/glob without loading all rows
def peek(path: str, n: int = 5):
    """Return the first ``n`` rows of the parquet file(s) at ``path``.

    Args:
        path: File path or glob pattern handed to DuckDB's ``read_parquet``.
        n: Maximum number of rows to return; coerced with ``int``.

    Returns:
        Whatever ``con.execute(...).df()`` yields for the preview query.

    Raises:
        ValueError / TypeError: if ``n`` cannot be converted to an integer.
    """
    # Double any single quote so a path such as "o'brien/x.parquet" remains a
    # valid SQL string literal instead of terminating it early.
    quoted_path = path.replace("'", "''")
    # int() guarantees that only a number is spliced into the LIMIT clause.
    q = f"SELECT * FROM read_parquet('{quoted_path}') LIMIT {int(n)}"
    return con.execute(q).df()

# 1) List top-level “tables”: print every sub-directory of ROOT in sorted order
for name in sorted(os.listdir(ROOT)):
    p = os.path.join(ROOT, name)
    # Plain files sitting next to the table directories are skipped.
    if not os.path.isdir(p):
        continue
    print(name, "->", p)

# 2) Read a partitioned dataset (DuckDB auto-detects Hive partitions like year=YYYY)
# The recursive glob matches every parquet file below proposicoes/details.
proposicoes_details = f"{ROOT}/proposicoes/details/**/*.parquet"
# Reuse the peek() helper rather than repeating its SELECT * ... LIMIT query inline.
df_head = peek(proposicoes_details, 20)
df_head  # In VS Code Jupyter, right-click -> “View Value in Data Viewer”

# 3) Quick row counts by year (from partition directory)
# The SQL is bound to a name first so it can be inspected before it is run.
counts_sql = f"""
    SELECT year, count(*) AS rows
    FROM read_parquet('{proposicoes_details}')
    GROUP BY year
    ORDER BY year
"""
counts_result = con.execute(counts_sql)
counts = counts_result.df()  # one row per year, ordered by year
counts

# 4) Browse another topic (e.g., votações) in one shot
votacoes = f"{ROOT}/votacoes/**/*.parquet"
votacoes_sql = f"SELECT year, count(*) AS n FROM read_parquet('{votacoes}') GROUP BY year ORDER BY year"
# NOTE(review): the DataFrame is not bound to a name; in a notebook it is
# presumably rendered only when this is the cell's final expression.
con.execute(votacoes_sql).df()

# 5) Schema/glimpse without full scan
# DESCRIBE reports the columns of the wrapped query instead of returning its rows.
describe_sql = f"""
    DESCRIBE SELECT * FROM read_parquet('{proposicoes_details}')
"""
# NOTE(review): the result is not bound to a name, so it is discarded unless
# the notebook renders it.
con.execute(describe_sql).df()

# 6) Filter + sample “like Excel” exploration
# Keep at most 1000 rows of the 2024 partition. LIMIT is used rather than
# QUALIFY row_number() OVER () so DuckDB may stop reading once it has enough
# rows instead of numbering every matching row first. Without an ORDER BY,
# which rows are kept is unspecified either way (this is not a random sample).
sample = con.execute(f"""
    SELECT *
    FROM read_parquet('{proposicoes_details}')
    WHERE year=2024
    LIMIT 1000
""").df()
sample


autores -> data/bronze/snapshots/bronze-2020-2024-v1/camara/autores
blocos -> data/bronze/snapshots/bronze-2020-2024-v1/camara/blocos
deputados -> data/bronze/snapshots/bronze-2020-2024-v1/camara/deputados
eventos -> data/bronze/snapshots/bronze-2020-2024-v1/camara/eventos
frentes -> data/bronze/snapshots/bronze-2020-2024-v1/camara/frentes
legislaturas -> data/bronze/snapshots/bronze-2020-2024-v1/camara/legislaturas
orgaos -> data/bronze/snapshots/bronze-2020-2024-v1/camara/orgaos
orientacoes -> data/bronze/snapshots/bronze-2020-2024-v1/camara/orientacoes
partidos -> data/bronze/snapshots/bronze-2020-2024-v1/camara/partidos
proposicoes -> data/bronze/snapshots/bronze-2020-2024-v1/camara/proposicoes
referencias -> data/bronze/snapshots/bronze-2020-2024-v1/camara/referencias
relacionadas -> data/bronze/snapshots/bronze-2020-2024-v1/camara/relacionadas
temas -> data/bronze/snapshots/bronze-2020-2024-v1/camara/temas
tramitacoes -> data/bronze/snapshots/bronze-2020-2024-v1/camara/tramitacoes

Unnamed: 0,source,entity,id,url,payload_json,payload_sha256,year
0,camara,proposicoes,15151,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
1,camara,proposicoes,15197,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
2,camara,proposicoes,15508,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
3,camara,proposicoes,15598,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
4,camara,proposicoes,16002,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
...,...,...,...,...,...,...,...
995,camara,proposicoes,395149,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
996,camara,proposicoes,395153,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
997,camara,proposicoes,395154,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
998,camara,proposicoes,395155,https://dadosabertos.camara.leg.br/api/v2/prop...,"{""dados"":[""id"",""uri"",""siglaTipo"",""codTipo"",""nu...",cb3f65059bf3a1c2ed45adc167e93333f5ecab4c2f274d...,2024
