In [12]:
from datetime import datetime
from pydantic import ValidationError
import json
import httpx
from pathlib import Path
import tramitacao.camara.models as cm
import tramitacao.senado.models as sm
import time

# Base URL of the dadosabertos.camara.leg.br REST API (v2). Endpoint paths are
# appended to it directly with f-strings, so the trailing slash must stay.
CAMARA_BASE_URL = "https://dadosabertos.camara.leg.br/api/v2/"

In [13]:
# Load the previously downloaded propositions and validate them against the
# project's `Proposicoes` model.
with open("./data/camara/proposicoes_2020_2025.json") as f:
    house_props = cm.Proposicoes.model_validate_json(f.read())

# Ids of the propositions whose authors were already fetched. This file is only
# ever written by the download loop, so it does not exist on a first run; start
# from an empty set in that case instead of failing with FileNotFoundError.
checked_path = Path("checked.json")
if checked_path.exists():
    with checked_path.open("r") as f:
        checked = set(json.load(f))
else:
    checked = set()

In [None]:
def get_with_pagination(url: str):
    """Fetch every page of a paginated endpoint and return the combined items.

    Each response body is read as a JSON object: the list under ``"dados"``
    holds the page's items and the list under ``"links"`` holds navigation
    links, where the entry with ``rel == "next"`` carries the URL of the
    following page in ``href``. Pagination stops at the first page without
    such an entry.

    Args:
        url: URL of the first page.

    Returns:
        A list with the ``"dados"`` entries of all pages, in page order.

    Raises:
        httpx.HTTPStatusError: if a page answers with a 4xx/5xx status.
        httpx.HTTPError: on transport failures (e.g. timeouts).
        RuntimeError: if a "next" link points at a page that was already
            fetched, which would otherwise loop forever.
    """
    items = []
    seen_urls = set()
    current_url = url
    # One client for the whole traversal so consecutive pages reuse the same
    # connection instead of opening a new one per request.
    with httpx.Client() as client:
        while True:
            if current_url in seen_urls:
                raise RuntimeError(f"Pagination loop detected at {current_url}")
            seen_urls.add(current_url)

            response = client.get(current_url)
            response.raise_for_status()
            data = response.json()
            items.extend(data.get("dados", []))

            # First link flagged as "next", or None when this is the last page.
            next_url = next(
                (
                    link["href"]
                    for link in data.get("links", [])
                    if link.get("rel", "") == "next"
                ),
                None,
            )
            if next_url is None:
                break
            current_url = next_url
            print(current_url)
    return items
        

    

In [22]:
# Files that together form a checkpoint of the download.
PROPS_PATH = Path("./data/camara/proposicoes_2020_2025.json")
CHECKED_PATH = Path("checked.json")

SAVE_EVERY = 1000  # number of newly processed propositions between checkpoints
MAX_RETRIES = 3    # attempts per proposition before giving up on the run
RETRY_DELAY_S = 1  # pause between attempts, in seconds


def _write_atomically(path: Path, text: str) -> None:
    """Write `text` to `path` through a sibling temp file and a rename.

    An interrupted write then leaves the previous file intact instead of a
    truncated one.
    """
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(text)
    tmp_path.replace(path)


def save_checkpoint() -> None:
    """Persist the propositions and the set of processed ids together.

    The data file is written first: an id must never be recorded as checked
    unless its authors are already on disk, otherwise a resumed run would skip
    it and silently lose those authors.
    """
    print("\nSaving.")
    _write_atomically(PROPS_PATH, house_props.model_dump_json())
    _write_atomically(CHECKED_PATH, json.dumps(list(checked)))


n_props = len(house_props.items)
unsaved = 0  # propositions processed since the last checkpoint
try:
    for i, prop in enumerate(house_props.items):
        print(f"{i + 1}/{n_props}", end="\r")
        if prop.id in checked:
            continue

        for attempt in range(MAX_RETRIES):
            try:
                author_data = get_with_pagination(
                    f"{CAMARA_BASE_URL}proposicoes/{prop.id}/autores"
                )
                break
            except Exception as e:
                print(f"\nGot exception: {e}. Retrying.")
                # No point in sleeping after the final failed attempt.
                if attempt + 1 < MAX_RETRIES:
                    time.sleep(RETRY_DELAY_S)
        else:
            # Every attempt failed: stop the run (progress is saved below).
            print("Max retries.")
            break

        try:
            authors = [cm.Autor.model_validate(item) for item in author_data]
        except ValidationError as e:
            print(e)
            raise

        # Replace the contents of the existing list in place.
        prop.autores[:] = authors
        checked.add(prop.id)
        unsaved += 1

        # Count processed items rather than testing the absolute index, so the
        # checkpoint cadence is unaffected by already-checked items.
        if unsaved >= SAVE_EVERY:
            save_checkpoint()
            unsaved = 0
finally:
    # Runs on normal completion, on "Max retries", on validation errors and on
    # a kernel interrupt, so the tail of the work is never lost.
    if unsaved:
        save_checkpoint()
    

25001/116212
Saving.
26001/116212
Saving.
27001/116212
Saving.
28001/116212
Saving.
29001/116212
Saving.
29152/116212
Got exception: The read operation timed out. Retrying.
30001/116212
Saving.
31001/116212
Saving.
32001/116212
Saving.
33001/116212
Saving.
34001/116212
Saving.
35001/116212
Saving.
35648/116212
Got exception: The read operation timed out. Retrying.
35819/116212
Got exception: The read operation timed out. Retrying.
36001/116212
Saving.
37001/116212
Saving.
37645/116212
Got exception: The read operation timed out. Retrying.
37879/116212
Got exception: The read operation timed out. Retrying.
38001/116212
Saving.
39001/116212
Saving.
40001/116212
Saving.
41001/116212
Saving.
42001/116212
Saving.
43001/116212
Saving.
44001/116212
Saving.
45001/116212
Saving.
46001/116212
Saving.
47001/116212
Saving.
48001/116212
Saving.
49001/116212
Saving.
50001/116212
Saving.
51001/116212
Saving.
52001/116212
Saving.
53001/116212
Saving.
54001/116212
Saving.
55001/116212
Saving.
56001/116

In [15]:
# Show the id of the first loaded proposition.
house_props.items[0].id

618609

In [16]:
# Smoke test: fetch the authors of a single, hard-coded proposition.
sample_authors_url = f"{CAMARA_BASE_URL}proposicoes/618609/autores"
items = get_with_pagination(sample_authors_url)

In [19]:
# Validate the first fetched author against the project's `Autor` model and
# display the resulting object.
first_author_raw = items[0]
cm.Autor.model_validate(first_author_raw)

Autor(uri='https://dadosabertos.camara.leg.br/api/v2/deputados/160673', nome='Giovani Cherini', codTipo=10000, tipo='Deputado(a)', ordemAssinatura=1, proponente=1)