In [44]:
import json
import httpx
import asyncio
from datetime import date, timedelta
import itertools
import random

from tramita.sources.camara.client import camara_fetch
from tramita.http.client import HttpClient

SENATE_URL = "https://legis.senado.leg.br/dadosabertos/"
CAMARA_URL="https://dadosabertos.camara.leg.br/api/v2/"
SIGLA_PROP_INCLUDES = [
    # Núcleo do processo legislativo
    "PL", "PLP", "PLC", "PLS", "PLN", "PLV", "PEC", "MPV", "PDC", "PDL", "PDN", "PDS",
    # Emendas e correlatos (mínimo útil para tramitação)
    "EMC",  # Emenda na Comissão
    "EMP",  # Emenda de Plenário
    "EMS",  # Emenda/Substitutivo do Senado
    "EMR",  # Emenda de Relator
    "ESB",  # Emenda ao Substitutivo
    "ERD",  # Emenda de Redação
    "EMA",  # Emenda Aglutinativa de Plenário
    "EAG",  # Emenda Substitutiva Aglutinativa Global
    "EPP",  # Emenda ao PPA
    "EMPV",  # Emenda à Medida Provisória (CN)
    # Substitutivos e subemendas
    "SBT",  # Substitutivo
    "SBE",  # Subemenda
    "ESP",  # Emenda Substitutiva de Plenário
    "SSP",  # Subemenda Substitutiva de Plenário
    # (opcionalmente, versões “-A” adotadas pela comissão)
    "SBT-A", "SBE-A", "EMC-A",
]


In [26]:
dates = [date(2024, 1, 1)]
while dates[-1] < date(2025, 1, 1):
    dates.append(dates[-1] + timedelta(days=15))

In [27]:
dates = [(d1, d2 - timedelta(days=1)) for d1, d2 in itertools.pairwise(dates)]
dates

[(datetime.date(2024, 1, 1), datetime.date(2024, 1, 15)),
 (datetime.date(2024, 1, 16), datetime.date(2024, 1, 30)),
 (datetime.date(2024, 1, 31), datetime.date(2024, 2, 14)),
 (datetime.date(2024, 2, 15), datetime.date(2024, 2, 29)),
 (datetime.date(2024, 3, 1), datetime.date(2024, 3, 15)),
 (datetime.date(2024, 3, 16), datetime.date(2024, 3, 30)),
 (datetime.date(2024, 3, 31), datetime.date(2024, 4, 14)),
 (datetime.date(2024, 4, 15), datetime.date(2024, 4, 29)),
 (datetime.date(2024, 4, 30), datetime.date(2024, 5, 14)),
 (datetime.date(2024, 5, 15), datetime.date(2024, 5, 29)),
 (datetime.date(2024, 5, 30), datetime.date(2024, 6, 13)),
 (datetime.date(2024, 6, 14), datetime.date(2024, 6, 28)),
 (datetime.date(2024, 6, 29), datetime.date(2024, 7, 13)),
 (datetime.date(2024, 7, 14), datetime.date(2024, 7, 28)),
 (datetime.date(2024, 7, 29), datetime.date(2024, 8, 12)),
 (datetime.date(2024, 8, 13), datetime.date(2024, 8, 27)),
 (datetime.date(2024, 8, 28), datetime.date(2024, 9, 11)),

In [28]:
sem = asyncio.Semaphore(15)

async def get_props(d1, d2, sem, client: HttpClient):
    async with sem:
        print(d1, d2)
        return await camara_fetch(client, f"{CAMARA_URL}/proposicoes", params={
            "dataInicio": d1.isoformat(),
            "dataFim": d2.isoformat(),
        })
    
async with HttpClient(CAMARA_URL, rate_per_sec=15.0, timeout=20.0, user_agent="anonymous") as client:
    tasks = [get_props(d1, d2, sem, client) for d1, d2 in dates]
    result = await asyncio.gather(*tasks, return_exceptions=True)

2024-01-01 2024-01-15
2024-01-16 2024-01-30
2024-01-31 2024-02-14
2024-02-15 2024-02-29
2024-03-01 2024-03-15
2024-03-16 2024-03-30
2024-03-31 2024-04-14
2024-04-15 2024-04-29
2024-04-30 2024-05-14
2024-05-15 2024-05-29
2024-05-30 2024-06-13
2024-06-14 2024-06-28
2024-06-29 2024-07-13
2024-07-14 2024-07-28
2024-07-29 2024-08-12
2024-08-13 2024-08-27
2024-08-28 2024-09-11
2024-09-12 2024-09-26
2024-09-27 2024-10-11
2024-10-12 2024-10-26
2024-10-27 2024-11-10
2024-11-11 2024-11-25
2024-11-26 2024-12-10
2024-12-11 2024-12-25
2024-12-26 2025-01-09


In [30]:
result = [res for res in result if not isinstance(res, Exception)]

In [31]:
props = [p for res in result for p in res]

In [35]:
props = [p for p in props if p['siglaTipo'] in SIGLA_PROP_INCLUDES]

In [37]:
uniqueprops = dict()

In [38]:
for p in props:
    uniqueprops.setdefault(p['id'], p)

In [40]:
len(uniqueprops)

25816

In [41]:
props = list(uniqueprops.values())

In [45]:
len(props)

25816

In [46]:
async def get_rel_types(pid, sem, client: httpx.AsyncClient):
    async with sem:
        print(pid)
        response = await client.get(f"{CAMARA_URL}/proposicoes/{pid}/relacionadas")
        response.raise_for_status()
        data = response.json()
        props = data['dados']
        siglas = set()
        for prop in props:
            siglas.add(prop.get("siglaTipo"))
        return sorted(list(siglas))
    
sem = asyncio.Semaphore(20)

pids = random.choices([p['id'] for p in props], k=2000)

async with httpx.AsyncClient() as client:
    tasks = [get_rel_types(pid, sem, client) for pid in pids]
    result = await asyncio.gather(*tasks, return_exceptions=True)

2350921
2376429
2236933
2268818
2421321
2264847
2416430
2465244
2433686
2081843
2239471
2274429
2421036
492476
555619
483845
2455486
2419250
2428685
2439670
2214373
2160484
2476076
2088367
2471756
2427040
1302323
2453337
2466432
2168147
2332254
2349243
135702
2477824
2277242
2469909
2423573
2419631
2421211
2301550
316782
955550
2248305
2160209
2477351
2406627
2332656
2168640
2470700
2448426
2227859
2421220
2471905
2392077
2136636
2227243
2448147
2422751
2319590
2441087
349531
2427750
2384856
2088930
2432567
2208694
2194799
2347139
2406903
2058460
2212434
2421628
2447671
2253600
2467397
2234184
2254063
619119
2440862
2450426
2124631
2367723
2448973
557344
2426363
2388827
2423575
2218275
2395748
2430641
2422504
2153842
2448063
2234135
2334924
2456722
2457234
2430152
2477160
2074938
2348355
2144038
532194
2242580
2434627
2419125
495926
2309865
2458210
2348359
2371606
2344528
2373681
2446966
2467307
2356304
2386915
2361123
2464758
530572
2431015
2458006
2471073
2325511
2440994
2417121
2268

In [47]:
d = defaultdict(set)

NameError: name 'defaultdict' is not defined

In [51]:
propsample = [p for p in props if p['id'] in pids]

In [52]:
propsample = {p['id']: p for p in propsample}

In [53]:
from collections import defaultdict

In [64]:
tps = defaultdict(set)

for pid, r in zip(pids, result):
    if not isinstance(r, Exception):
        orig_sigla = propsample[pid]['siglaTipo']
        tps[orig_sigla].update(r)

In [65]:
tps

defaultdict(set,
            {'PL': {'AA',
              'ATA',
              'CVO',
              'DOC',
              'DTQ',
              'EMC',
              'EMC-A',
              'EMP',
              'EMR',
              'EMS',
              'ERD',
              'ESB',
              'MSC',
              'MST',
              'OF',
              'PAR',
              'PARF',
              'PEP',
              'PES',
              'PL',
              'PPP',
              'PPR',
              'PRL',
              'PRLE',
              'PRLP',
              'PRR',
              'PRT',
              'PRV',
              'PSS',
              'RDF',
              'REC',
              'REQ',
              'RIC',
              'RPD',
              'RPDR',
              'SBE',
              'SBE-A',
              'SBR',
              'SBT',
              'SBT-A',
              'SSP',
              'VTS'},
             'EMP': {'REQ'},
             'SBT-A': {'SBR'},
             'PDC': {'PAR',