In [1]:
import csv
import json
import random
from urllib.parse import urlencode

import urllib3
from urllib3.util.timeout import Timeout

# Shared HTTP client for every request made in this notebook.
# Fail fast when the host cannot be reached (2 s connect timeout) but
# tolerate slow responses (120 s read timeout).
timeout = Timeout(
    read=120.0,
    connect=2.0,
)
http = urllib3.PoolManager(timeout=timeout)


In [2]:
def get_data(uri, endpoint, params=None):
    """
    Fetch a JSON document from the API with an HTTP GET request.

    :param uri: URL template containing an ``{endpoint}`` placeholder,
        e.g. ``'https://host{endpoint}'``.
    :param endpoint: The API endpoint (path) substituted into ``uri``.
    :param params: Optional query parameters. A dict is URL-encoded
        (commas are kept literal so comma-separated values look the same
        on the wire as before); any other truthy value is converted with
        ``str()`` and appended as-is.
    :return: The decoded JSON body of the response (whatever JSON value
        the server returned).
    :raises Exception: If the response status is not 200; the message
        carries the status code and the response body.
    """
    if params:
        if isinstance(params, dict):
            # Percent-encode keys and values so spaces, '&', '=' or
            # non-ASCII characters cannot corrupt the query string.
            query = urlencode(params, safe=',')
        else:
            query = str(params)
        # Extend an existing query string instead of adding a second '?'.
        separator = '&' if '?' in endpoint else '?'
        endpoint = f'{endpoint}{separator}{query}'

    # Randomize the user agent to avoid being blocked
    user_agent = random.choice([
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.1 Safari/605.1.15',
        'Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Mobile Safari/537.36'
    ])
    headers = {
        'User-Agent': user_agent,
        'Accept': 'application/json',
        'Content-Type': 'application/json'
    }
    response = http.request('GET', uri.format(endpoint=endpoint), headers=headers)
    if response.status != 200:
        # errors='replace' keeps a non-UTF-8 error page from raising
        # UnicodeDecodeError and hiding the actual HTTP failure.
        body = response.data.decode('utf-8', errors='replace')
        raise Exception(f"Error {response.status}: {body}")
    return json.loads(response.data.decode('utf-8'))

In [3]:

def get_csv_tramitacoes(uri, endpoint, data_ini, data_fim, output_path='tramitacoes.csv'):
    """
    Export the tramitações of a date range to a semicolon-separated CSV.

    The records are fetched from ``endpoint`` filtered by
    ``data_tramitacao__range``, sorted by ``materia`` and then by ``id``
    in descending order. Each referenced status, unit and matéria is
    looked up once under ``/api/materia/<model>/<id>/`` and its
    ``__str__`` value is written in place of the numeric id.

    :param uri: URL template containing an ``{endpoint}`` placeholder,
        passed through to ``get_data``.
    :param endpoint: Endpoint that lists the tramitações.
    :param data_ini: First date of the range (sent to the API as-is).
    :param data_fim: Last date of the range (sent to the API as-is).
    :param output_path: Path of the CSV file to write; defaults to
        ``'tramitacoes.csv'`` in the current working directory.
    :return: None. The result is the file written at ``output_path``.
    """
    params = {
        'data_tramitacao__range': f'{data_ini},{data_fim}',
        # presumably asks the API for the full, unpaginated list — the
        # response is consumed below as a plain list of records
        'get_all': 'true',
        'o': 'materia__ano,materia__tipo,materia__numero',
    }

    results = get_data(uri, endpoint, params)
    results = sorted(results, key=lambda x: (x['materia'], -x['id']))

    # Field of a tramitação record -> API model holding the referenced object.
    map_field_model = {
        'status': 'statustramitacao',
        'unidade_tramitacao_local': 'unidadetramitacao',
        'unidade_tramitacao_destino': 'unidadetramitacao',
        'materia': 'materialegislativa'
    }

    # Per-model cache so every referenced object is requested only once.
    db = {
        'statustramitacao': {},
        'unidadetramitacao': {},
        'materialegislativa': {}
    }

    for r in results:
        for field, model in map_field_model.items():
            pk = r[field]
            # A null reference has nothing to look up; requesting
            # '/<model>/None/' would fail and abort the whole export.
            if pk is None or pk in db[model]:
                continue
            db[model][pk] = get_data(uri, f'/api/materia/{model}/{pk}/')

    def _label(model, pk):
        # Display text of a referenced object; empty cell for a null reference.
        return '' if pk is None else db[model][pk]['__str__']

    with open(output_path, 'w', newline='', encoding='utf-8') as cf:
        writer = csv.writer(cf, delimiter=';', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
        # Header row
        writer.writerow(['Data da Tramitação', 'Matéria', 'Unidade de Tramitação Local', 'Unidade de Tramitação Destino', 'Status', 'Texto'])
        # One row per tramitação, in the sorted order
        for r in results:
            writer.writerow([
                r['data_tramitacao'],
                _label('materialegislativa', r['materia']),
                _label('unidadetramitacao', r['unidade_tramitacao_local']),
                _label('unidadetramitacao', r['unidade_tramitacao_destino']),
                _label('statustramitacao', r['status']),
                r['texto']
            ])

In [5]:
# URL template of the target site; get_data fills in the {endpoint} placeholder.
uri = 'https://www.jatai.go.leg.br{endpoint}'

# Export every tramitação dated within May 2025.
get_csv_tramitacoes(
    uri,
    '/api/materia/tramitacao/',
    data_ini='2025-05-01',
    data_fim='2025-05-31',
)