In [1]:
import pandas as pd
import numpy as np
import requests
import re

##### Token below is to access API key for presupuesto abierto
Link to source: https://www.presupuestoabierto.gob.ar/api/

In [2]:
token = "30338571-3593-47f1-8b64-351a6aeacac3"

#### Requirements to access data:
- Requires 3 parameters:
    - columns you want to access
    - Years of data
    - filters

In [3]:
def pull_budget_data(years: list[int], columns: list[str], filters: list[dict], token: str) -> pd.DataFrame:
    
    url = "https://www.presupuestoabierto.gob.ar/api/v1/credito"
    headers = {
        "Authorization": token,  # you can access this token via presupuesto abierto
        "Content-Type": "application/json"
    }
    parameters = {
        "title": "Gastos por jurisdicción",
        "ejercicios": years, # years of data you want to pull (in this format --> [2022]
        "columns": columns, # These are the columns that we will access (in a list format)
        "filters": filters  # These are the filters that we set (in a list format)
    }
    
    response = requests.post(url, headers=headers, json=parameters)
    if response.status_code == 200:
        print("Request successful")
        # Decode the response content using utf-8-sig codec
        decoded_response = response.content.decode("utf-8-sig")
        lines = decoded_response.split('\n') # The response returns a bunch of lines, each line is a new row of data
        headers = re.split(r',(?!\s)', lines[0]) # Uses a regular expression to split each column based on the commas. It must be a comma with no space directly after, as in these cases, it is a program name. This specifically represents the column names
        elements = []
        # The loop below goes through each line and creates a list for each line. These are our rows
        for i in range(1, len(lines)):
            # Split the string using a regular expression pattern and add to list with all the lines
            elements.append(re.split(r',(?!\s)', lines[i])) 
    
        budget_data = pd.DataFrame(elements[:-1], columns = headers) # creates a dataframe based on the above up to the second to last row since last line did not have any data
        
        # Three lines below convert format of data to float
        budget_data['credito_presupuestado'] = budget_data['credito_presupuestado'].astype(float)
        budget_data['credito_vigente'] = budget_data['credito_vigente'].astype(float)
        budget_data['credito_devengado'] = budget_data['credito_devengado'].astype(float)
    
    else:
        print("Failed to make request:", response.text)
        budget_data = 'NA'
    return budget_data


In [4]:
years = [2024]
columns = [
    "caracter_id",
    "caracter_desc",
    "funcion_desc",
    "ejercicio_presupuestario",
    "credito_presupuestado",
    "credito_vigente",
    "credito_devengado"
    ]
filters = []

pull_budget_data(years, columns, filters, token)

Request successful


Unnamed: 0,caracter_id,caracter_desc,funcion_desc,ejercicio_presupuestario,credito_presupuestado,credito_vigente,credito_devengado
0,1,Administración Central,Administración Fiscal,2024,15538.35,51624.75,27454.41
1,1,Administración Central,"""Agricultura, Ganadería y Pesca""",2024,175580.7,169536.1,54572.94
2,1,Administración Central,Agua Potable y Alcantarillado,2024,400651.7,659716.0,281655.5
3,1,Administración Central,"""Ciencia, Tecnología e Innovación""",2024,102409.7,118760.2,75721.31
4,1,Administración Central,"""Comercio, Turismo y Otros Servicios""",2024,90693.9,66999.3,40278.46
5,1,Administración Central,Comunicaciones,2024,170312.2,368362.0,333057.4
6,1,Administración Central,Control de la Gestión Pública,2024,314.5979,5321.341,283.0113
7,1,Administración Central,Defensa,2024,650241.9,1924195.0,1743143.0
8,1,Administración Central,Dirección Superior Ejecutiva,2024,190222.8,312590.1,250845.8
9,1,Administración Central,Ecología y Desarrollo Sostenible,2024,94896.0,211575.7,103426.8
