In [22]:
import csv
import io
import os
import re

import numpy as np
import pandas as pd
import requests
from googletrans import Translator


##### The token below is the API key used to access Presupuesto Abierto
Link to source: https://www.presupuestoabierto.gob.ar/api/

In [23]:
# Read the API token from the PRESUPUESTO_ABIERTO_TOKEN environment variable so
# the credential does not have to live in the notebook. The literal is kept only
# as a fallback so existing runs keep working; because it is stored here in
# plain text it should be rotated and then removed.
token = os.environ.get("PRESUPUESTO_ABIERTO_TOKEN", "30338571-3593-47f1-8b64-351a6aeacac3")

##### Functions used

In [24]:
def _parse_budget_csv(text: str) -> pd.DataFrame:
    """Parse CSV text (header row first) into a DataFrame whose cells are all strings."""
    # csv.reader understands quoted fields, so a value such as
    # "Agricultura, ganadería y pesca" stays in one cell and loses its
    # surrounding quote characters.
    reader = csv.reader(io.StringIO(text, newline=""))
    column_names = next(reader, None)
    if column_names is None:
        # Completely empty body: nothing to build columns from.
        return pd.DataFrame()
    # Blank lines (typically the trailing one) come back as empty lists; skip
    # them instead of blindly discarding the last row.
    rows = [row for row in reader if row]
    return pd.DataFrame(rows, columns=column_names)


def pull_budget_data(years: list[int], columns: list[str], filters: list[dict], token: str) -> pd.DataFrame:
    """Request budget credit data from the Presupuesto Abierto API.

    Parameters
    ----------
    years : list[int]
        Years to pull, sent as the ``ejercicios`` field (e.g. ``[2022]``).
    columns : list[str]
        Column names to request from the API.
    filters : list[dict]
        Filters to apply to the request.
    token : str
        Token sent in the ``Authorization`` header.

    Returns
    -------
    pd.DataFrame
        One row per CSV line of the response. Every column holds strings,
        except ``credito_presupuestado``, ``credito_vigente`` and
        ``credito_devengado``, which are converted to float when present.
        If the response status code is not 200, the response text is printed
        and the string ``'NA'`` is returned instead of a DataFrame.
    """
    url = "https://www.presupuestoabierto.gob.ar/api/v1/credito"
    request_headers = {
        "Authorization": token,  # you can access this token via presupuesto abierto
        "Content-Type": "application/json"
    }
    parameters = {
        "title": "Gastos por jurisdicción",
        "ejercicios": years,
        "columns": columns,
        "filters": filters
    }

    response = requests.post(url, headers=request_headers, json=parameters)
    if response.status_code != 200:
        print("Failed to make request:", response.text)
        # Sentinel kept for backward compatibility with existing callers.
        return 'NA'

    print("Request successful")
    # utf-8-sig strips a leading byte-order mark if the response has one.
    decoded_response = response.content.decode("utf-8-sig")
    budget_data = _parse_budget_csv(decoded_response)

    # Convert only the credit columns that were actually returned, so a request
    # for a narrower set of columns does not fail with a KeyError.
    for credit_column in ("credito_presupuestado", "credito_vigente", "credito_devengado"):
        if credit_column in budget_data.columns:
            budget_data[credit_column] = budget_data[credit_column].astype(float)

    return budget_data


def translate_column(df: pd.DataFrame, column: str):
    """Translate the values of ``df[column]`` to English, modifying ``df`` in place.

    Each distinct non-null value is sent to the translator once and the result
    is reused for every row holding that value, so repeated values do not cause
    repeated translation calls. Missing values are left untouched rather than
    being passed to the translator.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose column is overwritten with the translated text.
    column : str
        Name of the column to translate.

    Returns
    -------
    None
    """
    translator = Translator()
    # unique() keeps first-appearance order, so translation calls are made in
    # the order the values first occur in the column.
    distinct_values = df[column].dropna().unique()
    translations = {
        value: translator.translate(value, dest='en').text
        for value in distinct_values
    }
    # Fall back to the original cell for anything not translated (missing values).
    df[column] = df[column].map(lambda value: translations.get(value, value))
    return None



In [25]:
# Query settings passed to pull_budget_data: the years to request, the columns
# to return, and the filters to apply (none here).
years = [2024]
columns = [
    # Columns left commented out (not requested):
    #   "caracter_id", "caracter_desc"
    "funcion_desc",
    "ejercicio_presupuestario",
    "credito_presupuestado",
    "credito_vigente",
    "credito_devengado",
]
filters = []

# Pull the table, then translate the `funcion_desc` column to English in place.
data = pull_budget_data(years, columns, filters, token)
translate_column(data, 'funcion_desc')

Request successful


In [26]:
# Display the pulled budget table (its `funcion_desc` column was translated above).
data

Unnamed: 0,funcion_desc,ejercicio_presupuestario,credito_presupuestado,credito_vigente,credito_devengado
0,Fiscal administration,2024,26186.41,76190.73,50119.06
1,"""Agriculture, livestock and fishing""",2024,258080.3,369157.4,232583.6
2,Drinking water and sewerage,2024,475506.5,789897.3,341724.3
3,"""Science, Technology and Innovation""",2024,594712.4,1335862.0,1233392.0
4,"""Commerce, Tourism and other services""",2024,101754.2,79142.72,46177.62
5,Communications,2024,193104.8,432428.8,385708.0
6,Public management control,2024,29874.03,84483.07,75886.44
7,Defense,2024,650241.9,1924195.0,1743143.0
8,Executive Superior Directorate,2024,191212.5,314926.6,252966.3
9,Ecology and Sustainable Development,2024,126587.4,282165.5,164806.0


In [27]:
# Express each row's credit as a percentage of its column total. The division is
# vectorized, so the total is computed once per column rather than once per row,
# and it does not depend on the frame having a default 0..n-1 index.
data['initial budget breakdown'] = (data['credito_presupuestado'] / data['credito_presupuestado'].sum()) * 100
data['current budget breakdown'] = (data['credito_vigente'] / data['credito_vigente'].sum()) * 100
data

Unnamed: 0,funcion_desc,ejercicio_presupuestario,credito_presupuestado,credito_vigente,credito_devengado,initial budget breakdown,current budget breakdown
0,Fiscal administration,2024,26186.41,76190.73,50119.06,0.065059,0.079321
1,"""Agriculture, livestock and fishing""",2024,258080.3,369157.4,232583.6,0.641193,0.384322
2,Drinking water and sewerage,2024,475506.5,789897.3,341724.3,1.181382,0.822345
3,"""Science, Technology and Innovation""",2024,594712.4,1335862.0,1233392.0,1.477545,1.390736
4,"""Commerce, Tourism and other services""",2024,101754.2,79142.72,46177.62,0.252805,0.082394
5,Communications,2024,193104.8,432428.8,385708.0,0.479763,0.450192
6,Public management control,2024,29874.03,84483.07,75886.44,0.074221,0.087953
7,Defense,2024,650241.9,1924195.0,1743143.0,1.615507,2.003237
8,Executive Superior Directorate,2024,191212.5,314926.6,252966.3,0.475062,0.327863
9,Ecology and Sustainable Development,2024,126587.4,282165.5,164806.0,0.314503,0.293756


##### Next steps:
- Create a pie chart based on the three major categories above
- Focus on a specific "funcion"