# Budget Execution Analysis

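This notebook analyzes budget execution data from Carmen de Areco's transparency portal for the years 2019–2025. The data is read from local CSV exports; if none are found, a small sample dataset is used for demonstration.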

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import os
from pathlib import Path
import glob

# Candidate directories that may hold budget/execution CSV exports.
# Relative paths are resolved against the current working directory;
# directories that do not exist are skipped during discovery.
data_paths = [
    Path("../data/processed"),
    Path("../data/consolidated"),
    Path("../data/cleaned"),
    Path("../frontend/public/data/csv"),
    Path("../../data/processed"),
    Path("../../data/consolidated"),
    Path("../../frontend/public/data/charts")
]

# Keywords (English and Spanish) that mark a file name as budget-related.
budget_keywords = ("budget", "execution", "presupuesto", "ejecucion")


def find_budget_files(search_paths, keywords=budget_keywords):
    """Return budget-related CSV files found under ``search_paths``.

    Matching is case-insensitive on both the keyword and the ``.csv``
    extension, so names such as ``Budget_Execution_2021.csv`` are found on
    case-sensitive filesystems too. Each file is returned once even when its
    name contains several keywords or the search paths overlap.

    Args:
        search_paths: Iterable of ``pathlib.Path`` directories to search
            recursively. Missing directories are ignored.
        keywords: Lower-case substrings; a file matches when its lower-cased
            name contains at least one of them.

    Returns:
        A list of ``pathlib.Path`` objects, ordered by the position of their
        directory in ``search_paths`` and alphabetically within a directory.
    """
    found = []
    seen = set()
    for base in search_paths:
        if not base.exists():
            continue
        # Sorting makes the "first available file" choice reproducible
        # instead of depending on filesystem enumeration order.
        for candidate in sorted(base.rglob("*")):
            name = candidate.name.lower()
            if candidate.suffix.lower() != ".csv" or not candidate.is_file():
                continue
            if any(keyword in name for keyword in keywords) and candidate not in seen:
                seen.add(candidate)
                found.append(candidate)
    return found


# Find all budget-related CSV files
all_budget_files = find_budget_files(data_paths)

print(f"Found {len(all_budget_files)} budget/execution files")

In [None]:
# Load the most comprehensive budget execution data.
# `df` stays None until a candidate file has been read successfully.
df = None

# Try consolidated / "Budget_Execution" files first (matched case-insensitively
# so differently-cased file names are still preferred), then every other
# discovered file in its original order.
preferred_files = [
    file_path for file_path in all_budget_files
    if 'consolidated' in str(file_path).lower()
    or 'budget_execution' in str(file_path).lower()
]
fallback_files = [file_path for file_path in all_budget_files
                  if file_path not in preferred_files]

for file_path in preferred_files + fallback_files:
    try:
        loaded = pd.read_csv(file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError,
            UnicodeDecodeError, OSError) as exc:
        # A single broken export should not abort the analysis; move on to
        # the next candidate instead.
        print(f"Skipping unreadable file {file_path}: {exc}")
        continue
    df = loaded
    print(f"Loaded data from: {file_path}")
    break

# If no budget file could be loaded, create sample data
if df is None:
    print("No budget execution data found, creating sample data for demonstration")
    
    # Sample data for demonstration: three sectors across 2019-2022
    data = {
        'year': [2019, 2019, 2019, 2020, 2020, 2020, 2021, 2021, 2021, 2022, 2022, 2022],
        'sector': ['Educacion', 'Salud', 'Obras y Servicios Publicos', 
                  'Educacion', 'Salud', 'Obras y Servicios Publicos',
                  'Educacion', 'Salud', 'Obras y Servicios Publicos',
                  'Educacion', 'Salud', 'Obras y Servicios Publicos'],
        'budget': [1000000, 800000, 1200000, 1100000, 850000, 1300000, 1150000, 900000, 1350000, 1200000, 950000, 1400000],
        'executed': [950000, 780000, 1180000, 1050000, 820000, 1250000, 1100000, 880000, 1320000, 1180000, 920000, 1380000],
    }
    df = pd.DataFrame(data)

# Standardize column names (Spanish / alternate headers -> canonical names)
column_mapping = {
    'Presupuesto': 'budget',
    'Ejecutado': 'executed',
    'Año': 'year',
    'anio': 'year',
    'Ejercicio': 'year',
    'Sector': 'sector',
    'area': 'sector',
    'departamento': 'sector',
    'Monto_Presupuestado': 'budget',
    'Monto_Ejecutado': 'executed',
    'Porcentaje_Ejecucion': 'execution_percentage'
}

# Only rename a column when its canonical name is still free. Several source
# headers map to the same target; renaming all of them would create duplicate
# column labels, after which df['budget'] is a DataFrame and the numeric
# conversion below fails.
taken_names = set(df.columns)
safe_mapping = {}
for source_name, target_name in column_mapping.items():
    if source_name in df.columns and target_name not in taken_names:
        safe_mapping[source_name] = target_name
        taken_names.add(target_name)

df = df.rename(columns=safe_mapping)

# Convert columns to appropriate types; values that cannot be parsed as
# numbers become NaN rather than raising.
numeric_cols = ['budget', 'executed']
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

if 'year' in df.columns:
    # Nullable integer dtype keeps missing years as <NA> instead of floats.
    df['year'] = pd.to_numeric(df['year'], errors='coerce').astype('Int64')

print(f"Dataset shape: {df.shape}")
print(df.head())
print(f"\nColumns: {list(df.columns)}")

In [None]:
# (Re)compute the execution percentage from the numeric budget/executed
# columns. Any value loaded from the file is overwritten so the column is
# always numeric and consistent with the amounts; a zero budget yields 0.
if 'executed' in df.columns and 'budget' in df.columns:
    df['execution_percentage'] = np.where(df['budget'] != 0, (df['executed'] / df['budget']) * 100, 0)
    
# Create a bar chart showing budget vs execution by sector
if 'sector' in df.columns and 'budget' in df.columns and 'executed' in df.columns:
    # Aggregate to one row per sector first. Plotting the raw rows would draw
    # one bar per (sector, year) record at the same x position, which does not
    # represent the per-sector totals the title promises.
    sector_totals = df.groupby('sector', as_index=False)[['budget', 'executed']].sum()

    fig = px.bar(sector_totals, 
                 x='sector', 
                 y=['budget', 'executed'], 
                 title="Budget vs Execution by Sector",
                 barmode='group',
                 labels={'value': 'Amount (ARS)', 'variable': 'Type'},
                 color_discrete_map={'budget': 'blue', 'executed': 'green'})

    # Add value labels on bars
    fig.update_traces(texttemplate='%{value:,.0f}', textposition='outside')

    fig.update_layout(
        yaxis_title="Amount (ARS)",
        xaxis_title="Sector",
        uniformtext_minsize=8,
        uniformtext_mode='hide'
    )

    fig.show()
else:
    print("Required columns for budget vs execution chart not found")

In [None]:
# Create a line chart showing budget execution trends over years
if 'year' in df.columns and 'budget' in df.columns and 'executed' in df.columns:
    has_sector = 'sector' in df.columns

    # Sum amounts per year (and per sector when available) so each series has
    # exactly one point per year.
    group_keys = ['year', 'sector'] if has_sector else ['year']
    yearly_df = df.groupby(group_keys).agg({'budget': 'sum', 'executed': 'sum'}).reset_index()

    # Reshape to long form: one row per (year[, sector], variable) with the
    # amount in 'value'. This lets budget and executed be drawn as separate
    # traces. Passing y=['budget', 'executed'] together with color='sector'
    # would put both series into a single trace per sector, so the line would
    # run through all budget points and then double back through the executed
    # points.
    trend_long = yearly_df.melt(id_vars=group_keys,
                                value_vars=['budget', 'executed'],
                                var_name='variable',
                                value_name='value')

    if has_sector:
        # Color distinguishes sectors; dash style distinguishes budget vs executed.
        fig_trend = px.line(trend_long, 
                            x='year', 
                            y='value', 
                            color='sector',
                            line_dash='variable',
                            title="Budget Execution Trends by Sector",
                            labels={'value': 'Amount (ARS)', 'variable': 'Type', 'sector': 'Sector'})
        legend_title = "Sector, Type"
    else:
        fig_trend = px.line(trend_long, 
                            x='year', 
                            y='value', 
                            color='variable',
                            title="Budget Execution Trends",
                            labels={'value': 'Amount (ARS)', 'variable': 'Type'})
        legend_title = "Type"

    fig_trend.update_layout(
        yaxis_title="Amount (ARS)",
        xaxis_title="Year",
        legend_title=legend_title
    )
    # Years are whole numbers; avoid fractional tick labels such as 2019.5.
    fig_trend.update_xaxes(dtick=1)

    fig_trend.show()
else:
    print("Required columns for trend analysis not found")

In [None]:
# Create a scatter plot to show budget vs executed amounts
if 'budget' in df.columns and 'executed' in df.columns:
    # Rows missing either amount cannot be placed on the chart, and a NaN in
    # the marker-size column makes plotly raise, so drop them up front.
    plot_df = df.dropna(subset=['budget', 'executed'])

    if plot_df.empty:
        print("No rows with both budget and executed amounts available for scatter plot")
    else:
        color_col = 'sector' if 'sector' in plot_df.columns else None

        # Only use marker size for smaller datasets, and only when every
        # budget is non-negative with at least one positive value (marker
        # sizes must be non-negative numbers).
        budgets_usable_as_size = (plot_df['budget'] >= 0).all() and plot_df['budget'].max() > 0
        size_col = 'budget' if len(plot_df) < 100 and budgets_usable_as_size else None
        
        fig_scatter = px.scatter(plot_df, 
                                 x='budget', 
                                 y='executed', 
                                 color=color_col,
                                 size=size_col,
                                 hover_data=['year'] if 'year' in plot_df.columns else [],
                                 title="Budget vs Execution Amounts",
                                 labels={'budget': 'Budget (ARS)', 'executed': 'Executed (ARS)'})

        # Add perfect execution line (y=x): points below it are under-executed,
        # points above it are over-executed.
        max_val = max(plot_df['budget'].max(), plot_df['executed'].max())
        fig_scatter.add_shape(
            type='line',
            x0=0,
            x1=max_val,
            y0=0,
            y1=max_val,
            line=dict(color='red', width=2, dash='dash'),
            name='Perfect Execution',
            yref='y',
            xref='x'
        )

        fig_scatter.show()
else:
    print("Required columns for scatter plot not found")

In [None]:
# Create a gauge chart for overall execution percentage
if 'executed' in df.columns and 'budget' in df.columns:
    # Overall rate = total executed / total budget, as a percentage.
    # A zero total budget is reported as 0% rather than dividing by zero.
    total_budget = df['budget'].sum()
    overall_execution = (df['executed'].sum() / total_budget) * 100 if total_budget != 0 else 0

    # Executed amounts can exceed the budget. Extend the axis to the next
    # multiple of 10 above the value in that case so the bar is not clipped
    # at 100%.
    axis_max = max(100.0, float(np.ceil(overall_execution / 10.0) * 10))

    fig_gauge = go.Figure(go.Indicator(
        domain = {'x': [0, 1], 'y': [0, 1]},
        value = overall_execution,
        mode = "gauge+number+delta",
        title = {'text': "Overall Budget Execution"},
        delta = {'reference': 100},
        gauge = {'axis': {'range': [None, axis_max]},
                 'bar': {'color': "darkblue"},
                 'steps': [
                     {'range': [0, 50], 'color': "lightgray"},
                     {'range': [50, 75], 'color': "gray"},
                     {'range': [75, 100], 'color': "darkgray"}],
                 # Mark the 100% target (same reference as the delta) instead
                 # of the value itself, which the bar already shows.
                 'threshold': {'line': {'color': "red", 'width': 4},
                              'thickness': 0.75,
                              'value': 100}}))

    fig_gauge.update_layout(height=400)
    fig_gauge.show()

    print(f"Overall execution rate: {overall_execution:.2f}%")
else:
    print("Required columns for gauge chart not found")

In [None]:
# Text summary: overall totals, per-sector execution rates, and a
# per-column completeness report for the loaded dataset.
has_budget = 'budget' in df.columns
has_executed = 'executed' in df.columns
has_sector = 'sector' in df.columns

print("Summary Statistics:")
if has_budget:
    print(f"- Total Budget: {df['budget'].sum():,.2f} ARS")
if has_executed:
    print(f"- Total Executed: {df['executed'].sum():,.2f} ARS")
if has_budget and has_executed:
    budget_total = df['budget'].sum()
    # The overall rate is only reported when the total budget is non-zero.
    if budget_total != 0:
        overall_execution = (df['executed'].sum() / budget_total) * 100
        print(f"- Overall Execution Rate: {overall_execution:.2f}%")

if not (has_sector and has_budget and has_executed):
    print("\nSector-wise analysis not possible - missing required columns")
else:
    print("\nSector-wise Execution:")

    # Totals per sector, plus executed/budget as a percentage (0 when the
    # sector's budget is zero), rounded to two decimals.
    sector_summary = df.groupby('sector')[['budget', 'executed']].sum()
    raw_rate = np.where(
        sector_summary['budget'] != 0,
        (sector_summary['executed'] / sector_summary['budget']) * 100,
        0,
    )
    sector_summary['execution_rate'] = raw_rate
    sector_summary['execution_rate'] = sector_summary['execution_rate'].round(2)

    print(sector_summary)

# Data completeness report: share of non-null values in each column
record_count = len(df)
print("\nData Completeness Report:")
print(f"- Total records: {record_count}")
for column_name in df.columns:
    filled = df[column_name].notna().sum()
    pct_complete = (filled / record_count) * 100 if record_count > 0 else 0
    print(f"- {column_name}: {pct_complete:.1f}% complete ({filled}/{record_count} records)")