# Procurement Timeline Analysis

Red Flag: 5 tenders concentrated in 15 days of November 2023

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import os
from datetime import datetime, timedelta

In [None]:
# Load procurement data
# Candidate CSV locations, tried in order; the first one that loads wins.
procurement_data_paths = [
    "../data/processed/procurement_data.csv",
    "../data/raw/csv/procurement_data.csv",
    "../frontend/public/data/csv/procurement_data.csv"
]


def _sample_procurement_data():
    """Return the built-in sample tenders used when no CSV can be loaded.

    Kept in one place so the "file not found" and "file unreadable" fallbacks
    can never drift apart.
    """
    return pd.DataFrame([
        {"id": "N°11", "item": "Equipo de Nefrología", "date": "2023-11-13", "value": 15000000, "category": "Salud", "status": "awarded"},
        {"id": "N°10", "item": "Combi Mini Bus", "date": "2023-11-13", "value": 8000000, "category": "Transporte", "status": "awarded"},
        {"id": "N°9", "item": "Camioneta Utilitaria", "date": "2023-11-17", "value": 6000000, "category": "Obras Públicas", "status": "awarded"},
        {"id": "N°8", "item": "Compactador", "date": "2023-11-17", "value": 12000000, "category": "Ambiente", "status": "awarded"},
        {"id": "N°7", "item": "Sistema de Producción", "date": "2023-11-27", "value": 20000000, "category": "Tecnología", "status": "awarded"},
        # Tenders from other months so the November cluster has something to contrast with
        {"id": "N°6", "item": "Software Gestión", "date": "2023-10-05", "value": 5000000, "category": "Tecnología", "status": "awarded"},
        {"id": "N°5", "item": "Muebles Oficina", "date": "2023-08-15", "value": 2000000, "category": "Administración", "status": "awarded"},
        {"id": "N°4", "item": "Sillas Ruedas", "date": "2023-06-22", "value": 1500000, "category": "Salud", "status": "awarded"},
        {"id": "N°3", "item": "Computadoras", "date": "2023-04-30", "value": 7000000, "category": "Educación", "status": "awarded"},
        {"id": "N°2", "item": "Herramientas", "date": "2023-02-14", "value": 3000000, "category": "Obras Públicas", "status": "awarded"},
        {"id": "N°1", "item": "Uniformes", "date": "2023-01-10", "value": 1000000, "category": "Seguridad", "status": "awarded"},
    ])


procurement_data = None
load_failed = False  # True once any existing file could not be read
for path in procurement_data_paths:
    if not os.path.exists(path):
        continue
    try:
        procurement_data = pd.read_csv(path)
    except Exception as e:
        # Best-effort loading: report the unreadable file and keep trying the
        # remaining candidates instead of giving up on real data immediately.
        print(f"Error loading procurement data: {e}")
        load_failed = True
        continue
    print(f"Loaded procurement data from {path} with {len(procurement_data)} rows")
    break

if procurement_data is None:
    if load_failed:
        # At least one file existed but none could be parsed.
        procurement_data = _sample_procurement_data()
        print(f"Created fallback sample procurement data with {len(procurement_data)} rows")
    else:
        print("Procurement data not found, creating sample data")
        procurement_data = _sample_procurement_data()
        print(f"Created sample procurement data with {len(procurement_data)} rows")

In [None]:
# Quick inspection of the loaded table: column names, a preview, and dtypes
print("Procurement data structure:")
print("Columns:", list(procurement_data.columns))

# Preview of at most the first ten tenders
preview = procurement_data.head(10)
print("\nFirst 10 rows:")
print(preview)

# Per-column dtypes, as currently stored in the frame
column_types = procurement_data.dtypes
print("\nData types:")
print(column_types)

In [None]:
# Parse the "date" column into pandas datetimes, replacing it in place
parsed_dates = pd.to_datetime(procurement_data["date"])
procurement_data["date"] = parsed_dates

# Echo the main columns so the converted dates can be checked by eye
display_columns = ['id', 'item', 'date', 'value', 'category', 'status']
print("\nProcurement data with datetime conversion:")
print(procurement_data[display_columns].head(10))

In [None]:
# Chart: Procurement Timeline (November concentration)
# Keep only tenders dated November 2023, using one combined boolean mask.
nov_2023 = procurement_data[
    (procurement_data["date"].dt.year == 2023)
    & (procurement_data["date"].dt.month == 11)
]

# One bubble per tender: position = date/item, size and colour = tender value.
fig = px.scatter(nov_2023, x="date", y="item", size="value", color="value",
                 title="Licitaciones Noviembre 2023: ¿Por qué todas juntas?",
                 labels={"value": "Valor (ARS)", "date": "Fecha"},
                 color_continuous_scale='Reds')

# Add annotation
# NOTE(review): y=2 is a fixed position on the item axis and the text is a
# fixed claim; both assume the five-tender November data set — confirm when
# running against a real CSV.
fig.add_annotation(x=pd.Timestamp('2023-11-20'), y=2, text="5 licitaciones en 15 días", showarrow=True, arrowhead=1)

fig.update_layout(
    yaxis_title="Equipamiento",
    xaxis_title="Fecha",
    coloraxis_colorbar=dict(title="Valor (ARS)")
)

fig.show()

# Make sure the output folder exists before exporting; otherwise the export
# fails on a fresh checkout where ../public/charts has not been created yet.
chart_path = "../public/charts/procurement_timeline_november.html"
os.makedirs(os.path.dirname(chart_path), exist_ok=True)
fig.write_html(chart_path)

In [None]:
# Chart: Full year procurement timeline
# One bubble per tender across the whole data set, coloured by category.
fig = px.scatter(procurement_data, x="date", y="item", size="value", color="category",
                title="Licitaciones 2023: Línea de Tiempo Completa",
                labels={"value": "Valor (ARS)", "date": "Fecha"},
                hover_data=["id", "value"])

# Add annotation for November concentration
# NOTE(review): y=5 is a fixed position on the item axis — confirm it still
# points at the November cluster when the data set changes.
fig.add_annotation(x=pd.Timestamp('2023-11-20'), y=5, text="ALTA CONCENTRACIÓN", showarrow=True, arrowhead=1, bgcolor="red", font=dict(color="white"))

fig.update_layout(
    yaxis_title="Equipamiento",
    xaxis_title="Fecha",
    # A categorical colour column is rendered as a legend, so the title has to
    # be set on the legend to be visible.
    legend_title_text="Categoría",
    # Kept for the case where "category" is numeric and a colour bar is drawn.
    coloraxis_colorbar=dict(title="Categoría")
)

fig.show()

# Make sure the output folder exists before exporting; otherwise the export
# fails on a fresh checkout where ../public/charts has not been created yet.
chart_path = "../public/charts/procurement_timeline_full.html"
os.makedirs(os.path.dirname(chart_path), exist_ok=True)
fig.write_html(chart_path)

In [None]:
# Monthly procurement chart
# Tag every tender with its calendar month (adds a "month" column to the
# shared frame), then total the value and count the tenders per month.
procurement_data['month'] = procurement_data['date'].dt.to_period('M')
monthly_procurement = procurement_data.groupby('month').agg({
    'value': 'sum',
    'id': 'count'
}).reset_index()
# String form ("YYYY-MM") is what gets plotted on the x axis.
monthly_procurement['month_str'] = monthly_procurement['month'].astype(str)

# Single plot with two y axes: value on the left, tender count on the right.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add value bars (primary axis)
fig.add_trace(
    go.Bar(x=monthly_procurement['month_str'], y=monthly_procurement['value'],
           name="Valor Total", marker_color='lightblue'),
    secondary_y=False,
)

# Add count line (secondary axis); secondary_y=True is what binds the trace
# to the right-hand axis, so no explicit yaxis reference is needed.
fig.add_trace(
    go.Scatter(x=monthly_procurement['month_str'], y=monthly_procurement['id'],
               name="Cantidad", mode='lines+markers', line=dict(color='red')),
    secondary_y=True,
)

# Add annotation for November, only when that month is present in the data
nov_idx = monthly_procurement[monthly_procurement['month_str'] == '2023-11'].index
if len(nov_idx) > 0:
    nov_row = monthly_procurement.loc[nov_idx[0]]
    # Anchored at the top of the November value bar (primary axis).
    fig.add_annotation(x=nov_row['month_str'], y=nov_row['value'],
                       text="PICO DE NOVIEMBRE", showarrow=True, arrowhead=1, bgcolor="red", font=dict(color="white"))

fig.update_layout(
    title="2023: Distribución Mensual de Licitaciones",
    xaxis_title="Mes",
)

fig.update_yaxes(title_text="Valor Total (ARS)", secondary_y=False)
fig.update_yaxes(title_text="Cantidad de Licitaciones", secondary_y=True)

fig.show()

# Make sure the output folder exists before exporting; otherwise the export
# fails on a fresh checkout where ../public/charts has not been created yet.
chart_path = "../public/charts/monthly_procurement_distribution.html"
os.makedirs(os.path.dirname(chart_path), exist_ok=True)
fig.write_html(chart_path)

In [None]:
# Category distribution chart
# Per category: total tender value and number of tenders.
category_sum = procurement_data.groupby('category').agg({
    'value': 'sum',
    'id': 'count'
}).reset_index()

# Two pies side by side: share by value (left) and share by count (right).
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Por Valor', 'Por Cantidad'),
    specs=[[{"type": "pie"}, {"type": "pie"}]]
)

category_labels = category_sum['category']

fig.add_trace(
    go.Pie(labels=category_labels, values=category_sum['value'], name="Valor"),
    row=1, col=1
)

fig.add_trace(
    go.Pie(labels=category_labels, values=category_sum['id'], name="Cantidad"),
    row=1, col=2
)

fig.update_layout(
    title="Distribución de Licitaciones por Categoría"
)

fig.show()

# Make sure the output folder exists before exporting; otherwise the export
# fails on a fresh checkout where ../public/charts has not been created yet.
chart_path = "../public/charts/procurement_category_distribution.html"
os.makedirs(os.path.dirname(chart_path), exist_ok=True)
fig.write_html(chart_path)

In [None]:
# Calendar heatmap for procurement
# Create a date range for the year: one row per calendar day of 2023
dates_range = pd.date_range(start="2023-01-01", end="2023-12-31", freq='D')
calendar_df = pd.DataFrame({'date': dates_range})

# Total tender value per calendar day. Normalising to midnight means a tender
# whose timestamp carries a time of day still lands on its day, and summing
# first keeps the calendar at exactly one row per day even when several
# tenders share a date.
daily_totals = procurement_data.groupby(procurement_data['date'].dt.normalize())['value'].sum()

# Attach the totals to the calendar; days without tenders count as 0
calendar_df['value'] = calendar_df['date'].map(daily_totals).fillna(0)

# Create calendar heatmap: day of month on x, month on y, value as intensity.
# Ask for one bin per day and per month instead of relying on auto-binning.
fig = px.density_heatmap(
    x=calendar_df['date'].dt.day,
    y=calendar_df['date'].dt.month,
    z=calendar_df['value'],
    nbinsx=31,
    nbinsy=12,
    title="Calendario de Licitaciones 2023",
    labels=dict(x="Día", y="Mes", color="Valor"),
    color_continuous_scale='Viridis'
)

fig.show()

# Make sure the output folder exists before exporting; otherwise the export
# fails on a fresh checkout where ../public/charts has not been created yet.
chart_path = "../public/charts/procurement_calendar_heatmap.html"
os.makedirs(os.path.dirname(chart_path), exist_ok=True)
fig.write_html(chart_path)

In [None]:
# Analysis summary
# Tenders dated November 2023 versus everything in the loaded data set.
nov_data = procurement_data[(procurement_data["date"].dt.month == 11) & (procurement_data["date"].dt.year == 2023)]
total_value = procurement_data['value'].sum()
nov_value = nov_data['value'].sum()
# An empty or all-zero data set would otherwise divide by zero and print "nan%".
nov_percentage = (nov_value / total_value) * 100 if total_value else 0.0

print(f"Total de licitaciones en 2023: {len(procurement_data)}")
print(f"Total de licitaciones en noviembre 2023: {len(nov_data)}")
print(f"Valor total: ${total_value:,.2f}")
print(f"Valor en noviembre: ${nov_value:,.2f} ({nov_percentage:.2f}% del total)")

# Check clustering in November
if len(nov_data) > 0:
    nov_dates = nov_data['date'].sort_values()
    # Gaps in whole days between consecutive tenders; the first row has no
    # predecessor, so its NaN gap is dropped.
    date_diffs = nov_dates.diff().dt.days.dropna()

    # At least two tenders are needed for a gap to exist.
    if len(date_diffs) > 0:
        avg_days_between = date_diffs.mean()
        # Whole days: print as an integer rather than "10.0".
        max_days_between = int(date_diffs.max())

        print(f"\nDías promedio entre licitaciones en noviembre: {avg_days_between:.2f}")
        print(f"Días máximos entre licitaciones en noviembre: {max_days_between}")

        # Check whether five or more November tenders fall within 15 days
        if len(nov_data) >= 5:
            first_tender = nov_dates.min()
            last_tender = nov_dates.max()
            total_span = (last_tender - first_tender).days

            # Report the real tender count: this branch also runs for more than five.
            print(f"\nPeríodo de las {len(nov_data)} licitaciones en noviembre: {total_span} días")
            if total_span <= 15:
                print("⚠️ Red Flag: Todas las licitaciones ocurrieron en un período de 15 días o menos")
            else:
                print("✅ Las licitaciones están distribuidas en más de 15 días")

# Narrative text (printed unconditionally, whatever the checks above found)
print("\n\n📰 Narrative:")
print("'5 licitaciones en 15 días. ¿Urgencia operativa… o necesidad de ‘gastar’ antes de cerrar el ejercicio?' ")