# World Bank WDI – Economy & Growth + External Debt (Mercosur + Bolivia + Chile)

Este notebook descarga **todos los indicadores** de los tópicos *Economy & Growth* y *External debt / Debt & financial flows*,
y baja las series para **Argentina, Brasil, Paraguay, Uruguay, Bolivia y Chile** usando la API del Banco Mundial.

**Salida**: CSV/Excel con metadatos de indicadores y observaciones (país–indicador–año).

> Requisitos: `pandas`, `requests` y `xlsxwriter` (este último solo se usa para exportar el archivo Excel).


In [None]:
# !pip install pandas requests xlsxwriter
import time
import requests
import pandas as pd

# Root URL of the World Bank API (v2); endpoint paths are appended to it.
BASE = "https://api.worldbank.org/v2"

# Countries to download: Mercosur + Bolivia + Chile (display name -> ISO3 code).
COUNTRIES = dict(
    Argentina="ARG",
    Brasil="BRA",
    Paraguay="PRY",
    Uruguay="URY",
    Bolivia="BOL",
    Chile="CHL",
)

# Keywords used to locate the desired topics by name.
TOPIC_NAME_KEYWORDS = dict(
    economy_growth=["economy", "growth"],
    external_debt=["debt"],  # covers "External debt", "Debt & financial flows"
)

# Year range, inclusive (adjust as needed).
START_YEAR, END_YEAR = 2000, 2024

# Prefix shared by all output files.
OUT_PREFIX = "worldbank_wdi_mercosur_bol_chile"

# Pause between per-indicator requests, in seconds (courtesy to the API).
REQUEST_SLEEP = 0.1


## Funciones auxiliares
- `_get_json`: GET con reintentos
- `list_topics`: lista de tópicos
- `pick_topic_ids_by_keywords`: busca IDs de tópicos por palabras clave
- `list_indicators_for_topic(s)`: lista indicadores de un tópico (con paginación)
- `fetch_series`: descarga serie por indicador/países/años (con paginación)


In [None]:
def _get_json(url, params=None, max_retries=5, backoff=1.5):
    """GET *url* and return its decoded JSON body, retrying on failure.

    Args:
        url: Full URL to request.
        params: Optional query-string parameters forwarded to ``requests.get``.
        max_retries: Maximum number of attempts. Values below 1 are treated
            as a single attempt so the function never returns ``None``
            without having made a request.
        backoff: Base of the exponential wait; after failed attempt ``k``
            the function sleeps ``backoff ** k`` seconds before retrying.

    Returns:
        The parsed JSON payload.

    Raises:
        requests.RequestException: Transport error or non-2xx status on the
            final attempt.
        ValueError: Response body was not valid JSON on the final attempt.
    """
    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            r = requests.get(url, params=params, timeout=60)
            r.raise_for_status()
            return r.json()
        # Only request/decoding failures are retried; anything else is a
        # programming error and should surface immediately.
        except (requests.RequestException, ValueError):
            if attempt == attempts:
                raise
            time.sleep(backoff ** attempt)

def list_topics():
    """Fetch the topic catalogue from the ``/topic`` endpoint.

    Returns:
        DataFrame holding the ``id`` and ``value`` columns of the topic
        records found in the second element of the JSON response.
    """
    payload = _get_json(f"{BASE}/topic", params={"format": "json"})
    topics = pd.DataFrame(payload[1])
    wanted_columns = ["id", "value"]
    return topics[wanted_columns]

def pick_topic_ids_by_keywords(topics_df, keywords):
    """Return the IDs of topics whose name contains every keyword.

    Matching is a case-insensitive substring test against the ``value``
    column.

    Args:
        topics_df: DataFrame with ``id`` and ``value`` columns.
        keywords: Iterable of strings that must all occur in the topic
            name. An empty iterable matches every topic.

    Returns:
        List of matching topic IDs converted to ``str``, in row order.
    """
    # Lower-case once and materialise up front: this avoids repeating the
    # work per row and keeps a one-shot iterable (e.g. a generator) from
    # being exhausted after the first row, which would make every later
    # row match vacuously.
    needles = tuple(kw.lower() for kw in keywords)
    return [
        str(topic_id)
        for topic_id, name in zip(topics_df["id"], topics_df["value"])
        if all(needle in str(name).lower() for needle in needles)
    ]

def list_indicators_for_topic(topic_id):
    """List the indicators of one topic, following pagination.

    Args:
        topic_id: Topic identifier interpolated into the
            ``/topic/{topic_id}/indicator`` endpoint path.

    Returns:
        DataFrame with columns ``id``, ``name``, ``unit``, ``sourceNote``,
        ``sourceOrganization`` and ``topic_id`` (the argument, repeated on
        every row). Empty, with the same columns, when no indicator
        records were returned.

    Raises:
        Whatever ``_get_json`` raises once its retries are exhausted.
    """
    columns = ["id", "name", "unit", "sourceNote", "sourceOrganization", "topic_id"]
    indicators = []
    page = 1
    while True:
        j = _get_json(f"{BASE}/topic/{topic_id}/indicator", params={
            "format": "json",
            "per_page": 20000,
            "page": page
        })
        meta = j[0]
        # The data slot may be missing or JSON null; treat both as an empty
        # page instead of letting extend(None) raise TypeError.
        data = (j[1] if len(j) > 1 else None) or []
        indicators.extend(data)
        # A missing/null page count means "single page"; int() guards
        # against the count arriving as a string.
        if page >= int(meta.get("pages") or 1):
            break
        page += 1
        time.sleep(0.2)  # brief pause between pages
    if not indicators:
        return pd.DataFrame(columns=columns)
    df = pd.DataFrame(indicators)
    df["topic_id"] = topic_id
    # reindex both orders the columns and creates any that are absent.
    return df.reindex(columns=columns)

def list_indicators_for_topics(topic_ids):
    """Collect the indicators of several topics into one de-duplicated frame.

    Args:
        topic_ids: Iterable of topic identifiers, each passed to
            ``list_indicators_for_topic``.

    Returns:
        DataFrame with columns ``id``, ``name``, ``unit``, ``sourceNote``,
        ``sourceOrganization`` and ``topic_id``. When an indicator ``id``
        appears under more than one topic only its first occurrence is
        kept. Empty, with the same columns, when ``topic_ids`` is empty.
    """
    per_topic = [list_indicators_for_topic(tid) for tid in topic_ids]
    if not per_topic:
        return pd.DataFrame(columns=["id","name","unit","sourceNote","sourceOrganization","topic_id"])
    combined = pd.concat(per_topic, ignore_index=True)
    unique = combined.drop_duplicates(subset=["id"])
    return unique.reset_index(drop=True)

def fetch_series(indicator_code, country_codes, start_year, end_year):
    """Download one indicator for several countries, following pagination.

    Args:
        indicator_code: Indicator identifier interpolated into the
            ``/country/{codes}/indicator/{indicator_code}`` endpoint path.
        country_codes: Iterable of country codes; joined with ``;`` in the
            URL.
        start_year: First year of the requested ``date`` range.
        end_year: Last year of the requested ``date`` range.

    Returns:
        DataFrame with columns ``country_iso3``, ``country``,
        ``indicator``, ``year`` (nullable ``Int64``; unparsable dates
        become ``<NA>``) and ``value``. Empty, with the same columns, when
        no observations were returned.

    Raises:
        Whatever ``_get_json`` raises once its retries are exhausted.
    """
    columns = ["country_iso3", "country", "indicator", "year", "value"]
    series = []
    page = 1
    codes = ";".join(country_codes)
    while True:
        j = _get_json(f"{BASE}/country/{codes}/indicator/{indicator_code}", params={
            "date": f"{start_year}:{end_year}",
            "format": "json",
            "per_page": 20000,
            "page": page
        })
        meta = j[0]
        # The data slot may be missing or JSON null (no observations);
        # treat both as an empty page instead of iterating over None.
        data = (j[1] if len(j) > 1 else None) or []
        for item in data:
            series.append({
                "country_iso3": item.get("countryiso3code"),
                # "country" is a nested object; tolerate it being null.
                "country": (item.get("country") or {}).get("value"),
                "indicator": indicator_code,
                "date": item.get("date"),
                "value": item.get("value")
            })
        # A missing/null page count means "single page"; int() guards
        # against the count arriving as a string.
        if page >= int(meta.get("pages") or 1):
            break
        page += 1
        time.sleep(0.25)  # brief pause between pages
    if not series:
        return pd.DataFrame(columns=columns)
    df = pd.DataFrame(series)
    # Non-numeric dates are coerced to <NA> rather than raising.
    df["year"] = pd.to_numeric(df["date"], errors="coerce").astype("Int64")
    df = df.drop(columns=["date"])
    return df[columns]


## 1) Buscar IDs de tópicos y listar indicadores
Esto detecta los IDs de *Economy & Growth* y *External debt* y guarda los metadatos en `*_indicators_meta.csv`.


In [None]:
# Download the topic catalogue and show it.
topics_df = list_topics()
display(topics_df)

# Resolve the topic IDs for each keyword group.
econ_ids, debt_ids = (
    pick_topic_ids_by_keywords(topics_df, TOPIC_NAME_KEYWORDS[group])
    for group in ("economy_growth", "external_debt")
)
# dict.fromkeys removes duplicates while keeping first-seen order.
topic_ids = list(dict.fromkeys([*econ_ids, *debt_ids]))

print("Economy/Growth topic IDs:", econ_ids)
print("External debt topic IDs:", debt_ids)
print("Using topic IDs:", topic_ids)

# Fetch indicator metadata and switch to the output column names;
# "unit" keeps its name and "topic_id" (last column) becomes "topic_ids".
column_names = {
    "id": "indicator_code",
    "name": "indicator_name",
    "sourceNote": "source_note",
    "sourceOrganization": "source_org",
    "topic_id": "topic_ids",
}
indicators_df = list_indicators_for_topics(topic_ids).rename(columns=column_names)
indicators_df = indicators_df.sort_values("indicator_code")
indicators_df = indicators_df.reset_index(drop=True)
display(indicators_df.head())

# Persist the metadata table.
indicators_path = f"{OUT_PREFIX}_indicators_meta.csv"
indicators_df.to_csv(indicators_path, index=False, encoding="utf-8")
print("Guardado:", indicators_path, "(n=", len(indicators_df), ")")


## 2) Descargar todas las series para los países seleccionados
Crea un CSV `*_observations.csv` y un Excel con dos hojas (`indicators_meta` y `observations`).


In [None]:
country_codes = list(COUNTRIES.values())
all_obs = []
total_indicators = len(indicators_df)

# enumerate() supplies a 1-based position for the progress message, so the
# counter does not depend on the DataFrame's index labels.
for done, ind in enumerate(indicators_df["indicator_code"], start=1):
    try:
        obs = fetch_series(ind, country_codes, START_YEAR, END_YEAR)
    except Exception as e:
        # Deliberate best-effort: report the failing indicator and continue
        # with the remaining ones.
        print(f"⚠️ Error en indicador {ind}: {e}")
    else:
        if not obs.empty:
            all_obs.append(obs)
    time.sleep(REQUEST_SLEEP)
    if done % 25 == 0:
        print(f"Progreso: {done}/{total_indicators} indicadores")

if all_obs:
    observations_df = pd.concat(all_obs, ignore_index=True)
else:
    observations_df = pd.DataFrame(columns=["country_iso3","country","indicator","year","value"])

display(observations_df.head())

obs_path = f"{OUT_PREFIX}_observations.csv"
observations_df.to_csv(obs_path, index=False, encoding="utf-8")
print("Guardado:", obs_path, "(filas=", len(observations_df), ")")

# The Excel workbook is an extra: when the xlsxwriter engine is not
# installed, report it and skip the export instead of failing the cell
# (the CSV outputs are already on disk at this point).
xlsx_path = f"{OUT_PREFIX}.xlsx"
try:
    with pd.ExcelWriter(xlsx_path, engine="xlsxwriter") as xw:
        indicators_df.to_excel(xw, sheet_name="indicators_meta", index=False)
        observations_df.to_excel(xw, sheet_name="observations", index=False)
except ImportError as e:
    print("No se pudo guardar el Excel (falta xlsxwriter):", e)
else:
    print("Guardado:", xlsx_path)


## 3) Resumen rápido (opcional)
Cuenta las observaciones no nulas por país/indicador para chequear cobertura y guarda el resultado en `*_summary_counts.csv`.


In [None]:
# Coverage check: number of non-null values per (country, indicator) pair.
# Any failure is reported and swallowed, since this step is optional.
try:
    by_pair = observations_df.groupby(["country_iso3","indicator"], dropna=False)
    point_counts = by_pair["value"].count().reset_index(name="num_points")
    # Within each country, list the best-covered indicators first.
    summary = point_counts.sort_values(
        ["country_iso3","num_points"],
        ascending=[True, False],
    )
    display(summary.head(20))
    summary_path = f"{OUT_PREFIX}_summary_counts.csv"
    summary.to_csv(summary_path, index=False, encoding="utf-8")
    print("Guardado:", summary_path)
except Exception as e:
    print("No se pudo generar el resumen:", e)
