# Assignment 5 extra exercises: Visualizing the World Development Indicators

---

Let's continue with Hans Rosling's visualization in the first part of his TED talk: https://www.ted.com/playlists/474/the_best_hans_rosling_talks_yo  
There is an interactive visualization here: https://www.gapminder.org/tools/#$chart-type=bubbles&url=v2  
Let's build an animated version of it (saved as a GIF) with Python, using World Bank data!

In [1]:
import requests, time, warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation, PillowWriter

# ---- configuration ----
YEAR_MIN, YEAR_MAX = 1962, 2024  # inclusive range of years kept for the animation
FPS = 12                         # frames per second of the saved animation
OUTPUT = "gapminder.gif"         # file the animation is written to
POINT_ALPHA = 0.75               # opacity of the bubbles

# ---- country metadata ----
# The JSON body unpacks into a metadata element followed by the list of
# country entries.
r = requests.get(
    "https://api.worldbank.org/v2/country?format=json&per_page=400",
    timeout=60,
)
r.raise_for_status()
meta, rows = r.json()

# Keep only entries that have a region and whose region is not "Aggregates"
# (compared case-insensitively).
countries = [
    {"iso3": c.get("id"), "name": c.get("name"), "region": region}
    for c in rows
    if (region := (c.get("region") or {}).get("value"))
    and region.lower() != "aggregates"
]
df_countries = pd.DataFrame(countries)

def _get_all(url, params):
    """Download every page of a World Bank API listing and return all records.

    Parameters
    ----------
    url : str
        Endpoint to query.
    params : dict
        Extra query parameters. ``format=json`` and ``per_page=20000`` are
        always added and override same-named keys.

    Returns
    -------
    list
        The record entries of all pages, concatenated in page order. Empty
        when the API reports no records.

    Raises
    ------
    requests.HTTPError
        If any request comes back with an error status.
    ValueError
        If a response body is not the expected ``[metadata, records]`` list.
    """
    query = {**params, "format": "json", "per_page": 20000}

    def _fetch_page(page_query):
        # Fetch one page and split it into (metadata, records).
        resp = requests.get(url, params=page_query, timeout=60)
        resp.raise_for_status()
        payload = resp.json()
        # Anything that is not a [metadata, records] pair (e.g. an API-level
        # error message) is reported explicitly instead of surfacing later
        # as an opaque IndexError.
        if not isinstance(payload, list) or len(payload) < 2:
            raise ValueError(
                f"Unexpected World Bank API response for {url}: {payload!r}"
            )
        page_meta, records = payload[0], payload[1]
        # The records element can be null when there is nothing to return;
        # treat that as an empty page rather than failing in list.extend.
        return page_meta or {}, records or []

    meta, first_page = _fetch_page(query)
    out = list(first_page)
    for page in range(2, int(meta.get("pages", 1)) + 1):
        _, records = _fetch_page({**query, "page": page})
        out.extend(records)
        time.sleep(0.05)  # short pause between consecutive page requests
    return out

# Indicator endpoint for all countries; the indicator code is appended per request.
base = "https://api.worldbank.org/v2/country/all/indicator"

# Column name used in this script -> World Bank indicator code.
codes = {
    "gdp_pc": "NY.GDP.PCAP.CD",
    "life_exp": "SP.DYN.LE00.IN",
    "pop": "SP.POP.TOTL",
}

# Look the codes up in `codes` (single source of truth) and use single quotes
# inside the replacement field: reusing the enclosing double quotes is only
# valid syntax from Python 3.12 onwards.
rows_gdp = _get_all(f"{base}/{codes['gdp_pc']}", {})
rows_le = _get_all(f"{base}/{codes['life_exp']}", {})
rows_pop = _get_all(f"{base}/{codes['pop']}", {})

def _to_df(rows, colname):
    recs = []
    for r in rows:
        if r is None: 
            continue
        y = r.get("date")
        try: 
            year = int(y)
        except: 
            continue
        recs.append({
            "iso3": r.get("countryiso3code"),
            "year": year,
            colname: pd.to_numeric(r.get("value"), errors="coerce")
        })
    return pd.DataFrame(recs).dropna(subset=["iso3", "year"])

# One frame per indicator, each carrying its own value column.
df_gdp, df_le, df_pop = (
    _to_df(raw, column)
    for raw, column in (
        (rows_gdp, "gdp_pc"),
        (rows_le, "life_exp"),
        (rows_pop, "pop"),
    )
)

# ---- merge + clean ----
# Inner joins: only country-years present in all three indicators survive.
df = df_countries.merge(df_gdp, on="iso3", how="inner")
df = df.merge(df_le, on=["iso3", "year"], how="inner")
df = df.merge(df_pop, on=["iso3", "year"], how="inner")

# Restrict to the configured year window (both ends inclusive), drop rows
# missing any indicator, and keep only strictly positive GDP per capita.
df = df[df["year"].between(YEAR_MIN, YEAR_MAX)].copy()
df = df.dropna(subset=["gdp_pc", "life_exp", "pop"])
df = df.loc[df["gdp_pc"] > 0]

# Distinct years with data, ascending; these become the animation frames.
years = sorted(df["year"].drop_duplicates().tolist())
if len(years) == 0:
    raise SystemExit("No data available in the chosen year range.")

# Name and region per country, taken from the last row of each country after
# sorting by year.
latest = (
    df.sort_values("year")
      .groupby("iso3")[["name", "region"]]
      .last()
)
country_to_region = latest["region"].to_dict()

# Distinct regions in sorted order; each gets one colour from the active
# matplotlib colour cycle, with a built-in palette as fallback.
uregions = sorted(latest["region"].dropna().unique())
cycle = plt.rcParams["axes.prop_cycle"].by_key().get("color", [])
if not cycle:
    cycle = [
        "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
        "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf",
    ]
region_color = {
    name: cycle[idx % len(cycle)]  # wrap around when regions outnumber colours
    for idx, name in enumerate(uregions)
}
country_color = {
    code: region_color[country_to_region.get(code, uregions[0])]
    for code in latest.index
}

# Axis limits from the 1st/99th percentiles, clamped to fixed bounds.
x_min = max(100, float(np.nanpercentile(df["gdp_pc"], 1)))
x_max = float(np.nanpercentile(df["gdp_pc"], 99))
y_min = max(20, float(np.nanpercentile(df["life_exp"], 1)))
y_max = min(90, float(np.nanpercentile(df["life_exp"], 99)))


# ---- figure scaffolding ----
fig, ax = plt.subplots(figsize=(10, 6), dpi=120)

# Empty scatter that the animation callbacks fill in frame by frame.
scat = ax.scatter([], [], s=[], alpha=POINT_ALPHA)

# Title (updated every frame) and a static source note, both in axes coordinates.
ttl = ax.text(
    0.02, 0.98, "",
    transform=ax.transAxes, va="top", ha="left",
    fontsize=16, weight="bold",
)
src = ax.text(
    0.02, 0.92,
    "Source: World Bank (NY.GDP.PCAP.CD, SP.DYN.LE00.IN, SP.POP.TOTL)",
    transform=ax.transAxes, va="top", ha="left", fontsize=9,
)

ax.set_xscale("log")
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_xlabel("GDP per capita (current US$, log)")
ax.set_ylabel("Life expectancy at birth (years)")
ax.grid(True, which="both", linestyle=":", linewidth=0.5, alpha=0.5)

# Legend: one empty proxy scatter per region, in region_color order.
handles = [
    ax.scatter([], [], s=80, color=colour, alpha=POINT_ALPHA)
    for colour in region_color.values()
]
labels = list(region_color)
ax.legend(handles, labels, title="Region", loc="lower right", frameon=True)

# Pre-split the data so each frame is a dictionary lookup.
by_year = {year: frame for year, frame in df.groupby("year")}

def init():
    """Reset the scatter and title to an empty state; return both artists."""
    no_points = np.empty((0, 2))
    no_sizes = np.array([])
    scat.set_offsets(no_points)
    scat.set_sizes(no_sizes)
    scat.set_color([])
    ttl.set_text("")
    return scat, ttl

def update(y):
    """Draw the bubbles for year ``y`` and refresh the title.

    Looks up the rows for ``y`` in ``by_year``; when there are none the
    scatter is cleared. Returns the scatter and title artists.
    """
    frame = by_year.get(y)
    if frame is not None and not frame.empty:
        # x = GDP per capita, y = life expectancy; size scales with sqrt(pop).
        xy = np.column_stack([frame["gdp_pc"].values, frame["life_exp"].values])
        areas = np.sqrt(frame["pop"].values) * 0.18
        colours = frame["iso3"].map(country_color).values
    else:
        xy, areas, colours = np.empty((0, 2)), np.array([]), []
    scat.set_offsets(xy)
    scat.set_sizes(areas)
    scat.set_color(colours)
    ttl.set_text(f"Life Expectancy vs GDP per Capita — {y}")
    return scat, ttl


# One frame per year with data; the interval (ms) matches the target FPS.
anim = FuncAnimation(
    fig,
    update,
    frames=years,
    init_func=init,
    blit=False,
    interval=1000 / FPS,
    repeat=True,
)

# Write the GIF with Pillow, then release the figure.
gif_writer = PillowWriter(fps=FPS)
anim.save(OUTPUT, writer=gif_writer)
plt.close(fig)

Saved gapminder.gif
