In [12]:
import requests
import pandas as pd
import plotly.express as px

# 1) Fetch World Bank "country" metadata (includes aggregates + real countries)
url_meta = "https://api.worldbank.org/v2/country?format=json&per_page=5000"
# A timeout keeps the cell from hanging forever if the API never answers.
resp = requests.get(url_meta, timeout=30)
resp.raise_for_status()
payload = resp.json()
# A successful response is a two-element list: [paging header, list of country
# dicts].  Anything else (e.g. a one-element error message) is reported
# explicitly instead of failing later with an opaque IndexError.
if not isinstance(payload, list) or len(payload) < 2 or not payload[1]:
    raise ValueError(
        f"Unexpected World Bank country metadata response: {str(payload)[:200]}"
    )
meta = payload[1]

# 2) Normalize to a DataFrame and drop aggregates (region.value == "Aggregates")
df_meta = pd.json_normalize(meta)
df_meta = df_meta[df_meta["region.value"] != "Aggregates"]

# 3) Extract the allowed country names.
#    NOTE: on this endpoint "id" is the three-letter code; the two-letter code
#    lives in "iso2Code" (use that column if you prefer to match on ISO-2).
valid_countries = set(df_meta["name"])



def fetch_population_countries_only(
    start_year: int,
    end_year: int,
    *,
    session=None,
    timeout: float = 30.0,
) -> pd.DataFrame:
    """Download total population (SP.POP.TOTL) rows from the World Bank API.

    Pages through ``/v2/country/all/indicator/SP.POP.TOTL`` for the given
    year range, keeps rows whose ``country.id`` is a two-character string,
    drops rows without a value, and returns a long-form table.

    NOTE(review): the two-character check alone does not appear to exclude
    every aggregate (regions, income groups); callers that need real countries
    only should additionally filter on the country metadata.

    Args:
        start_year: First year of the requested range (inclusive).
        end_year: Last year of the requested range (inclusive).
        session: Optional object exposing ``get(url, timeout=...)`` such as a
            ``requests.Session`` (useful for retries or custom TLS adapters).
            When omitted, the module-level ``requests.get`` is used.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        DataFrame with columns ``Country Name``, ``Year`` (int) and
        ``Population``.  Empty (but with those columns) when the API returned
        no usable rows.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    columns = ['Country Name', 'Year', 'Population']
    http = requests if session is None else session
    per_page = 10000
    page = 1
    records = []
    while True:
        url = (
            f"https://api.worldbank.org/v2/country/all/indicator/SP.POP.TOTL"
            f"?date={start_year}:{end_year}"
            f"&format=json&per_page={per_page}&page={page}"
        )
        r = http.get(url, timeout=timeout)
        r.raise_for_status()
        js = r.json()
        # Error payloads have a single element; an exhausted page has no rows.
        if len(js) < 2 or not js[1]:
            break
        records.extend(
            rec for rec in js[1]
            if isinstance(rec['country']['id'], str)
            and len(rec['country']['id']) == 2
        )
        # The paging header tells us when we are done, which avoids one
        # extra round-trip just to discover an empty page.
        total_pages = js[0].get('pages') if isinstance(js[0], dict) else None
        if total_pages is not None and page >= int(total_pages):
            break
        page += 1

    if not records:
        # Selecting columns from an empty frame would raise KeyError.
        return pd.DataFrame({
            'Country Name': pd.Series(dtype=object),
            'Year': pd.Series(dtype=int),
            'Population': pd.Series(dtype=float),
        })

    # Flatten into DataFrame
    df = pd.DataFrame(records)[['country', 'date', 'value']].dropna(subset=['value'])
    df['Country Name'] = df['country'].apply(lambda c: c['value'])
    df['Year'] = df['date'].astype(int)
    df = df.rename(columns={'value': 'Population'})
    return df[columns]

# 1) Fetch 1960–2025 population rows from the World Bank API
df = fetch_population_countries_only(1960, 2025)

# 2) Keep only rows whose name appears in the non-aggregate country metadata,
#    so regions / income groups cannot show up in the ranking.
df = df[df["Country Name"].isin(valid_countries)].copy()

# 3) For each year, pick the top 10 by population.
#    Two stable sorts (population descending, then year ascending) followed by
#    groupby.head keep the "Year" column in the result; groupby.apply on the
#    grouping column is deprecated in newer pandas, and "Year" is needed below
#    for the animation frames.
top10 = (
    df
    .sort_values('Population', ascending=False, kind='stable')
    .sort_values('Year', kind='stable')
    .groupby('Year')
    .head(10)
)

# 4) Build the animated bar race
fig = px.bar(
    top10,
    x='Population',
    y='Country Name',
    orientation='h',
    animation_frame='Year',
    # Fixed x-range so the axis does not rescale between frames.
    range_x=[0, top10['Population'].max() * 1.05],
    title="Top 10 Countries by Population (1960–2025)",
    labels={'Population': 'Total Population', 'Country Name': 'Country'}
)
fig.update_layout(yaxis={'categoryorder': 'total ascending'})
fig.show()



SSLError: HTTPSConnectionPool(host='api.worldbank.org', port=443): Max retries exceeded with url: /v2/country?format=json&per_page=5000 (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1131)')))