## World Bank API

In [1]:
import requests
import pandas as pd

def get_wb_indicator(indicator, start_year=2000, end_year=2023):
    """Download one World Bank indicator for all countries/aggregates.

    Args:
        indicator: World Bank indicator code, e.g. "SE.XPD.TOTL.GD.ZS".
        start_year: First year of the requested range (inclusive).
        end_year: Last year of the requested range (inclusive).

    Returns:
        A DataFrame with columns ``country``, ``country_id``, ``year`` and
        ``value``. Observations whose value is null are dropped. The frame
        is empty (but keeps its columns) when nothing is available.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        ValueError: If the response is not the expected ``[metadata, data]``
            pair (the API reports errors as a single-element list).
    """
    base_url = (
        f"https://api.worldbank.org/v2/country/all/indicator/{indicator}"
        f"?date={start_year}:{end_year}&format=json&per_page=5000"
    )
    records = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the notebook.
        res = requests.get(f"{base_url}&page={page}", timeout=30)
        res.raise_for_status()
        payload = res.json()
        if not isinstance(payload, list) or len(payload) < 2:
            raise ValueError(
                f"Unexpected World Bank API response for {indicator!r}: {payload!r}"
            )
        meta, data = payload[0], payload[1]  # index 0 is paging metadata
        # The data element is null when the query matches no observations.
        records.extend({
            'country': d['country']['value'],
            'country_id': d['country']['id'],
            'year': int(d['date']),
            'value': d['value']
        } for d in (data or []) if d['value'] is not None)
        # The result may span several pages; reading only the first one
        # would silently truncate the data set.
        if page >= int(meta.get('pages') or 1):
            break
        page += 1
    return pd.DataFrame(records, columns=['country', 'country_id', 'year', 'value'])

# Example usage: fetch education expenditure (% of GDP) and preview the
# first five rows of the resulting frame.
df_edu = get_wb_indicator(indicator="SE.XPD.TOTL.GD.ZS")
print(df_edu.head(5))

                       country country_id  year     value
0  Africa Eastern and Southern         ZH  2022  4.628624
1  Africa Eastern and Southern         ZH  2021  4.771326
2  Africa Eastern and Southern         ZH  2020  4.352440
3  Africa Eastern and Southern         ZH  2019  4.944317
4  Africa Eastern and Southern         ZH  2018  4.979931


In [2]:
import requests
import pandas as pd
from time import sleep
from functools import reduce

# Mapping from output column name to World Bank indicator code.
indicators = {
    # Education expenditure (% of GDP)
    "edu_exp_gdp": "SE.XPD.TOTL.GD.ZS",
    # GDP per capita (current US$)
    "gdp_per_capita": "NY.GDP.PCAP.CD",
    # General government spending (% of GDP)
    "gov_exp_gdp": "NE.CON.GOVT.ZS",
    # Unemployment rate
    "unemployment_rate": "SL.UEM.TOTL.ZS",
    # Inflation
    "inflation_rate": "FP.CPI.TOTL.ZG",
    # Population
    "population": "SP.POP.TOTL",
    # Urban population (%)
    "urban_pop_percent": "SP.URB.TOTL.IN.ZS",
    # Adult literacy
    "literacy_rate": "SE.ADT.LITR.ZS",
    # Secondary enrollment, used as a proxy for an education index
    "education_index": "SE.SEC.ENRR",
    # GNI per capita, used as a proxy for HDI
    "hdi_proxy": "NY.GNP.PCAP.CD",
}

def get_indicator_df(indicator_code, col_name, start_year=2000, end_year=2023):
    """Download one World Bank indicator into a merge-ready DataFrame.

    Args:
        indicator_code: World Bank indicator code, e.g. "SP.POP.TOTL".
        col_name: Name of the column that will hold the indicator values.
        start_year: First year of the requested range (inclusive).
        end_year: Last year of the requested range (inclusive).

    Returns:
        A DataFrame with columns ``country``, ``country_code``, ``year`` and
        ``col_name``. Observations whose value is null are dropped. On a
        failed request or an unexpected payload, "Failed: <col_name>" is
        printed and an empty frame is returned; the empty frame still
        carries the four columns so it can take part in a merge on the keys.
    """
    columns = ['country', 'country_code', 'year', col_name]
    base_url = (
        f"https://api.worldbank.org/v2/country/all/indicator/{indicator_code}"
        f"?date={start_year}:{end_year}&format=json&per_page=5000"
    )
    records = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(f"{base_url}&page={page}", timeout=30)
        if response.status_code != 200:
            print(f"Failed: {col_name}")
            return pd.DataFrame(columns=columns)
        payload = response.json()
        # The API reports errors as a single-element list, so the
        # [metadata, data] pair has to be checked before indexing.
        if not isinstance(payload, list) or len(payload) < 2:
            print(f"Failed: {col_name}")
            return pd.DataFrame(columns=columns)
        meta, data = payload[0], payload[1]
        # The data element is null when the query matches no observations.
        for item in data or []:
            if item['value'] is not None:
                records.append({
                    'country': item['country']['value'],
                    'country_code': item['country']['id'],
                    'year': int(item['date']),
                    col_name: item['value']
                })
        # The result may span several pages; reading only the first one
        # would silently truncate the data set.
        if page >= int(meta.get('pages') or 1):
            break
        page += 1
    return pd.DataFrame(records, columns=columns)

# Download every indicator, pausing between requests.
dfs = []
for name, code in indicators.items():
    print(f"Fetching: {name}")
    df = get_indicator_df(code, name)
    dfs.append(df)
    sleep(1)

# Merge all indicator tables on the shared keys. A failed download yields an
# empty frame that may lack the key columns, which would make pd.merge raise
# KeyError, so empty frames are left out of the merge.
merge_keys = ['country', 'country_code', 'year']
frames = [frame for frame in dfs if not frame.empty]
if frames:
    df_merged = reduce(
        lambda left, right: pd.merge(left, right, on=merge_keys, how='outer'),
        frames,
    )
else:
    # Every download failed: keep df_merged defined with just the key columns.
    df_merged = pd.DataFrame(columns=merge_keys)

# # Save as CSV
# df_merged.to_csv("worldbank_edu_refugee_related_data.csv", index=False)
# print("✅ Data saved to 'worldbank_edu_refugee_related_data.csv'")

Fetching: edu_exp_gdp
Fetching: gdp_per_capita
Fetching: gov_exp_gdp
Fetching: unemployment_rate
Fetching: inflation_rate
Fetching: population
Fetching: urban_pop_percent
Fetching: literacy_rate
Fetching: education_index
Fetching: hdi_proxy
