In [None]:
import requests
import numpy as np
import json
import pandas as pd
from datetime import datetime
import time
import os
import plotly.express as px
import plotly.graph_objects as go

### Define functions

In [2]:
def fetch_wb_documents():
    """API call to get metadata"""
    base_url = "https://search.worldbank.org/api/v3/wds"
    fields = "id,docdt,docty,majdocty,url,count,lang,repnb,projid,alt_title,display_title_exact,display_title"

    offset = 0
    rows_per_page = 100

    print("Fetching data from World Bank API v3...")

    # Get the first page
    params = {
        "format": "json",
        "rows": rows_per_page,
        "docty_exact": "Implementation Completion Report Review",
        "majdocty_exact": "Project Documents",
        "strdate": "2019-01-01",
        "enddate": "2025-04-15",
        "fl": fields,
        "os": offset,
    }

    response = requests.get(base_url, params=params)
    response.raise_for_status()
    all_data = response.json()

    total_docs = all_data.get("total", 0)
    fetched_count = len(all_data.get("documents", {}))

    print(f"Fetched {fetched_count} of {total_docs} documents")

    while fetched_count < total_docs:
        offset += rows_per_page

        params["os"] = offset

        response = requests.get(base_url, params=params)
        response.raise_for_status()
        page_data = response.json()

        page_docs = page_data.get("documents", {})

        all_data["documents"].update(page_docs)

        fetched_count = len(all_data.get("documents", {}))
        print(f"Fetched {fetched_count} of {total_docs} documents")

        time.sleep(0.5)

    print(f"Total documents fetched: {fetched_count}")
    return all_data


def extract_to_df(docs_json):
    "ET meta info to df"
    documents = docs_json.get("documents", {})

    doc_values = [
        doc
        for key, doc in documents.items()
        if key != "facets" and isinstance(doc, dict)
    ]
    title = [doc.get("display_title", "") for doc in doc_values]
    og_country = [doc.get("count", "") for doc in doc_values]
    p_date = [doc.get("docdt", "") for doc in doc_values]

    df = pd.DataFrame(
        {"Title": title, "OG Country": og_country, "Publication Year": p_date}
    )

    df["Publication Year"] = pd.to_datetime(df["Publication Year"]).dt.strftime("%Y")
    df["Country_ex"] = df["Title"].str.split("-").str[0].str.strip()

    return df

### Get data and correct country names

In [3]:
iccr_docs = fetch_wb_documents()
df_iccr = extract_to_df(iccr_docs)

Fetching data from World Bank API v3...
Fetched 101 of 1623 documents
Fetched 201 of 1623 documents
Fetched 301 of 1623 documents
Fetched 401 of 1623 documents
Fetched 501 of 1623 documents
Fetched 601 of 1623 documents
Fetched 701 of 1623 documents
Fetched 801 of 1623 documents
Fetched 901 of 1623 documents
Fetched 1001 of 1623 documents
Fetched 1101 of 1623 documents
Fetched 1201 of 1623 documents
Fetched 1301 of 1623 documents
Fetched 1401 of 1623 documents
Fetched 1501 of 1623 documents
Fetched 1601 of 1623 documents
Fetched 1624 of 1623 documents
Total documents fetched: 1624


In [4]:
# check differences between extracted and og country
df_iccr["diff_og_ex"] = np.where(df_iccr["Country_ex"] == df_iccr["OG Country"], 0, 1)

# manual correction of detected differences
df_iccr["Country_ex_2"] = df_iccr["Country_ex"].replace(
    {
        "Albania Gender Equality DPF": "Albania",
        "Belarus Forestry Development Project": "Belarus",
        "Burkina Faso Public Sector Modernization Program": "Burkina Faso",
        "China Health Reform Program": "China",
        "Cote d": "Cote d'Ivoire",
        "Education sector policy : Harsha Project": "Harsha Project",
        "Grenada First Fiscal Resilience and Blue Growth Development Policy Credit": "Grenada",
        "HarshaNov19th_OUO": "Harsha Project",
        "Niger Community Action Project for Climate Resilience": "Niger",
        "Rwanda Energy Sector Development Policy Loan (P162671)": "Rwanda",
        "Viet Nam": "Vietnam",
    }
)

### Plot interactive graph, press play

In [5]:
df_g = (
    df_iccr.groupby(["Country_ex_2", "Publication Year"])
    .size()
    .reset_index(name="Count")
)
df_g = df_g.rename(columns={"Country_ex_2": "Country", "Publication Year": "Year"})
yearly_totals = df_g.groupby("Year")["Count"].sum().reset_index()
yearly_totals.columns = ["Year", "Year Total"]
df_g = pd.merge(df_g, yearly_totals, on="Year")
df_g["Percentage"] = (df_g["Count"] / df_g["Year Total"] * 100).round(1)


years = df_g["Year"].unique().tolist()
years.sort()

all_countries = df_g["Country"].unique().tolist()
all_countries.sort()  # Sort alphabetically

max_percentage = df_g["Percentage"].max()

In [8]:
fig_percentages = px.bar(
    df_g,
    x="Country",
    y="Percentage",
    animation_frame="Year",
    title="Percentage of Documents by Country",
    labels={"Percentage": "Percentage of Documents", "Country": "Country"},
    color="Country",
    range_y=[0, max_percentage * 1.1],
)


# Order countries and add margins
fig_percentages.update_layout(
    xaxis={"categoryorder": "array", "categoryarray": all_countries},
    showlegend=False,
    margin=dict(b=150),
)

# Remove Play/Pause buttons
fig_percentages.update_layout(updatemenus=[])

# Update each frame title and bar text
for frame in fig_percentages.frames:
    year_value = frame.name
    frame.layout = go.Layout(
        title_text=f"Percentage of Documents by Country - Year: {year_value}",
        xaxis=dict(categoryorder="array", categoryarray=all_countries),
        yaxis=dict(range=[0, max_percentage * 1.1]),
    )


# Add only slider (no Play/Pause), moved lower down
fig_percentages.update_layout(
    sliders=[
        {
            "active": 0,
            "yanchor": "top",
            "xanchor": "left",
            "currentvalue": {
                "font": {"size": 16},
                "prefix": "",
                "visible": True,
                "xanchor": "right",
            },
            "pad": {"b": 20, "t": 120},  # Extra top padding to avoid overlap
            "len": 0.9,
            "x": 0.1,
            "y": -0.4,  # Push slider further down
            "steps": [
                {
                    "args": [
                        [year],
                        {"frame": {"duration": 0, "redraw": True}, "mode": "immediate"},
                    ],
                    "label": str(year),
                    "method": "animate",
                }
                for year in years
            ],
        }
    ]
)

# Ensure x-axis labels are visible and angled
fig_percentages.update_layout(
    xaxis=dict(layer="above traces", tickangle=45), height=700
)

# Show the figure
fig_percentages.show()