In [None]:
import requests
import pandas as pd
import time  # To be polite to the API


In [None]:

def fetch_all_worldbank_documents(query, start_date=None, end_date=None, rows_per_page=100,
                                  pause_seconds=0.25, timeout=30):
    """Fetch every World Bank document matching ``query``, following pagination.

    Pages through the WDS search endpoint ``rows_per_page`` records at a time
    (using the ``os`` offset parameter) until a page contains no documents or
    the total reported by the API has been reached.

    Parameters
    ----------
    query : str
        Search term, sent as ``qterm``.
    start_date, end_date : str, optional
        Sent as ``strdate`` / ``enddate`` when given; omitted otherwise.
    rows_per_page : int
        Page size, sent as ``rows``. Must be positive.
    pause_seconds : float
        Delay between consecutive page requests; ``0`` disables the pause.
    timeout : float or tuple
        Passed to ``requests.get`` so a stalled connection cannot hang the cell.

    Returns
    -------
    pandas.DataFrame
        One row per document with the columns ``Title``, ``Date``, ``Type``,
        ``Country``, ``Report No`` and ``PDF Link``. The columns are present
        even when no document matched.

    Raises
    ------
    ValueError
        If ``rows_per_page`` is not positive.
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    if rows_per_page <= 0:
        raise ValueError("rows_per_page must be a positive integer")

    base_url = "https://search.worldbank.org/api/v3/wds"
    # Output column -> field name in the API response.
    field_map = {
        "Title": "display_title",
        "Date": "docdt",
        "Type": "docty",
        "Country": "count",
        "Report No": "repnb",
        "PDF Link": "pdfurl",
    }
    params = {
        "format": "json",
        "qterm": query,
        "rows": rows_per_page,
        "os": 0,
        "fl": "display_title,docdt,docty,count,repnb,pdfurl",
    }
    if start_date:
        params["strdate"] = start_date
    if end_date:
        params["enddate"] = end_date

    all_records = []
    offset = 0
    while True:
        params["os"] = offset
        response = requests.get(base_url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        documents = data.get("documents") if isinstance(data, dict) else None
        if not isinstance(documents, dict):
            break

        # The "documents" mapping also carries a non-document "facets" entry.
        # Left in, it keeps the mapping truthy forever (so the loop never
        # ends) and contributes an all-None row per page.
        page_docs = [
            doc for key, doc in documents.items()
            if key != "facets" and isinstance(doc, dict)
        ]
        if not page_docs:
            break

        for doc in page_docs:
            all_records.append(
                {column: doc.get(field) for column, field in field_map.items()}
            )
        offset += rows_per_page

        # Skip the final empty round-trip when the API reports a usable total.
        try:
            total = int(data.get("total"))
        except (TypeError, ValueError):
            total = None
        if total is not None and offset >= total:
            break

        if pause_seconds and pause_seconds > 0:
            time.sleep(pause_seconds)  # be polite to the API between pages

    return pd.DataFrame(all_records, columns=list(field_map))


In [None]:
# Example run: every "solar finance" document, across all countries,
# dated 2021-01-01 or later.
df_all = fetch_all_worldbank_documents(
    query="solar finance",
    start_date="2021-01-01",
    rows_per_page=100,
)


In [None]:
# Write the results to a CSV file, leaving out the DataFrame index.
df_all.to_csv(
    "all_worldbank_solar_finance_docs.csv",
    index=False,
)

# Show the first rows as this cell's output.
df_all.head()
