In [8]:
import requests
import pandas as pd
import numpy as np
import plotly.express as px
from tqdm import tqdm
import backoff

In [6]:
@backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_time=30)
@backoff.on_predicate(backoff.expo, lambda x: x.status_code >= 429, max_time=30)
def make_request(url, params=None, debug=False, timeout=30):
    """Send a GET request, retrying with exponential backoff.

    Two retry layers are applied by the decorators:

    * the inner ``on_predicate`` retries while the response status code is
      429 or higher (rate limiting and server errors), for up to 30 seconds;
    * the outer ``on_exception`` retries when ``requests`` raises a
      ``RequestException`` (including timeouts), for up to 30 seconds.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        debug: When true, print the URL and params before each attempt.
        timeout: Seconds to wait for the server before the attempt fails
            with a ``requests`` timeout error. Without a timeout a stalled
            connection would block forever and never reach the retry logic.

    Returns:
        The ``requests.Response`` from the last attempt. The status code is
        not checked here, so callers should inspect it if they need to.
    """
    if debug:
        print(url, params)
    # requests treats params=None the same as omitting the argument,
    # so no branching on params is needed.
    return requests.get(url, params=params, timeout=timeout)


def paginate_openalex(url, params=None, per_page=200, debug=False):
    """Yield every response page for a query, following the paging cursor.

    Paging starts with ``cursor="*"`` and continues with the value found at
    ``meta.next_cursor`` in each response body until that value is empty.

    Args:
        url: Endpoint to query.
        params: Optional dict of query parameters. It is copied, so the
            caller's dict is left untouched.
        per_page: Page size sent as ``per-page`` unless ``params`` already
            contains that key or ``per_page`` is falsy.
        debug: Passed through to ``make_request``.

    Yields:
        The response object returned by ``make_request`` for each page.

    Raises:
        KeyError: If a response body has no ``meta`` / ``next_cursor`` entry.
    """
    # Work on a private copy: the paging keys written below must not leak
    # into the dict the caller handed in.
    query = dict(params) if params else {}
    if "per-page" not in query and per_page:
        query["per-page"] = per_page

    cursor = "*"
    while cursor:
        query["cursor"] = cursor
        r = make_request(url, query, debug=debug)
        yield r

        # The next cursor is read only after the page has been handed to the
        # consumer; a missing or empty value ends the iteration.
        cursor = r.json()["meta"]["next_cursor"]



In [21]:
# Collect one record per (country, publication year) from the OpenAlex
# works endpoint. The group_by parameter asks the API for aggregated counts,
# so each response carries a "group_by" list of key/count entries.
url = "https://api.openalex.org/works"
type_filter_str = "type:article|preprint|dissertation|review|book|letter|other|report|editorial|erratum|grant|supplementary-materials|retraction"
countries = ["US", "CN", "GB", "DE", "JP", "IN", "FR", "CA", "AU", "ES", "RU", "ID", "KR"]
data = []
for country in tqdm(countries):
    # A fresh params dict is built for every country.
    params = {
        "mailto": "jportenoy@ourresearch.org",
        "filter": f"{type_filter_str},from_publication_date:1980-01-01,to_publication_date:2026-01-01,authorships.countries:{country}",
        "group_by": "publication_year",
    }
    for r in paginate_openalex(url, params=params):
        data.extend(
            {
                "country_code": country,
                "publication_year": int(item["key"]),
                "works_count": int(item["count"]),
            }
            for item in r.json()["group_by"]
        )
# Build the frame once from the accumulated records.
df = pd.DataFrame(data)


100%|██████████| 13/13 [00:03<00:00,  4.10it/s]


In [23]:
# Keep only rows whose publication year is strictly between 1999 and 2024,
# then draw one line per country code.
df_show = df.query("1999 < publication_year < 2024")
fig = px.line(
    df_show,
    x="publication_year",
    y="works_count",
    color="country_code",
    height=800,
)
fig.show()