<a href="https://colab.research.google.com/github/ashikita/openalex-api-notebook/blob/main/cited_by_count_articles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

PyAlexを使って、OpenAlexから指定した機関（ROR ID）の論文情報を被引用数の多い順に取得し、TSVファイルに保存します。

In [None]:
# Run only the first time (installs the pyalex package)
!pip install pyalex

In [None]:
import pyalex
from pyalex import Works
import csv

# === Settings ===
# ROR ID of the target institution, without the "https://ror.org/" prefix.
ROR_ID = "00p4k0j84"
# Publication-date window passed to the works filter (YYYY-MM-DD).
FROM_DATE = "2024-01-01"
TO_DATE = "2024-12-31"
# Upper bound on the number of works collected and exported.
MAX_RESULTS = 1000
# Number of works requested per API call.
PER_PAGE = 200
# Path of the tab-separated output file.
OUTPUT_FILE = "output.tsv"

# Contact e-mail registered in pyalex's configuration.
# NOTE(review): this looks like a placeholder address — replace it with your own.
pyalex.config.email = "hogehoge@m.kyushu-u.ac.jp"

# Pagination: fetch pages by cursor until MAX_RESULTS works have been
# collected or the result set is exhausted.
results = []
cursor = "*"  # cursor value used for the first request
while len(results) < MAX_RESULTS:
    # Works whose authorships include the configured institution, published
    # inside the date window, of type "article", sorted by citation count
    # in descending order.
    batch = (
        Works()
        .filter(
            authorships={"institutions": {"ror": ROR_ID}},
            from_publication_date=FROM_DATE,
            to_publication_date=TO_DATE,
            type="article",
        )
        .sort(cited_by_count="desc")
        .get(per_page=PER_PAGE, cursor=cursor)
    )

    results.extend(batch)
    # A short (or empty) page means there is nothing left to fetch.
    if len(batch) < PER_PAGE:
        break
    # Stop when no follow-up cursor is reported, instead of raising KeyError
    # or issuing another request that has no cursor to continue from.
    cursor = batch.meta.get("next_cursor")
    if not cursor:
        break

# Extract only the fields needed for the output table.
# Nested values are read with `or {}` / `or []` so that an explicit null in
# the API response is handled the same way as a missing key.
target_ror_url = f"https://ror.org/{ROR_ID}"  # institution "ror" values are matched in full-URL form
filtered_results = []
for work in results[:MAX_RESULTS]:
    # Names of corresponding authors who list the target institution.
    corresponding_authors = []
    for authorship in work.get("authorships") or []:
        if not authorship.get("is_corresponding"):
            continue
        at_target_institution = any(
            (institution or {}).get("ror") == target_ror_url
            for institution in authorship.get("institutions") or []
        )
        display_name = (authorship.get("author") or {}).get("display_name")
        # Each authorship contributes its author at most once.
        if at_target_institution and display_name:
            corresponding_authors.append(display_name)

    # Collect the display_name of each topic's domain, field and subfield.
    # One entry is kept per topic, so repeated names are preserved.
    domains = []
    fields = []
    subfields = []
    for topic in work.get("topics") or []:
        for level, names in (
            ("domain", domains),
            ("field", fields),
            ("subfield", subfields),
        ):
            level_name = (topic.get(level) or {}).get("display_name")
            if level_name:
                names.append(level_name)

    open_access = work.get("open_access") or {}
    filtered_results.append({
        "doi": work.get("doi"),
        "publication_date": work.get("publication_date"),
        "cited_by_count": work.get("cited_by_count"),
        "is_oa": open_access.get("is_oa"),
        "oa_status": open_access.get("oa_status"),
        "oa_url": open_access.get("oa_url"),
        "any_repository_has_fulltext": open_access.get("any_repository_has_fulltext"),
        "corresponding_authors": ", ".join(corresponding_authors),
        "domains": ", ".join(domains),
        "fields": ", ".join(fields),
        "subfields": ", ".join(subfields),
    })

# Save the extracted rows as a tab-separated file with a header row.
columns = [
    "doi",
    "publication_date",
    "cited_by_count",
    "is_oa",
    "oa_status",
    "oa_url",
    "any_repository_has_fulltext",
    "corresponding_authors",
    "domains",
    "fields",
    "subfields",
]
# newline='' leaves line-ending handling to the csv module.
with open(OUTPUT_FILE, "w", newline='', encoding="utf-8") as tsv_file:
    tsv_writer = csv.DictWriter(tsv_file, fieldnames=columns, delimiter='\t')
    tsv_writer.writeheader()
    for row in filtered_results:
        tsv_writer.writerow(row)

# Report how many rows were written and where.
print(f"✅ {len(filtered_results)} 件のデータを {OUTPUT_FILE} に保存しました。")