In [None]:
# --- Input settings ---------------------------------------------------
# ROR identifier of the institution whose works are analysed.
# Alternative institution:
# ror = 'https://ror.org/02y3ad647'
ror = "https://ror.org/00p4k0j84"  # Kyushu University

# Publication-date window applied to the works filter.
from_date, to_date = "2024-01-01", "2024-12-31"

## topics.domain
# id:1, Life Sciences
# id:2, Social Sciences
# id:3, Physical Sciences
# id:4, Health Sciences
## topics.field
# id:11, Agricultural and Biological Sciences (農学・生物科学)
# id:12, Arts and Humanities (人文科学)
# id:13, Biochemistry, Genetics and Molecular Biology (生化学・遺伝学・分子生物学)
# id:14, Business, Management and Accounting (経営学・管理学・会計学)
# id:15, Chemical Engineering (化学工学)
# id:16, Chemistry (化学)
# id:17, Computer Science (コンピューターサイエンス)
# id:18, Decision Sciences (意思決定科学)
# id:19, Earth and Planetary Sciences (地球惑星科学)
# id:20, Economics, Econometrics and Finance (経済学・計量経済学・金融学)
# id:21, Energy (エネルギー学)
# id:22, Engineering (工学)
# id:23, Environmental Science (環境科学)
# id:24, Immunology and Microbiology (免疫学・微生物学)
# id:25, Materials Science (材料科学)
# id:26, Mathematics (数学)
# id:27, Medicine (医学)
# id:28, Neuroscience (神経科学)
# id:29, Nursing (看護学)
# id:30, Pharmacology, Toxicology and Pharmaceutics (薬理学、毒性学および製剤学)
# id:31, Physics and Astronomy (物理学および天文学)
# id:32, Psychology (心理学)
# id:33, Social Sciences (社会科学)
# id:34, Veterinary (獣医学)
# id:35, Dentistry (歯学)
# id:36, Health Professions (医療専門職)

def build_institution_works_url(ror, start_date=None, end_date=None, extra_filters=()):
    """Build an OpenAlex ``works`` URL filtered to one institution's articles.

    The filter always restricts results to non-paratext works of type
    ``article`` affiliated with the given institution and published inside
    the requested date window.

    Args:
        ror: ROR identifier of the institution, inserted into the
            ``institutions.ror`` filter.
        start_date: Earliest publication date (e.g. ``"2024-01-01"``).
            When omitted, the notebook-level ``from_date`` is used.
        end_date: Latest publication date (e.g. ``"2024-12-31"``).
            When omitted, the notebook-level ``to_date`` is used.
        extra_filters: Optional additional ``key:value`` filter strings
            (a single string or an iterable of strings), for example
            ``'topics.domain.id:1'`` (Life Sciences),
            ``'topics.field.id:26'`` (Mathematics) or
            ``'topics.field.id:27|28'`` (Medicine or Neuroscience).

    Returns:
        The complete request URL as a string.

    Raises:
        NameError: If a date is omitted and the corresponding notebook-level
            variable has not been defined.
    """
    # Fall back to the notebook-level window so existing one-argument calls
    # keep producing exactly the same URL as before.
    if start_date is None:
        start_date = from_date
    if end_date is None:
        end_date = to_date

    # A lone string would otherwise be unpacked character by character.
    if isinstance(extra_filters, str):
        extra_filters = (extra_filters,)

    # specify endpoint
    endpoint = 'works'

    # build the 'filter' parameter
    filters = (
        f'institutions.ror:{ror}',
        'is_paratext:false',
        'type:article',
        *extra_filters,
        f'from_publication_date:{start_date}',
        f'to_publication_date:{end_date}',
    )

    # put the URL together
    return f'https://api.openalex.org/{endpoint}?filter={",".join(filters)}'

# Build the filtered works URL for the configured institution and show it.
filtered_works_url = build_institution_works_url(ror)
print('complete URL with filters:', filtered_works_url, sep='\n')

In [None]:
# Grouping dimension for the works query; alternatives are kept commented out.
# group_by_param = 'group_by=is_oa'
# group_by_param = 'group_by=oa_status'
# group_by_param = 'group_by=topics.field.id'  # papers spanning multiple fields are double-counted
group_by_param = 'group_by=primary_location.source.id'  # name of the source (publication venue)

# Append the grouping parameter to the already-filtered works URL.
work_groups_url = '&'.join((filtered_works_url, group_by_param))
print('complete URL with group_by:', work_groups_url, sep='\n')

In [None]:
import requests, json, time

# E-mail address sent with every request for the OpenAlex polite pool
# (kept in a variable so it is easy to change).
mailto = "hogehoge@m.kyushu-u.ac.jp"

# Seconds to wait for a single HTTP response before giving up, so a stalled
# connection cannot hang the notebook indefinitely.
request_timeout = 30

# Grouped-works URL with the polite-pool address appended.
# Fixes: the variable defined earlier is `work_groups_url` (not
# `work_group_url`), and parameters are joined with a single '&'.
base_url = f"{work_groups_url}&mailto={mailto}"

# Initial cursor and number of groups requested per page.
cursor = "*"
per_page = 200
all_groups = []

start_time = time.time()

# Page through the grouped results until no further cursor is returned.
while True:
    paged_url = f"{base_url}&per-page={per_page}&cursor={cursor}"
    page = requests.get(paged_url, timeout=request_timeout)
    # Fail loudly on HTTP errors; otherwise an error payload without a
    # 'group_by' key would silently truncate the result list.
    page.raise_for_status()
    response = page.json()
    all_groups.extend(response.get('group_by', []))
    next_cursor = response.get('meta', {}).get('next_cursor')
    if not next_cursor:
        break
    cursor = next_cursor
    time.sleep(1)  # polite pool: wait one second between pages

# Sort by number of works, largest first.
all_groups.sort(key=lambda x: x['count'], reverse=True)

# Look up the ISSN-L of every source; this is best effort, so a failed
# lookup leaves the field empty instead of aborting the whole export.
for item in all_groups:
    item['issn_l'] = ''
    # The group key is the source's OpenAlex URL; its last path segment is the ID.
    source_id = (item.get('key') or '').split('/')[-1]
    if not source_id:
        continue  # nothing to look up for a group without a key
    source_url = f"https://api.openalex.org/sources/{source_id}?mailto={mailto}"
    try:
        source_response = requests.get(source_url, timeout=request_timeout)
        source_response.raise_for_status()
        # `issn_l` may be present but null; normalise that to an empty string
        # so the TSV does not contain the literal text "None".
        item['issn_l'] = source_response.json().get('issn_l') or ''
    except (requests.RequestException, ValueError) as err:
        # Network/HTTP failure or an undecodable body: keep the empty value.
        print(f"warning: could not fetch ISSN-L for {source_id}: {err}")
    time.sleep(1)  # polite pool: wait one second between source lookups

# Save the result as a tab-separated file.
with open('output.tsv', 'w', encoding='utf-8') as f:
    f.write("key\tkey_display_name\tcount\tissn_l\n")
    for item in all_groups:
        line = f"{item['key']}\t{item['key_display_name']}\t{item['count']}\t{item['issn_l']}\n"
        f.write(line)

# Report the number of exported rows and the elapsed time.
end_time = time.time()
elapsed_time = end_time - start_time
print(f"出力件数: {len(all_groups)} 件")
print(f"処理時間: {elapsed_time:.2f} 秒")
