# In [None]:
"""
Fetch OpenAlex source metadata by ISSN.

Input:  text file with one ISSN per line
Output: CSV with ISSN, OpenAlex ID, and title

Usage:
    python openalex_sources_by_issn.py input.txt output.csv
"""

import sys
import time
import requests
import pandas as pd
from urllib.parse import urlencode

# Root of the OpenAlex REST API; the "/sources" path is appended when querying.
BASE_URL = "https://api.openalex.org"
# Value sent in the User-Agent header of every request.
USER_AGENT = "openalex-issn-fetcher/1.0"

# Comma-separated fields passed as the "select" query parameter.
SELECT_FIELDS = "id,display_name,issn"


def fetch_sources_by_issn(issn_list, batch_size=40, timeout=30):
    """Look up OpenAlex sources for a collection of ISSNs.

    ISSNs are queried in batches against the ``/sources`` endpoint using an
    OR filter (``issn:A|B|...``). Each returned source contributes one row
    for every requested ISSN it lists. Requested ISSNs that no returned
    source listed are appended at the end with empty ``openalex_id`` and
    ``title``. Matching is an exact string comparison against the ``issn``
    list of each source.

    Args:
        issn_list: ISSN strings to look up. Duplicates are queried once.
        batch_size: Number of ISSNs combined into a single request.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A pandas DataFrame with the columns ``issn``, ``openalex_id`` and
        ``title``. The columns are present even when there are no rows, and
        row order follows the order of ``issn_list``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        requests.HTTPError: If a request returns an error status.
    """
    if batch_size < 1:
        # A negative step would silently produce an empty range and report
        # every ISSN as unmatched; fail loudly instead.
        raise ValueError("batch_size must be at least 1")

    columns = ["issn", "openalex_id", "title"]

    # De-duplicate while keeping first-seen order so the same ISSN is never
    # requested (or emitted) twice just because it straddles two batches.
    unique_issns = list(dict.fromkeys(issn_list))

    rows = []
    matched = set()

    for start in range(0, len(unique_issns), batch_size):
        chunk = unique_issns[start:start + batch_size]
        issn_or = "|".join(chunk)

        params = {
            "filter": f"issn:{issn_or}",
            "per_page": 200,
            "select": SELECT_FIELDS,
        }

        # Keep "|" and "," literal: they are the OR separator of the filter
        # and the separator of the select list.
        url = f"{BASE_URL}/sources?{urlencode(params, safe='|,')}"
        response = requests.get(
            url,
            headers={"User-Agent": USER_AGENT},
            timeout=timeout,  # without a timeout a stalled connection hangs forever
        )
        response.raise_for_status()

        # NOTE: only the first page of results is read for each batch.
        results = response.json().get("results", [])

        for source in results:
            source_id = source.get("id") or ""
            title = source.get("display_name") or ""
            source_issns = set(source.get("issn") or [])

            # Walk the chunk (not the set intersection) so that row order is
            # deterministic and follows the input order.
            for issn in chunk:
                if issn in source_issns:
                    matched.add(issn)
                    rows.append({
                        "issn": issn,
                        "openalex_id": source_id,
                        "title": title,
                    })

        # Small pause between requests to stay gentle on the API.
        time.sleep(0.2)

    # Add unmatched ISSNs, in input order, with empty metadata.
    rows.extend(
        {"issn": issn, "openalex_id": "", "title": ""}
        for issn in unique_issns
        if issn not in matched
    )

    # Passing columns explicitly keeps the header even when rows is empty.
    return pd.DataFrame(rows, columns=columns)


def main():
    """Command-line entry point.

    Expects exactly two arguments: the path of a text file holding one ISSN
    per line, and the path of the CSV file to write. Blank lines in the input
    are skipped and surrounding whitespace is stripped. Prints a usage
    message and exits with status 1 when the argument count is wrong.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python openalex_sources_by_issn.py input.txt output.csv")
        sys.exit(1)

    input_path, output_path = cli_args

    issn_list = []
    with open(input_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                issn_list.append(cleaned)

    fetch_sources_by_issn(issn_list).to_csv(output_path, index=False)
    print(f"Saved: {output_path}")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()