In [None]:
'''
Programming for Biomedical Informatics (INFR11260)
Ian Simpson (ian.simpson@ed.ac.uk)

Example script that uses OpenCitations to find the papers citing a given set of papers, then looks each citing paper up with a PubMed eUtils search
This shows how to combine two different APIs in one script and introduces request delays and tqdm progress bars
'''

# import libraries
import requests
import time
from tqdm import tqdm
import xml.etree.ElementTree as ET

# DOIs of the papers whose citing papers we want to find (a single example DOI here)
DOIS = ["10.1038/s41598-022-14077-1"]

# setup search details
# OpenCitations citations endpoint; the query DOI is appended directly to this URL
opencite_baseurl = "https://opencitations.net/index/coci/api/v1/citations/"
# NCBI eUtils endpoints: esearch resolves a search term to PubMed IDs,
# efetch retrieves the record for a PubMed ID
esearch_baseurl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
efetch_baseurl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
# pause in seconds (passed to time.sleep) between API calls
delay = 0.5  # you can delay API calls to adhere to good citizen/rules of the provider

# find the dois that cite your paper using open citations
def get_citing_dois(doi, timeout=30):
    """Fetch the DOIs of papers that cite ``doi`` from OpenCitations.

    Args:
        doi: DOI of the cited paper; it is appended to ``opencite_baseurl``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        A list with the ``"citing"`` value of every entry in the JSON
        response. An empty list is returned (and a message printed) if the
        request fails, the server answers with an error status, or the
        response cannot be parsed.
    """
    try:
        r = requests.get(opencite_baseurl + doi, timeout=timeout)
    except requests.RequestException as e:
        # connection errors and timeouts are reported like any other failure
        print(f"Error fetching from openciteations for {doi}: {e}")
        return []
    if not r.ok:
        print(f"Error fetching from openciteations for {doi}: {r.status_code}")
        return []
    try:
        return [entry["citing"] for entry in r.json()]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not valid JSON; KeyError/TypeError: the JSON
        # does not have the expected list-of-dicts-with-"citing" shape
        print(f"Error parsing data for {doi}: {e}")
        return []

# use eUtils to query PubMed to identify the citing papers
def query_pubmed_by_doi(doi, timeout=30):
    """Look up a DOI in PubMed and return its PMID and title.

    The lookup takes two eUtils calls: esearch resolves the DOI to a PubMed
    ID, then efetch retrieves the record as XML, from which the PMID and
    article title are read.

    Args:
        doi: DOI to search for, using the ``[AID]`` field tag.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``(pmid, title)`` tuple of strings if a record is found, otherwise
        ``None``. ``None`` covers no match, an HTTP error, a network failure,
        and a response that cannot be parsed.
    """
    # Step 1: search PubMed for the DOI using [AID] NB this is a referring identifier keyword term
    params = {"db": "pubmed", "term": f"{doi}[AID]", "retmode": "json"}
    try:
        r = requests.get(esearch_baseurl, params=params, timeout=timeout)
    except requests.RequestException:
        return None
    if not r.ok:
        return None
    try:
        ids = r.json().get("esearchresult", {}).get("idlist", [])
    except (ValueError, AttributeError):
        # body is not JSON, or the JSON is not shaped like an esearch result
        return None
    if not ids:
        return None

    # Step 2: fetch the metadata of the first matching record as XML
    fetch_params = {"db": "pubmed", "id": ids[0], "retmode": "xml"}
    try:
        f = requests.get(efetch_baseurl, params=fetch_params, timeout=timeout)
    except requests.RequestException:
        return None
    if not f.ok:
        return None
    try:
        root = ET.fromstring(f.text)
    except ET.ParseError:
        return None

    # PubMed XML structure: /PubmedArticleSet/PubmedArticle/MedlineCitation/Article/ArticleTitle
    title = root.find(".//ArticleTitle")
    pmid = root.find(".//PMID")
    if title is None or pmid is None:
        return None
    # itertext() also collects text nested inside inline markup (<i>, <sub>, ...);
    # .text alone stops at the first child element and would truncate such titles
    return pmid.text, "".join(title.itertext())

In [None]:
# now to use the functions

# store the results in a dict mapping each query DOI to the list of DOIs citing it
results = {}

# we can use the tqdm library to create a progress bar
# for each doi
for doi in tqdm(DOIS, desc="Fetching citing DOIs"):
    # find the dois of citing papers using open citations returned as a list
    # and populate the results dict for each original query doi
    results[doi] = get_citing_dois(doi)
    # delay calls
    time.sleep(delay)

# print out the number of citing papers
print("Number of citing papers per DOI\n")
for doi, citing in results.items():
    print(f"{doi}: {len(citing)} citing papers\n")

# query PubMed and report the PMID and title of each citing paper
for doi, citing_dois in results.items():
    print(f"Cited paper: {doi}")
    print(f"Number of citing papers: {len(citing_dois)}")
    if not citing_dois:
        print("  No citing papers found.")
        continue
    # for each citing doi
    for i, citing in enumerate(citing_dois, 1):
        # query pubmed to find the corresponding paper
        # retrieve only the PMID and the Title (defined in function above)
        try:
            record = query_pubmed_by_doi(citing)
        except Exception as e:
            # keep going on unexpected failures, but without a bare except so
            # that Ctrl-C (KeyboardInterrupt) still stops the script
            print(f"  {i}. PubMed lookup failed for DOI - {citing}: {e}")
        else:
            if record is None:
                print(f"  {i}. No PubMed record found for DOI - {citing}")
            else:
                pmid, title = record
                print(f"  {i}. PMID ({pmid}) - {title}")
        # delay after every PubMed lookup so consecutive queries are throttled
        time.sleep(delay)