In [None]:
# code snippets to find the papers a paper cites, the PMC papers that cite it, and similar papers
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

# NCBI credentials are read from plain-text files one directory up; the
# surrounding whitespace (e.g. a trailing newline) is stripped from each value.
with open('../api_keys/ncbi.txt') as file:
    api_key = file.read().strip()

# contact e-mail that is sent alongside the API key in every ELink request below
with open('../api_keys/ncbi_email.txt') as file:
    email = file.read().strip()

def get_cited_papers(pubmed_id):
    """Return the IDs of the papers that the given PubMed article cites.

    Sends an ELink request with ``linkname=pubmed_pubmed_refs`` and collects
    the text of every ``LinkSet/LinkSetDb/Link/Id`` element in the XML reply.

    NB you can see all the available link names (an astonishing number) at
    https://eutils.ncbi.nlm.nih.gov/entrez/query/static/entrezlinks.html#pubmed

    Args:
        pubmed_id: PubMed ID of the article; sent as the ``id`` parameter.

    Returns:
        A list with the text of each linked ``Id`` element (strings), in the
        order they appear in the response; empty if no links are present.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        xml.etree.ElementTree.ParseError: If the reply is not well-formed XML.
    """
    # Define the parameters for the ELink request
    elink_params = {
        'dbfrom': 'pubmed',
        'id': pubmed_id,
        'linkname': 'pubmed_pubmed_refs',
        'api_key': api_key,
        'email': email,
    }
    # URL-encode the parameters; passing them as `data` makes this a POST
    encoded_data = urllib.parse.urlencode(elink_params).encode('utf-8')

    # the base request url for ELink
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"

    request = urllib.request.Request(url, data=encoded_data)

    # the context manager closes the HTTP response even if reading or
    # parsing raises, so the connection is never leaked
    with urllib.request.urlopen(request) as response:
        elink_data_XML = ET.fromstring(response.read())

    # Extract the IDs of the cited papers
    return [
        id_element.text
        for id_element in elink_data_XML.findall('LinkSet/LinkSetDb/Link/Id')
    ]

def get_pmc_citations(pubmed_id):
    """Return the IDs of the PMC records that cite the given PubMed article.

    Sends an ELink request with ``linkname=pubmed_pmc_refs`` and collects the
    text of every ``LinkSet/LinkSetDb/Link/Id`` element in the XML reply.

    NB you can see all the available link names (an astonishing number) at
    https://eutils.ncbi.nlm.nih.gov/entrez/query/static/entrezlinks.html#pubmed

    NOTE(review): this link points from PubMed to PMC, so the returned IDs
    are presumably PMC IDs rather than PubMed IDs -- confirm before feeding
    them back into a PubMed query.

    Args:
        pubmed_id: PubMed ID of the article; sent as the ``id`` parameter.

    Returns:
        A list with the text of each linked ``Id`` element (strings), in the
        order they appear in the response; empty if no links are present.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        xml.etree.ElementTree.ParseError: If the reply is not well-formed XML.
    """
    # Define the parameters for the ELink request
    elink_params = {
        'dbfrom': 'pubmed',
        'id': pubmed_id,
        'linkname': 'pubmed_pmc_refs',
        'api_key': api_key,
        'email': email,
    }
    # URL-encode the parameters; passing them as `data` makes this a POST
    encoded_data = urllib.parse.urlencode(elink_params).encode('utf-8')

    # the base request url for ELink
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"

    request = urllib.request.Request(url, data=encoded_data)

    # the context manager closes the HTTP response even if reading or
    # parsing raises, so the connection is never leaked
    with urllib.request.urlopen(request) as response:
        elink_data_XML = ET.fromstring(response.read())

    # Extract the IDs of the citing papers
    return [
        id_element.text
        for id_element in elink_data_XML.findall('LinkSet/LinkSetDb/Link/Id')
    ]

def get_similar_papers(pubmed_id):
    """Return the IDs of the papers PubMed links as similar to the given article.

    Sends an ELink request with ``linkname=pubmed_pubmed`` and collects the
    text of every ``LinkSet/LinkSetDb/Link/Id`` element in the XML reply.

    NB you can see all the available link names (an astonishing number) at
    https://eutils.ncbi.nlm.nih.gov/entrez/query/static/entrezlinks.html#pubmed

    Args:
        pubmed_id: PubMed ID of the article; sent as the ``id`` parameter.

    Returns:
        A list with the text of each linked ``Id`` element (strings), in the
        order they appear in the response; empty if no links are present.

    Raises:
        urllib.error.URLError: If the HTTP request fails.
        xml.etree.ElementTree.ParseError: If the reply is not well-formed XML.
    """
    # Define the parameters for the ELink request
    elink_params = {
        'dbfrom': 'pubmed',
        'id': pubmed_id,
        'linkname': 'pubmed_pubmed',
        'api_key': api_key,
        'email': email,
    }
    # URL-encode the parameters; passing them as `data` makes this a POST
    encoded_data = urllib.parse.urlencode(elink_params).encode('utf-8')

    # the base request url for ELink
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi"

    request = urllib.request.Request(url, data=encoded_data)

    # the context manager closes the HTTP response even if reading or
    # parsing raises, so the connection is never leaked
    with urllib.request.urlopen(request) as response:
        elink_data_XML = ET.fromstring(response.read())

    # Extract the IDs of the similar papers
    return [
        id_element.text
        for id_element in elink_data_XML.findall('LinkSet/LinkSetDb/Link/Id')
    ]


# Example usage: run each of the three link queries for one article
# (PubMed ID 28462931) and print how many IDs each query returned.
# The result lists stay bound at module level after this runs.
# papers that the article cites
cited_paper = get_cited_papers(28462931)
print(f'this paper cites {len(cited_paper)} papers')

# records in PMC that cite the article
paper_citations_in_pmc = get_pmc_citations(28462931)
print(f'this paper is cited by {len(paper_citations_in_pmc)} papers in PMC')

# papers linked to the article as similar
similar_papers = get_similar_papers(28462931)
print(f'this paper is similar to {len(similar_papers)} papers')