# Getting Full Papers

In [1]:
import urllib
import requests
import os
import math
import json
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np

from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch

import igemutils as igem

email = 'ivalexander13@berkeley.edu' # replace with your own email address before running

## Use the Elsevier API 

Setting up the API

In [122]:
# Load configuration.
# A context manager guarantees the file handle is released even if the JSON
# is malformed and json.load raises.
with open("elsevier_api/config.json") as con_file:
    config = json.load(con_file)

# Initialize the Elsevier API client with the key read from the config file.
client = ElsClient(config['apikey'])
# Point the client's local_dir at the folder this notebook uses for its output.
client.local_dir = "./get_fullpapers_pmidset/"

# Function: Get paper
def get_paper(doi: str):
    """Fetch the ScienceDirect full-text record for a DOI.

    Builds a ``FullDoc`` for *doi* and reads it through the module-level
    ``client``.

    Args:
        doi: DOI of the paper to fetch.

    Returns:
        The document's ``data`` attribute when the read reports success,
        otherwise ``False``.
    """
    document = FullDoc(doi=doi)
    read_ok = document.read(client)
    return document.data if read_ok else False

Main Loop (taken and modified from elsevier_api)

In [None]:
# Output file: JSON dictionary mapping each DOI to its full-text record.
json_file = './get_fullpapers_pmidset/elsevier_fulltexts_all.json'
if os.path.isfile(json_file):
    # Resume from a previous run so already-fetched papers are not requested again.
    with open(json_file, 'r') as fp:
        elsevier_fulltexts = json.load(fp)
else:
    elsevier_fulltexts = {}

# hyperparam: maximum number of loop iterations (-1 for no limit)
max_calls = -1

# Stats
calls = 0                # loop iterations started
fails = 0                # get_paper returned a falsy result
not_elsevier = 0         # skipped because the publisher is not Elsevier
successes_or_found = 0   # fetched now, or already present in the output dict
queries = 0              # successful fetches made during this run

try:
    # NOTE(review): rxn_pmids is not defined in this cell; it is expected to
    # be an iterable of PMIDs provided by an earlier cell.
    for pmid in rxn_pmids:
        # Don't go over max calls. With max_calls == -1 this never triggers,
        # because calls is never negative.
        if calls == max_calls:
            print("Query limit reached.")
            break
        calls += 1

        # Resolve the DOI for *this* PMID (the lookup previously received no
        # argument).
        # NOTE(review): assumes igem.pmid2doi takes the PMID as its first
        # positional argument - confirm against igemutils.
        doi = igem.pmid2doi(pmid)
        doi_url = f"https://doi.org/{doi}"

        # Checks if the paper has been successfully fetched before. The output
        # dict is keyed by DOI (see the assignment below), so the lookup must
        # use the DOI rather than the PMID.
        if doi in elsevier_fulltexts:
            successes_or_found += 1
            print(f"## Call {calls} found.")
            continue

        # Checks if the publisher is Elsevier.
        # NOTE(review): `value` is not assigned anywhere in this cell. Unless
        # an earlier cell defines it, this line raises NameError; if one does,
        # it is not tied to the current pmid. Replace it with the per-paper
        # metadata record for `pmid` once its source is known.
        if value['publisher'] not in ('Elsevier', 'Elsevier BV'):
            not_elsevier += 1
            continue

        if (fullpaper := get_paper(doi)):
            elsevier_fulltexts[doi] = fullpaper
            successes_or_found += 1
            queries += 1
            print(f"Call {calls} success.")
        else:
            fails += 1
            print (f"##### Call {calls} failed: {fails}. DOI: {doi_url}.")

except KeyboardInterrupt:
    # A manual interrupt is an expected way to stop early; fall through to
    # saving and reporting.
    pass
finally:
    # Save to file. Doing this in `finally` keeps papers fetched before an
    # unexpected error from being lost; the `with` block closes the file.
    with open(json_file, 'w') as fp:
        json.dump(elsevier_fulltexts, fp)

# Print Stats
elsevier_calls = calls - not_elsevier
print("")
print("###### STATS ######")
print(f"Total calls: {calls}")
print(f"Total number of queries: {queries}")
print(f"Total number of Elsevier papers: {elsevier_calls}")
print(f"Number of Non-Elsevier papers skipped: {not_elsevier}")
print(f"Number of fetch failures: {fails}")
print(f"Papers in storage: {len(elsevier_fulltexts)}")
# Guard the ratio: an empty run, or one with only non-Elsevier papers, would
# otherwise divide by zero.
if elsevier_calls:
    print(f"% of success: {successes_or_found / elsevier_calls * 100}%")
else:
    print("% of success: n/a (no Elsevier papers processed)")