In [2]:
import requests
from os import environ
from pathlib import Path
from json import loads, dumps
# Read the Semantic Scholar API key from ~/.semscholarkey (surrounding
# whitespace removed) and publish it to later cells through the environment.
key_file = Path("~/.semscholarkey").expanduser()
environ["SEMSCHOLAR_KEY"] = key_file.read_text().strip()


In [12]:
import time

def pause_until(timespan):
    """Block until at least ``timespan`` seconds have passed since the previous call.

    Intended as a simple client-side throttle: call it right before each
    request. The very first call returns immediately because no previous
    call has been recorded yet.

    The timestamp of the most recent call is stored on the function object
    itself as ``pause_until.last_time``.

    Args:
        timespan: Minimum number of seconds between two consecutive calls.
    """
    # A monotonic clock never jumps when the system clock is adjusted, so the
    # computed wait can never exceed ``timespan``.
    now = time.monotonic()
    previous = pause_until.last_time
    if previous is not None:
        remaining = timespan - (now - previous)
        if remaining > 0:
            time.sleep(remaining)
    # Record the moment we actually return, i.e. after any sleep.
    pause_until.last_time = time.monotonic()


# No call has happened yet, so the first call must not sleep.
pause_until.last_time = None

In [17]:
# Fetch the default record of a single paper, addressed by its paper id.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/649def34f8be52c8b66281af98ae884c09aef38b",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)

In [18]:
# Fetch the default record of a single paper, addressed by its DOI instead of
# a paper id.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/10.21468/SciPostPhys.7.1.014",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)

In [19]:
# Fetch the citations of a paper, i.e. the papers that cite it, with title and
# external ids for each. The endpoint is paginated: a single request returns
# one page only, and a "next" offset in the response signals further pages.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/10.21468/SciPostPhys.7.1.014/citations?fields=title,externalIds",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)

In [20]:
# Fetch the references of a paper, i.e. the papers it cites (its bibliography),
# with title and external ids for each. The endpoint is paginated: a single
# request returns one page only, and a "next" offset signals further pages.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/10.21468/SciPostPhys.7.1.014/references?fields=title,externalIds",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)

In [22]:
# Search for papers that match the string "physics ontology". With offset=100
# and limit=100 this asks for up to 100 hits starting at position 100.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/search?query=physics%20ontology&offset=100&limit=100",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)
res = loads(req.text)
# The hits themselves sit under the "data" key of the response.
papers = res["data"]

In [17]:
# Request selected fields (title, journal, year, external ids, citation styles)
# for one paper and pretty-print the JSON answer.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/87420b5957e7c28b6170f5ef09b49cc83cf6a2a9?fields=title,journal,year,externalIds,citationStyles",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)
paper_record = loads(req.text)
print(dumps(paper_record, indent=2))

{
  "paperId": "87420b5957e7c28b6170f5ef09b49cc83cf6a2a9",
  "externalIds": {
    "ArXiv": "2102.02770",
    "DBLP": "journals/corr/abs-2102-02770",
    "CorpusId": 231802018
  },
  "title": "A Living Review of Machine Learning for Particle Physics",
  "year": 2021,
  "journal": {
    "name": "ArXiv",
    "volume": "abs/2102.02770"
  },
  "citationStyles": {
    "bibtex": "@Article{Feickert2021ALR,\n author = {M. Feickert and B. Nachman},\n booktitle = {arXiv.org},\n journal = {ArXiv},\n title = {A Living Review of Machine Learning for Particle Physics},\n volume = {abs/2102.02770},\n year = {2021}\n}\n"
  }
}


In [19]:
def createBibTeXFromSemScholarId(id):
    """Return the BibTeX entry that Semantic Scholar provides for a paper.

    Args:
        id: Paper identifier inserted into the ``/paper/{id}`` URL. Despite
            the function name, the notebook also calls this with a DOI such
            as ``"10.21468/SciPostPhys.7.1.014"``.

    Returns:
        The BibTeX record as a string.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        KeyError: If the response lacks the ``citationStyles``/``bibtex`` keys.
    """
    response = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{id}?fields=citationStyles",
        headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
    )
    # Surface the real HTTP error instead of an opaque KeyError on the lookup below.
    response.raise_for_status()
    return loads(response.text)["citationStyles"]["bibtex"]

In [20]:
# Example call: the paper is addressed by its DOI here.
print(createBibTeXFromSemScholarId("10.21468/SciPostPhys.7.1.014"))

@Article{Kasieczka2019TheML,
 author = {Gregor Kasieczka and T. Plehn and A. Butter and K. Cranmer and Dipsikha Debnath and B. Dillon and M. Fairbairn and D. Faroughy and W. Fedorko and L. Gouskos and J. Kamenik and Patrick T. Komiske and Simon Leiss and A. Lister and S. Macaluso and S. Macaluso and E. Metodiev and L. Moore and B. Nachman and B. Nachman and Karl Nordström and Jannicke Pearkes and H. Qu and Y. Rath and M. Rieger and D. Shih and J. Thompson and Sreedevi Varma},
 booktitle = {SciPost Physics},
 journal = {SciPost Physics},
 title = {The Machine Learning landscape of top taggers},
 year = {2019}
}



In [7]:
# Small search example: query "snow", asking for 3 hits starting at offset 1
# with only the title field, then pretty-print the raw JSON answer.
req = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/search?query=snow&offset=1&limit=3&fields=title",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)
search_result = loads(req.text)
print(dumps(search_result, indent=2))

{
  "total": 144492,
  "offset": 1,
  "next": 4,
  "data": [
    {
      "paperId": "3b98a87a3d4c6935b29380c4070a6c637306df64",
      "title": "Bulk Parameterization of the Snow Field in a Cloud Model"
    },
    {
      "paperId": "51b09bddafff606138fe3e69acb9121f3415aad9",
      "title": "ALL Snow Removed: Single Image Desnowing Algorithm Using Hierarchical Dual-tree Complex Wavelet Representation and Contradict Channel Loss"
    },
    {
      "paperId": "836609dc37e8c3997459fdf5d42c223b7cacc757",
      "title": "White and wonderful? Microplastics prevail in snow from the Alps to the Arctic"
    }
  ]
}


In [8]:
def countPercentageOfPapersWithAbstract(topic):
    """Return the percentage of search hits for ``topic`` that have an abstract.

    Only the first 100 hits of the paper search are inspected.

    Args:
        topic: Free-text search query.

    Returns:
        Percentage between 0 and 100 as a float; ``0.0`` when the search
        yields no papers at all.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        # Passing the query via ``params`` lets requests percent-encode it, so
        # topics containing "&", "#" or similar characters are sent intact.
        params={"query": topic, "offset": 0, "limit": 100, "fields": "abstract"},
        headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
    )
    response.raise_for_status()
    papers = loads(response.text).get("data") or []
    if not papers:
        # Nothing to count; avoid dividing by zero.
        return 0.0
    papersWithAbstract = sum(1 for paper in papers if paper.get("abstract") is not None)
    return papersWithAbstract / len(papers) * 100

In [9]:
# Share (in percent) of the inspected "electron" search hits that have an abstract.
countPercentageOfPapersWithAbstract("electron")

63.0

In [14]:
def countPercentageOfPapersWithoutDoi(topic):
    """Return the percentage of search hits for ``topic`` that have no DOI.

    Only the first 100 hits of the paper search are inspected. As a side
    effect, the title of every hit without a DOI is printed.

    Args:
        topic: Free-text search query.

    Returns:
        Percentage between 0 and 100 as a float; ``0.0`` when the search
        yields no papers at all.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        # Passing the query via ``params`` lets requests percent-encode it, so
        # topics containing "&", "#" or similar characters are sent intact.
        params={"query": topic, "offset": 0, "limit": 100, "fields": "externalIds,title"},
        headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
    )
    response.raise_for_status()
    papers = loads(response.text).get("data") or []
    if not papers:
        # Nothing to count; avoid dividing by zero.
        return 0.0
    papersWithoutDoi = 0
    for paper in papers:
        # Treat a missing or null "externalIds" like an empty mapping.
        externalIds = paper.get("externalIds") or {}
        if "DOI" not in externalIds:
            papersWithoutDoi += 1
            print(paper["title"])
    return papersWithoutDoi / len(papers) * 100

In [15]:
# Prints the titles of the "electron" hits that lack a DOI; the cell then shows their share in percent.
countPercentageOfPapersWithoutDoi("electron")

A formaldehyde-glutaraldehyde fixative of high osmolality for use in electron-microscopy
Electron – molecule collision calculations using the R-matrix method
THE GROUND STATE OF THE ELECTRON GAS BY A STOCHASTIC METHOD
Electron paramagnetic resonance of transition ions
Electron Paramagnetic Resonance Of Transition Ions
Scanning Electron Microscopy and X-Ray Microanalysis
Advanced Computing In Electron Microscopy


7.000000000000001

In [16]:
def _countPagedEntries(semscholarId, relation):
    """Count the entries of one paginated paper relation ("references" or "citations")."""
    total = 0
    offset = 0
    while offset is not None:
        # Throttle: leave at least one second between consecutive requests.
        pause_until(1)
        response = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/{semscholarId}/{relation}?fields=title,externalIds&offset={offset}",
            headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
        )
        # Surface the real HTTP error instead of an opaque KeyError on "data".
        response.raise_for_status()
        page = loads(response.text)
        total += len(page["data"])
        # A page without a "next" offset is treated as the last one.
        offset = page.get("next")
    return total


def countReferencesAndCitations(semscholarId):
    """Count the references and the citations of a paper.

    Walks through every page of the ``/references`` and ``/citations``
    endpoints, following the ``next`` offset of each response, and sums the
    number of entries. Requests are spaced at least one second apart.

    Args:
        semscholarId: Paper identifier inserted into the request URLs.

    Returns:
        A tuple ``(number_of_references, number_of_citations)``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    refLength = _countPagedEntries(semscholarId, "references")
    citLength = _countPagedEntries(semscholarId, "citations")
    return refLength, citLength
# Example: prints the (references, citations) counts for one paper.
print(countReferencesAndCitations("87420b5957e7c28b6170f5ef09b49cc83cf6a2a9"))

(374, 149)


In [13]:

# Look at the paging metadata of the references endpoint, starting at offset 200.
semscholarId = "87420b5957e7c28b6170f5ef09b49cc83cf6a2a9"
req = requests.get(
    f"https://api.semanticscholar.org/graph/v1/paper/{semscholarId}/references?fields=title,externalIds&offset=200",
    headers={"x-api-key": environ["SEMSCHOLAR_KEY"]},
)
loaded = loads(req.text)
# Swap the entry list for its length so the printout shows only the paging
# fields and how many entries the page holds.
entry_count = len(loaded["data"])
loaded["data"] = entry_count
print(dumps(loaded, indent=2))

{
  "offset": 200,
  "next": 300,
  "data": 100
}
