Skip to content

Commit

Permalink
feat: ✨ add methods to get multiple papers or authors at once
Browse files — browse the repository at this point in the history
Resolve #46
  • Loading branch information
danielnsilva committed Jan 2, 2023
1 parent 9f22466 commit eba2372
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 12 deletions.
21 changes: 13 additions & 8 deletions semanticscholar/ApiRequester.py
@@ -1,10 +1,12 @@
import json
from typing import List, Union

import requests
from tenacity import (retry,
wait_fixed,
retry_if_exception_type,
stop_after_attempt)
from tenacity import (retry, retry_if_exception_type, stop_after_attempt,
wait_fixed)

from semanticscholar.SemanticScholarException import BadQueryParametersException
from semanticscholar.SemanticScholarException import \
BadQueryParametersException


class ApiRequester:
Expand Down Expand Up @@ -44,19 +46,22 @@ def get_data(
parameters: str,
headers: dict,
payload: dict = None
) -> dict:
) -> Union[dict, List[dict]]:
'''Get data from Semantic Scholar API
:param str url: absolute URL to API endpoint.
:param str parameters: the parameters to add in the URL.
:param str headers: request headers.
:param dict payload: data for POST requests.
:returns: data or empty :class:`dict` if not found.
:rtype: :class:`dict`
:rtype: :class:`dict` or :class:`List` of :class:`dict`
'''

url = f'{url}?{parameters}'
r = requests.get(url, timeout=self._timeout, headers=headers)
method = 'POST' if payload else 'GET'
payload = json.dumps(payload) if payload else None
r = requests.request(
method, url, timeout=self._timeout, headers=headers, data=payload)

data = {}
if r.status_code == 200:
Expand Down
83 changes: 79 additions & 4 deletions semanticscholar/SemanticScholar.py
Expand Up @@ -65,10 +65,10 @@ def set_timeout(self, timeout: int):

def get_paper(
self,
paper_id: Union[str, List[str]],
paper_id: str,
include_unknown_refs: bool = False,
fields: list = None
) -> Union[Paper, List[Paper]]:
) -> Paper:
'''Paper lookup
:calls: `GET https://api.semanticscholar.org/graph/v1/paper/{paper_id} \
Expand Down Expand Up @@ -106,6 +106,48 @@ def get_paper(

return paper

def get_papers(
        self,
        paper_ids: List[str],
        fields: list = None
) -> 'List[Paper]':
    '''Get details for multiple papers at once

    :calls: `POST https://api.semanticscholar.org/graph/v1/paper/batch \
        <https://api.semanticscholar.org/api-docs/graph#tag/Paper-Data/operation/post_graph_get_papers>`_

    :param list paper_ids: list of IDs (documented limit: 1000 per call;
        not validated by this method) - S2PaperId, CorpusId, DOI, ArXivId,
        MAG, ACL, PMID, PMCID, or URL from:

        - semanticscholar.org
        - arxiv.org
        - aclweb.org
        - acm.org
        - biorxiv.org

    :param list fields: (optional) list of the fields to be returned.
        Falls back to ``Paper.SEARCH_FIELDS`` when empty or ``None``.
    :returns: papers data. ``None`` entries in the response are skipped,
        so the result can be shorter than ``paper_ids``.
    :rtype: :class:`List` of :class:`Paper`
    :raises: BadQueryParametersException: propagated from the requester
        (presumably when the API rejects the query; the raise site is not
        part of this method).
    '''

    if not fields:
        fields = Paper.SEARCH_FIELDS

    url = f'{self.api_url}/paper/batch'

    # The requester appends this string after '?', so the leading '&'
    # is kept exactly as the rest of the client builds its parameters.
    fields = ','.join(fields)
    parameters = f'&fields={fields}'

    # A non-empty payload is what makes the requester issue a POST.
    payload = {"ids": paper_ids}

    data = self._requester.get_data(
        url, parameters, self.auth_header, payload)

    # The batch response may hold null placeholders (seen for IDs the API
    # cannot resolve - confirm against the API docs); building a Paper
    # from None would fail, so those entries are dropped.
    papers = [Paper(item) for item in data if item is not None]

    return papers

def search_paper(
self,
query: str,
Expand Down Expand Up @@ -156,9 +198,9 @@ def search_paper(

def get_author(
self,
author_id: Union[str, List[str]],
author_id: str,
fields: list = None
) -> Union[Author, List[Author]]:
) -> Author:
'''Author lookup
:calls: `GET https://api.semanticscholar.org/graph/v1/author/\
Expand All @@ -183,6 +225,39 @@ def get_author(

return author

def get_authors(
        self,
        author_ids: List[str],
        fields: list = None
) -> 'List[Author]':
    '''Get details for multiple authors at once

    :calls: `POST https://api.semanticscholar.org/graph/v1/author/batch \
        <https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author>`_

    :param list author_ids: list of S2AuthorId (must be <= 1000; not
        validated by this method).
    :param list fields: (optional) list of the fields to be returned.
        Falls back to ``Author.SEARCH_FIELDS`` when empty or ``None``.
    :returns: author data. ``None`` entries in the response are skipped,
        so the result can be shorter than ``author_ids``.
    :rtype: :class:`List` of :class:`Author`
    :raises: BadQueryParametersException: propagated from the requester
        (presumably when the API rejects the query; the raise site is not
        part of this method).
    '''

    if not fields:
        fields = Author.SEARCH_FIELDS

    url = f'{self.api_url}/author/batch'

    # The requester appends this string after '?', so the leading '&'
    # is kept exactly as the rest of the client builds its parameters.
    fields = ','.join(fields)
    parameters = f'&fields={fields}'

    # A non-empty payload is what makes the requester issue a POST.
    payload = {"ids": author_ids}

    data = self._requester.get_data(
        url, parameters, self.auth_header, payload)

    # The batch response may hold null placeholders (seen for IDs the API
    # cannot resolve - confirm against the API docs); building an Author
    # from None would fail, so those entries are dropped.
    authors = [Author(item) for item in data if item is not None]

    return authors

def search_author(
self,
query: str,
Expand Down

0 comments on commit eba2372

Please sign in to comment.