Skip to content

Commit

Permalink
Added abstract retrieval
Browse files Browse the repository at this point in the history
  • Loading branch information
eddotman committed Aug 12, 2015
1 parent fc4a86a commit 6bdd6ec
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
35 changes: 31 additions & 4 deletions articledownloader/articledownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def get_pdf_from_doi(self, doi, writefile, mode):
:param mode: either 'crossref' or 'elsevier', depending on how we wish to access the file
:type mode: str
:returns: True on succesful write, False otherwise
:returns: True on successful write, False otherwise
:rtype: bool
'''

Expand All @@ -105,9 +105,6 @@ def get_pdf_from_doi(self, doi, writefile, mode):
if mode == 'elsevier':
if self.check_els_entitlement(doi):
try:
name = re.sub('[\(\)]', '', doi)
name = re.sub('\s+', '', name)

pdf_url='http://api.elsevier.com/content/article/doi:' + doi + '?view=FULL'
self.headers['Accept'] = 'application/pdf'

Expand All @@ -122,6 +119,36 @@ def get_pdf_from_doi(self, doi, writefile, mode):

return False

def get_abstract_from_doi(self, doi, mode):
    '''
    Returns abstract as a unicode string given a DOI

    The abstract is read from the 'dc:description' field of the JSON
    full-text retrieval response. Any failure along the way (unsupported
    mode, no entitlement, connection error, non-200 status, malformed or
    incomplete response) yields None rather than an exception.

    :param doi: DOI string for the article we want to grab metadata for
    :type doi: str

    :param mode: Only supports 'elsevier' for now
    :type mode: str

    :returns: An abstract (or None on failure)
    :rtype: unicode
    '''

    # Only the Elsevier API is supported, and only for entitled articles.
    if mode != 'elsevier' or not self.check_els_entitlement(doi):
        return None

    url = 'http://api.elsevier.com/content/article/doi:' + doi + '?view=FULL'

    # Work on a per-request copy so the JSON Accept header does not leak
    # into self.headers and affect later requests made by other methods.
    headers = dict(self.headers)
    headers['Accept'] = 'application/json'

    try:
        r = requests.get(url, headers=headers)
    except requests.exceptions.ConnectionError:
        # Connection failed (original author's note: seen when the API
        # download limit is exceeded)
        return None

    if r.status_code != 200:
        return None

    try:
        payload = json.loads(r.text)
        abstract = payload['full-text-retrieval-response']['coredata']['dc:description']
    except (KeyError, TypeError, ValueError):
        # Body was not JSON, or did not have the expected structure
        return None

    # A JSON null description must not be turned into the text u'None'.
    if abstract is None:
        return None

    # 'unicode' is the Python 2 builtin text type used by this module.
    return unicode(abstract)

def load_queries_from_csv(self, csvf):
'''
Loads a list of queries from a CSV file
Expand Down
7 changes: 5 additions & 2 deletions articledownloader/tests/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class Tester(TestCase):
def setUp(self):
self.downloader = ArticleDownloader('NO_API_KEY')
self.downloader = ArticleDownloader(environ.get('ELS_API_KEY'))
self.doi = '10.1016/j.nantod.2008.10.014'
self.pdf_file = TemporaryFile(mode='wb')

Expand All @@ -21,9 +21,12 @@ def test_download(self):
self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, 'elsevier')
self.downloader.get_pdf_from_doi(self.doi, self.pdf_file, 'crossref')

def test_abstract_download(self):
#Smoke test: only checks that the call completes without raising; the returned abstract is not asserted
self.downloader.get_abstract_from_doi(self.doi, 'elsevier')

def test_entitlement(self):
#Test entitlement
self.assertFalse(self.downloader.check_els_entitlement(self.doi))
self.assertTrue(self.downloader.check_els_entitlement(self.doi))

def test_search(self):
#Search test
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
setup(
name = 'articledownloader',
packages = ['articledownloader'], # this must be the same as the name above
version = '2.2',
version = '2.3',
description = 'A class for downloading scientific journal articles',
author = 'Edward Kim',
author_email = 'eddotman@gmail.com',
url = 'https://github.com/eddotman/article-downloader', # use the URL to the github repo
download_url = 'https://www.github.com/eddotman/article-downloader/tarball/2.2',
download_url = 'https://www.github.com/eddotman/article-downloader/tarball/2.3',
keywords = ['journal', 'paper', 'article', 'downloader'], # arbitrary keywords
)

0 comments on commit 6bdd6ec

Please sign in to comment.