Skip to content

Commit

Permalink
Merge pull request #1 from isawnyu/issue417
Browse files: browse the repository at this point in the history
  • Loading branch information
paregorios committed Feb 3, 2020
2 parents b854ced + 713f115 commit 68bf02b
Showing 1 changed file with 29 additions and 25 deletions.
54 changes: 29 additions & 25 deletions src/isaw/bibitems/zotero.py
Expand Up @@ -17,37 +17,41 @@ class ZoteroWebParser(grok.GlobalUtility):
item_id = None

def fetch(self, uri):
url_path = urlparse(uri).path
path_parts = url_path.split('/')
if 'itemKey' in path_parts:
self.item_id = path_parts[path_parts.index('itemKey') + 1]
else:
# Guess?
self.item_id = path_parts[-1]
o = urlparse(uri)
if o.hostname != 'www.zotero.org':
return {u"error": u"Only URIs in the www.zotero.org domain can be fetched."}

user_agent = 'ISAWBibItems/(+https://github.com/isawnyu/isaw.bibitems)'
self.request_headers = {
'user-agent': user_agent,
'cache-control': 'no-cache'
}

try:
response = requests.get(uri)
response = requests.get(uri, headers=self.request_headers)
except requests.exceptions.RequestException:
logger.exception('Error fetching Zotero web page.')
return {u"error": u"Could not fetch web page"}

logger.exception('Error fetching Zotero web page: {}'.format(uri))
return {u"error": u"Could not fetch web page {}.".format(uri)}
if response.status_code >= 400:
return {u"error": u"Could not fetch web page"}
return {u"error": u"Could not fetch web page {} (HTTP Error {}).".format(uri, response.status_code)}

parsed = BeautifulSoup(response.text, "lxml")
details = parsed.find(id=u"item-details-div")
if not details:
return {u"error": u"Could not find item-details-div"}
zuri = response.url
if zuri != uri:
# a redirect has occurred
o = urlparse(zuri)

info = loads(details.get('data-loadconfig', '{}'))
self.library_id = info.get('libraryID')
self.library_type = info.get('libraryType')

if not self.library_id:
return {u"error": u"Could not find determine library id"}
path_parts = o.path.split('/')
if path_parts[1] == 'groups':
self.library_type = 'group'
self.library_id = path_parts[2]
else:
self.library_type = 'user'
self.library_id = path_parts[1]

if not self.library_type:
return {u"error": u"Could not find determine library id"}
if 'items' in path_parts:
self.item_id = path_parts[path_parts.index('items') + 1]
else:
return {u"error": u"Could not parse Zotero item id from URI {}".format(zuri)}

data = self._zotero_api_result()
result = {}
Expand Down Expand Up @@ -82,7 +86,7 @@ def fetch(self, uri):
result[u'date_of_publication'] = info.get(u'date')
result[u'text'] = info.get('abstractNote')
result[u'parent_title'] = (
info.get('blogTitle') or info.get('bookTitme') or
info.get('blogTitle') or info.get('bookTitle') or
info.get('dictionaryTitle') or info.get('encyclopediaTitle') or
info.get('forumTitle') or info.get('proceedingsTitle') or
info.get('publicationTitle') or info.get('websiteTitle')
Expand Down

0 comments on commit 68bf02b

Please sign in to comment.