Merge d86272f into 8e20b85

duecredit · Jun 23, 2017 · fa00069 · fa00069
2 parents 8e20b85 + d86272f
commit fa00069
Show file tree

Hide file tree

Showing 3 changed files with 29 additions and 11 deletions.
diff --git a/duecredit/io.py b/duecredit/io.py
@@ -260,12 +260,7 @@ def condition_bibtex(bibtex):
     # as for BIDS paper.  Workaround to add trailing + after pages number
     # related issue asking for a new release: https://github.com/brechtm/citeproc-py/issues/72
     bibtex = re.sub(r'(pages\s*=\s*["{]\d+)(["}])', r'\1+\2', bibtex)
-    # TODO: manage to save/use UTF-8
-    if PY2:
-        # TODO: citeproc master, after 0.3.0 allows to load UTF-8 encoded
-        # files... so we need to fail for a release to take advantage
-        # bibtex = bibtex.encode('utf-8')
-        bibtex = bibtex.encode('ascii', 'ignore')
+    bibtex = bibtex.encode('utf-8')
     return bibtex
 
 
@@ -278,18 +273,30 @@ def format_bibtex(bibtex_entry, style='harvard1'):
             "For formatted output we need citeproc and all of its dependencies "
             "(such as lxml) but there is a problem while importing citeproc: %s"
             % str(e))
+    decode_exceptions = UnicodeDecodeError
+    try:
+        from citeproc.source.bibtex.bibparse import BibTeXDecodeError
+        decode_exceptions = (decode_exceptions, BibTeXDecodeError)
+    except ImportError:
+        # this version doesn't yet have this exception defined
+        pass
     key = bibtex_entry.get_key()
     # need to save it temporarily to use citeproc-py
     fname = tempfile.mktemp(suffix='.bib')
     try:
-        with open(fname, 'wt') as f:
+        with open(fname, 'wb') as f:
             f.write(condition_bibtex(bibtex_entry.rawentry))
         # We need to avoid cpBibTex spitting out warnings
         old_filters = warnings.filters[:]  # store a copy of filters
         warnings.simplefilter('ignore', UserWarning)
         try:
-            # TODO: needs citeproc release past 0.3.0
-            bib_source = cpBibTeX(fname) #, encoding='utf-8')
+            try:
+                bib_source = cpBibTeX(fname)
+            except decode_exceptions as e:
+                # The .bib file must contain UTF-8 characters.  With a
+                # recent citeproc-py (not yet released, past v0.3.0-68-g9800dad)
+                # we should be able to provide the encoding argument
+                bib_source = cpBibTeX(fname, encoding='utf-8')
         except Exception as e:
             lgr.error("Failed to process BibTeX file %s: %s" % (fname, e))
             return "ERRORED: %s" % str(e)

diff --git a/duecredit/tests/test_io.py b/duecredit/tests/test_io.py
@@ -1,4 +1,4 @@
-# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
+# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil; coding: utf-8 -*-
 # ex: set sts=4 ts=4 sw=4 noet:
 # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
 #
@@ -530,6 +530,17 @@ def test_format_bibtex_zenodo_doi():
             """Ghosh, S. et al., 2016. nipype: Release candidate 1 for version 0.12.0.""")
 
 
+def test_format_bibtex_with_utf_characters():
+    """
+    Test that we can correctly parse a BibTeX entry if it contains UTF-8 characters.
+    """
+    # this was fetched on 2017-06-23
+    bibtex_utf8 = import_doi('10.5281/zenodo.60847').replace("Brett", u"Brótt")
+    # there are more Unicode characters inside the BibTeX entry for this DOI,
+    # but we also put one at the beginning for extra testing
+    assert (format_bibtex(BibTeX(bibtex_utf8)) == u'Brótt, M. et al., 2015. nibabel 2.0.1.')
+
+
 def test_is_contained():
     toppath = 'package'
     assert _is_contained(toppath, 'package.module')

diff --git a/setup.py b/setup.py
@@ -82,7 +82,7 @@ def find_packages(path, prefix):
     version=__version__,
     packages=list(find_packages([PACKAGE_ABSPATH], PACKAGE)),
     scripts=[],
-    install_requires=['requests', 'citeproc-py', 'six'],
+    install_requires=['requests', 'citeproc-py>=0.4', 'six'],
     extras_require={
         'tests': [
             'pytest',