Skip to content

Commit

Permalink
Merge pull request #92 from duecredit/enh-return-all
Browse files Browse the repository at this point in the history
ENH: support DUECREDIT_REPORT_ALL=1 to report all citations, not only with functionality used
  • Loading branch information
yarikoptic committed May 30, 2016
2 parents f4d87f0 + 150b717 commit e337232
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 11 deletions.
27 changes: 27 additions & 0 deletions README.md
Expand Up @@ -179,6 +179,33 @@ depicting instructional materials -- textbooks etc on the topic):
[9] Fisher, R.A., 1936. The use of multiple measurements in taxonomic problems. Annals of eugenics, 7(2), pp.179–188.
[10] Gower, J.C. & Ross, G., 1969. Minimum spanning trees and single linkage cluster analysis. Applied statistics, pp.54–64.
[11] Sibson, R., 1973. SLINK: an optimally efficient algorithm for the single-link cluster method. The Computer Journal, 16(1), pp.30–34.

Setting the `DUECREDIT_REPORT_ALL` environment variable (e.g. to `1`) makes
the report also include references for packages in which none of the cited
modules, objects or functions were actually used.
Compared to the previous example, the following output additionally
shows a reference for scikit-learn, since `example_scipy.py` uses
an uncited function from that package.

$> DUECREDIT_REPORT_TAGS=* DUECREDIT_REPORT_ALL=1 duecredit summary

DueCredit Report:
- Scientific tools library / numpy (v 1.10.4) [1]
- Scientific tools library / scipy (v 0.14) [2]
- Hierarchical clustering / scipy.cluster.hierarchy (v 0.14) [3, 4, 5, 6, 7, 8, 9]
- Single linkage hierarchical clustering / scipy.cluster.hierarchy:linkage (v 0.14) [10, 11]
- Machine Learning library / sklearn (v 0.15.2) [12]

3 packages cited
1 module cited
1 function cited

References
----------

[1] Van Der Walt, S., Colbert, S.C. & Varoquaux, G., 2011. The NumPy array: a structure for efficient numerical computation. Computing in Science & Engineering, 13(2), pp.22–30.
[2] Jones, E. et al., 2001. SciPy: Open source scientific tools for Python.
[3] Sneath, P.H. & Sokal, R.R., 1962. Numerical taxonomy. Nature, 193(4818), pp.855–860.
...


Ultimate goals
Expand Down
16 changes: 10 additions & 6 deletions duecredit/io.py
Expand Up @@ -29,6 +29,7 @@

_PREFERRED_ENCODING = locale.getpreferredencoding()


def get_doi_cache_file(doi):
    """Return the cache file path for `doi`, i.e. `doi` joined onto CACHE_DIR."""
    cache_path = os.path.join(CACHE_DIR, doi)
    return cache_path

Expand All @@ -44,7 +45,6 @@ def import_doi(doi, sleep=0.5, retries=10):
return doi

# else -- fetch it
#headers = {'Accept': 'text/bibliography; style=bibtex'}
headers = {'Accept': 'application/x-bibtex; charset=utf-8'}
url = 'http://dx.doi.org/' + doi
while retries > 0:
Expand Down Expand Up @@ -91,11 +91,14 @@ def __init__(self, fd, collector):
self.fd = fd
self.collector = collector

def _filter_citations(self, tags=None):
def _get_collated_citations(self, tags=None, all_=None):
"""Given all the citations, filter only those that the user wants and
those that were actually used"""
if not tags:
tags = os.environ.get('DUECREDIT_REPORT_TAGS', 'reference-implementation,implementation').split(',')
if all_ is None:
# consult env var
all_ = os.environ.get('DUECREDIT_REPORT_ALL', '').lower() in {'1', 'true', 'yes', 'on'}
tags = set(tags)

citations = self.collector.citations
Expand Down Expand Up @@ -125,8 +128,9 @@ def _filter_citations(self, tags=None):
cited_modobj = list(modules) + list(objects)
for package in cited_packages:
package_citations = packages[package]
if list(filter(lambda x: x.cite_module, package_citations)) or \
list(filter(lambda x: _is_contained(package, x), cited_modobj)):
if all_ or \
any(filter(lambda x: x.cite_module, package_citations)) or \
any(filter(lambda x: _is_contained(package, x), cited_modobj)):
continue
else:
# we don't need it
Expand Down Expand Up @@ -161,7 +165,7 @@ def _format_citations(citations, citation_nr):

def dump(self, tags=None):
# get 'model' of citations
packages, modules, objects = self._filter_citations(tags)
packages, modules, objects = self._get_collated_citations(tags)
# put everything into a single dict
pmo = {}
pmo.update(packages)
Expand Down Expand Up @@ -307,7 +311,7 @@ def __init__(self, fd, collector):
super(BibTeXOutput, self).__init__(fd, collector)

def dump(self, tags=None):
packages, modules, objects = self._filter_citations(tags)
packages, modules, objects = self._get_collated_citations(tags)
# get all the citations in order
pmo = {}
pmo.update(packages)
Expand Down
76 changes: 71 additions & 5 deletions duecredit/tests/test_io.py
Expand Up @@ -97,7 +97,7 @@ def test_output():

output = Output(None, collector)

packages, modules, objects = output._filter_citations(tags=['*'])
packages, modules, objects = output._get_collated_citations(tags=['*'])

assert_equal(len(packages), 1)
assert_equal(len(modules), 1)
Expand All @@ -115,7 +115,7 @@ def test_output():

output = Output(None, collector)

packages, modules, objects = output._filter_citations(tags=['*'])
packages, modules, objects = output._get_collated_citations(tags=['*'])

assert_equal(len(packages), 0)
assert_equal(len(modules), 1)
Expand All @@ -132,7 +132,7 @@ def test_output():

output = Output(None, collector)

packages, modules, objects = output._filter_citations(tags=['*'])
packages, modules, objects = output._get_collated_citations(tags=['*'])

assert_equal(len(packages), 1)
assert_equal(len(modules), 1)
Expand All @@ -152,7 +152,7 @@ def test_output():

output = Output(None, collector)

packages, modules, objects = output._filter_citations(tags=['*'])
packages, modules, objects = output._get_collated_citations(tags=['*'])

assert_equal(len(packages), 1)
assert_equal(len(packages['package']), 2)
Expand Down Expand Up @@ -180,7 +180,7 @@ def test_output():

output = Output(None, collector)

packages, modules, objects = output._filter_citations(tags=['edu'])
packages, modules, objects = output._get_collated_citations(tags=['edu'])

assert_equal(len(packages), 1)
assert_equal(len(packages['package']), 1)
Expand All @@ -192,6 +192,72 @@ def test_output():
assert_equal(modules['package.module'][0],
collector.citations[('package.module', entry.get_key())])


def test_output_return_all():
    """Check the `all_` argument / DUECREDIT_REPORT_ALL handling.

    Two package-level citations are registered without `cite_module` and
    without any cited module or object inside those packages.  They must be
    reported only when "report all" is requested, either through the
    DUECREDIT_REPORT_ALL environment variable (consulted when `all_` is left
    as None) or not at all when `all_=False` is passed explicitly.
    """
    entry = BibTeX(_sample_bibtex)
    entry2 = BibTeX(_sample_bibtex2)

    collector = DueCreditCollector()
    collector.cite(entry, path='package')
    collector.cite(entry2, path='package2')

    output = Output(None, collector)

    # Baseline: without "report all" nothing is reported.  `all_` is left
    # unset here, so the environment is consulted -- drop the variable for
    # the duration of the call so that a value exported in the caller's shell
    # cannot change the outcome (patch.dict restores it on exit).
    with patch.dict(os.environ):
        os.environ.pop('DUECREDIT_REPORT_ALL', None)
        packages, modules, objects = output._get_collated_citations(tags=['*'])
    assert_false(packages)
    assert_false(modules)
    assert_false(objects)

    for flag in ['1', 'True', 'TRUE', 'true', 'on', 'yes']:
        with patch.dict(os.environ, {'DUECREDIT_REPORT_ALL': flag}):
            # if all_ is None the value is taken from the environment
            packages, modules, objects = output._get_collated_citations(tags=['*'])
            assert_equal(len(packages), 2)
            assert_false(modules)
            assert_false(objects)
            # an explicitly passed all_ takes precedence over the environment
            packages, modules, objects = output._get_collated_citations(tags=['*'], all_=False)
            assert_false(packages)
            assert_false(modules)
            assert_false(objects)


def test_output_tags():
    """Check the `tags` argument / DUECREDIT_REPORT_TAGS handling.

    One package citation tagged 'edu' (with `cite_module=True`) and one module
    citation tagged 'wip' are registered.  Which of them get reported must
    follow the explicitly passed `tags`, or DUECREDIT_REPORT_TAGS from the
    environment when `tags` is not given.
    """
    entry = BibTeX(_sample_bibtex)
    entry2 = BibTeX(_sample_bibtex2)

    collector = DueCreditCollector()
    collector.cite(entry, path='package', cite_module=True, tags=['edu'])
    collector.cite(entry2, path='package.module', tags=['wip'])

    output = Output(None, collector)

    # Run everything against an environment with both reporting variables
    # removed, so values exported in the caller's shell cannot affect the
    # checks relying on defaults (patch.dict restores the environment on exit).
    with patch.dict(os.environ):
        os.environ.pop('DUECREDIT_REPORT_TAGS', None)
        os.environ.pop('DUECREDIT_REPORT_ALL', None)

        # the '*' tag selects both citations
        packages, modules, objects = output._get_collated_citations(tags=['*'])
        assert_equal(len(packages), 1)
        assert_equal(len(modules), 1)
        assert_false(objects)

        # neither 'edu' nor 'wip' is among the default tags
        packages, modules, objects = output._get_collated_citations()
        assert_false(packages)
        assert_false(modules)
        assert_false(objects)

        for tags in ['edu', 'wip', 'edu,wip']:
            with patch.dict(os.environ, {'DUECREDIT_REPORT_TAGS': tags}):
                # if tags is None the value is taken from the environment
                packages, modules, objects = output._get_collated_citations()
                assert_equal(len(packages), 1 if 'edu' in tags else 0)
                assert_equal(len(modules), 1 if 'wip' in tags else 0)
                assert_false(objects)
                # explicitly passed tags take precedence over the environment
                packages, modules, objects = output._get_collated_citations(tags=['implementation'])
                assert_false(packages)
                assert_false(modules)
                assert_false(objects)


def test_text_output():
entry = BibTeX(_sample_bibtex)
entry2 = BibTeX(_sample_bibtex2)
Expand Down

0 comments on commit e337232

Please sign in to comment.