Skip to content

Commit

Permalink
Merge pull request #86 from mvdoc/enh/output
Browse files Browse the repository at this point in the history
REF,ENH: refactor {BibTeX,Text}Output into Output class with subclasses
  • Loading branch information
yarikoptic committed May 25, 2016
2 parents 033a5ef + 0978a57 commit eaefaaf
Show file tree
Hide file tree
Showing 4 changed files with 363 additions and 151 deletions.
3 changes: 2 additions & 1 deletion duecredit/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,10 @@ def __contains__(self, entry):
else:
return entry.path.startswith(self.path + '.')


@property
def key(self):
return CitationKey(self.path, self.entry.get_key())
return CitationKey(self.path, self.entry.key)

@staticmethod
def get_key(path, entry_key):
Expand Down
4 changes: 4 additions & 0 deletions duecredit/entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ def __init__(self, rawentry, key=None):
def get_key(self):
return self._key

@property
def key(self):
return self.get_key()

@property
def rawentry(self):
if PY2:
Expand Down
251 changes: 128 additions & 123 deletions duecredit/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,151 +75,141 @@ def import_doi(doi, sleep=0.5, retries=10):
return bibtex


class EnumeratedEntries(Iterator):
"""A container of entries enumerated referenced by their entry_key"""
def __init__(self):
self._keys2refnr = {}
self._refnr2keys = {}
self._refnr = 1

def add(self, entry_key):
"""Add entry_key and update refnr"""
if entry_key not in self._keys2refnr:
self._keys2refnr[entry_key] = self._refnr
self._refnr2keys[self._refnr] = entry_key
self._refnr += 1

def __getitem__(self, item):
if item not in self._keys2refnr:
raise KeyError('{0} not present'.format(item))
return self._keys2refnr[item]

def fromrefnr(self, refnr):
if refnr not in self._refnr2keys:
raise KeyError('{0} not present'.format(refnr))
return self._refnr2keys[refnr]

def __iter__(self):
return iteritems(self._keys2refnr)

# Python 3
def __next__(self):
return self.next()

def next(self):
yield next(self.__iter__())

def __len__(self):
return len(self._keys2refnr)
def _is_contained(toppath, subpath):
if ':' not in toppath:
return ((toppath == subpath) or
(subpath.startswith(toppath + '.')) or
(subpath.startswith(toppath + ':')))
else:
return subpath.startswith(toppath + '.')


class TextOutput(object): # TODO some parent class to do what...?
def __init__(self, fd, collector, style=None):
class Output(object):
"""A generic class for setting up citations that then will be outputted
differently (e.g., Bibtex, Text, etc.)"""
def __init__(self, fd, collector):
self.fd = fd
self.collector = collector
# TODO: check that CLS style actually exists
self.style = style
if 'DUECREDIT_STYLE' in os.environ.keys():
self.style = os.environ['DUECREDIT_STYLE']
else:
self.style = 'harvard1'

# TODO: refactor name to sth more intuitive
def _model_citations(self, tags=None):
def _filter_citations(self, tags=None):
"""Given all the citations, filter only those that the user wants and
those that were actually used"""
if not tags:
tags = os.environ.get('DUECREDIT_REPORT_TAGS', 'reference-implementation,implementation').split(',')
tags = set(tags)

citations = self.collector.citations
if tags != {'*'}:
# Filter out citations
# Filter out citations based on tags
citations = dict((k, c)
for k, c in iteritems(citations)
if tags.intersection(c.tags))

packages = {}
modules = {}
objects = {}

for key in ('citations', 'entry_keys'):
packages[key] = defaultdict(list)
modules[key] = defaultdict(list)
objects[key] = defaultdict(list)
packages = defaultdict(list)
modules = defaultdict(list)
objects = defaultdict(list)

# for each path store both a list of entry keys and of citations
# store the citations according to their path and divide them into
# the right level
for (path, entry_key), citation in iteritems(citations):
if ':' in path:
target_dict = objects
objects[path].append(citation)
elif '.' in path:
target_dict = modules
modules[path].append(citation)
else:
target_dict = packages
target_dict['citations'][path].append(citation)
target_dict['entry_keys'][path].append(entry_key)
packages[path].append(citation)

# now we need to filter out the packages that don't have modules
# or objects cited, or are specifically requested
cited_packages = list(packages)
cited_modobj = list(modules) + list(objects)
for package in cited_packages:
package_citations = packages[package]
if list(filter(lambda x: x.cite_module, package_citations)) or \
list(filter(lambda x: _is_contained(package, x), cited_modobj)):
continue
else:
# we don't need it
del packages[package]

return packages, modules, objects

def dump(self, tags=None):
raise NotImplementedError



class TextOutput(Output):
def __init__(self, fd, collector, style=None):
super(TextOutput, self).__init__(fd, collector)
self.style = style
if 'DUECREDIT_STYLE' in os.environ.keys():
self.style = os.environ['DUECREDIT_STYLE']
else:
self.style = 'harvard1'


@staticmethod
def _format_citations(citations, citation_nr):
descriptions = map(str, set(str(r.description) for r in citations))
versions = map(str, set(str(r.version) for r in citations))
refnrs = map(str, [citation_nr[c.entry.key] for c in citations])
path = citations[0].path

return '- {0} / {1} (v {2}) [{3}]\n'.format(
", ".join(descriptions), path, ', '.join(versions), ', '.join(refnrs))

def dump(self, tags=None):
# get 'model' of citations
packages, modules, objects = self._model_citations(tags)
# mapping key -> refnr
enum_entries = EnumeratedEntries()

citations_ordered = []
# set up view

# package level
sublevels = [modules, objects]
for package in sorted(packages['entry_keys']):
for entry_key in packages['entry_keys'][package]:
enum_entries.add(entry_key)
citations_ordered.append(package)
# sublevels
for sublevel in sublevels:
for obj in sorted(filter(lambda x: package in x, sublevel['entry_keys'])):
for entry_key_obj in sublevel['entry_keys'][obj]:
enum_entries.add(entry_key_obj)
citations_ordered.append(obj)

# Now we can "render" different views of our "model"
# Here for now just text BUT that is where we can "split" the logic and provide
# different renderings given the model -- text, rest, md, tex+latex, whatever
self.fd.write('\nDueCredit Report:\n')
packages, modules, objects = self._filter_citations(tags)
# put everything into a single dict
pmo = {}
pmo.update(packages)
pmo.update(modules)
pmo.update(objects)

# get all the paths
paths = sorted(list(pmo))
# get all the entry_keys in order
entry_keys = [c.entry.key for p in paths for c in pmo[p]]
# make a dictionary entry_key -> citation_nr
citation_nr = defaultdict(int)
refnr = 1
for entry_key in entry_keys:
if entry_key not in citation_nr:
citation_nr[entry_key] = refnr
refnr += 1

for path in citations_ordered:
if ':' in path:
self.fd.write(' ')
target_dict = objects
elif '.' in path:
self.fd.write('\nDueCredit Report:\n')
start_refnr = 1
for path in paths:
# since they're lexicographically sorted by path, dependencies
# should be maintained
cit = pmo[path]
if ':' in path or '.' in path:
self.fd.write(' ')
target_dict = modules
else:
target_dict = packages
# TODO: absorb common logic into a common function
citations = target_dict['citations'][path]
entry_keys = target_dict['entry_keys'][path]
descriptions = sorted(map(str, set(str(r.description) for r in citations)))
versions = sorted(map(str, set(str(r.version) for r in citations)))
refnrs = sorted([str(enum_entries[entry_key]) for entry_key in entry_keys])
self.fd.write('- {0} / {1} (v {2}) [{3}]\n'.format(
", ".join(descriptions), path, ', '.join(versions), ', '.join(refnrs)))
self.fd.write(self._format_citations(cit, citation_nr))
start_refnr += len(cit)

# Print out some stats
obj_names = ('packages', 'modules', 'functions')
n_citations = map(len, (packages['citations'], modules['citations'], objects['citations']))
for citation_type, n in zip(obj_names, n_citations):
self.fd.write('\n{0} {1} cited'.format(n, citation_type))

if enum_entries:
citations_fromentrykey = self.collector._citations_fromentrykey()
stats = [(len(packages), 'package'),
(len(modules), 'module'),
(len(objects), 'function')]
for n, cit_type in stats:
self.fd.write('\n{0} {1} cited'.format(n, cit_type if n == 1
else cit_type + 's'))
# now print out references
printed_keys = []
if len(pmo) > 0:
self.fd.write('\n\nReferences\n' + '-' * 10 + '\n')
# collect all the entries used
refnr_key = [(nr, enum_entries.fromrefnr(nr)) for nr in range(1, len(enum_entries)+1)]
for nr, key in refnr_key:
self.fd.write('\n[{0}] '.format(nr))
citation_text = get_text_rendering(citations_fromentrykey[key], style=self.style)
if PY2:
citation_text = citation_text.encode(_PREFERRED_ENCODING)
self.fd.write(citation_text)
for path in paths:
for cit in pmo[path]:
ek = cit.entry.key
if ek not in printed_keys:
self.fd.write('\n[{0}] '.format(citation_nr[ek]))
self.fd.write(get_text_rendering(cit,
style=self.style))
printed_keys.append(ek)
self.fd.write('\n')


Expand Down Expand Up @@ -312,17 +302,32 @@ def load(cls, filename=DUECREDIT_FILE):
with open(filename, 'rb') as f:
return pickle.load(f)

class BibTeXOutput(object): # TODO some parent class to do what...?
class BibTeXOutput(Output):
def __init__(self, fd, collector):
self.fd = fd
self.collector = collector
super(BibTeXOutput, self).__init__(fd, collector)

def dump(self):
for citation in self.collector.citations.values():
def dump(self, tags=None):
packages, modules, objects = self._filter_citations(tags)
# get all the citations in order
pmo = {}
pmo.update(packages)
pmo.update(modules)
pmo.update(objects)

# get all the paths
paths = sorted(list(pmo))

entries = []
for path in paths:
for c in pmo[path]:
if c.entry not in entries:
entries.append(c.entry)

for entry in entries:
try:
bibtex = get_bibtex_rendering(citation.entry)
bibtex = get_bibtex_rendering(entry)
except:
lgr.warning("Failed to generate bibtex for %s" % citation.entry)
lgr.warning("Failed to generate bibtex for %s" % entry)
continue
self.fd.write(bibtex.rawentry + "\n")

Expand Down

0 comments on commit eaefaaf

Please sign in to comment.