Skip to content

Commit

Permalink
Merge pull request #42 from ecohealthalliance/to_json-fix
Browse files — browse the repository at this point in the history
Replace broken doc.to_json method with doc.to_dict
  • Loading branch information
nathanathan committed Nov 14, 2018
2 parents afb5792 + eea5eb6 commit ee5eb06
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 23 deletions.
39 changes: 27 additions & 12 deletions epitator/annodoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""Annotator"""
from __future__ import absolute_import
from __future__ import print_function
import json
from . import maximum_weight_interval_set as mwis
import six
import re
Expand All @@ -24,7 +23,6 @@ def __init__(self, text=None, date=None):
else:
raise TypeError("text must be string or unicode")
self.tiers = {}
self.properties = {}
self.date = date

def add_tier(self, annotator, **kwargs):
Expand Down Expand Up @@ -69,21 +67,38 @@ def create_regex_tier(self, regex, label=None):
match.group(0))], label))
return AnnoTier(spans, presorted=True)

def to_json(self):
json_obj = {'text': self.text,
'properties': self.properties}
def to_dict(self):
"""
Convert the document into a json serializable dictionary.
This does not store all the document's data. For a complete
serialization use pickle.
>>> from .annospan import AnnoSpan
>>> from .annotier import AnnoTier
>>> import datetime
>>> doc = AnnoDoc('one two three', date=datetime.datetime(2011, 11, 11))
>>> doc.tiers = {
... 'test': AnnoTier([AnnoSpan(0, 3, doc), AnnoSpan(4, 7, doc)])}
>>> d = doc.to_dict()
>>> str(d['text'])
'one two three'
>>> str(d['date'])
'2011-11-11T00:00:00Z'
>>> sorted(d['tiers']['test'][0].items())
[('label', None), ('textOffsets', [[0, 3]])]
>>> sorted(d['tiers']['test'][1].items())
[('label', None), ('textOffsets', [[4, 7]])]
"""
json_obj = {
'text': self.text
}
if self.date:
json_obj['date'] = self.date.strftime("%Y-%m-%dT%H:%M:%S") + 'Z'

if self.properties:
json_obj['properties'] = self.properties

json_obj['tiers'] = {}
for name, tier in self.tiers.items():
json_obj['tiers'][name] = tier.to_json()

return json.dumps(json_obj)
json_obj['tiers'][name] = [
span.to_dict() for span in tier]
return json_obj

def filter_overlapping_spans(self, tiers=None, tier_names=None, score_func=None):
"""Remove the smaller of any overlapping spans."""
Expand Down
8 changes: 6 additions & 2 deletions epitator/annospan.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,13 +139,17 @@ def groupdict(self):
>>> number_span_g = SpanGroup([AnnoSpan(0, 3, doc, 'number'),
... AnnoSpan(4, 7, doc, 'number'),
... AnnoSpan(8, 12, doc, 'animal')])
>>> number_span_g.groupdict()
{'number': [AnnoSpan(0-3, number), AnnoSpan(4-7, number)], 'animal': [AnnoSpan(8-12, animal)]}
>>> number_span_g.groupdict()['number']
[AnnoSpan(0-3, number), AnnoSpan(4-7, number)]
>>> number_span_g.groupdict()['animal']
[AnnoSpan(8-12, animal)]
"""
out = {}
for base_span in self.base_spans:
for key, values in base_span.groupdict().items():
out[key] = out.get(key, []) + values
for values in out.values():
values.sort()
if self.label:
out[self.label] = [self]
return out
Expand Down
9 changes: 0 additions & 9 deletions epitator/annotier.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python
# coding=utf8
from __future__ import absolute_import
import json
import re
from .annospan import SpanGroup, AnnoSpan
from . import maximum_weight_interval_set as mwis
Expand Down Expand Up @@ -39,14 +38,6 @@ def __iter__(self):
def __getitem__(self, idx):
return self.spans[idx]

def to_json(self):
docless_spans = []
for span in self.spans:
span_dict = span.__dict__.copy()
del span_dict['doc']
docless_spans.append(span_dict)
return json.dumps(docless_spans)

def group_spans_by_containing_span(self,
other_tier,
allow_partial_containment=False):
Expand Down

0 comments on commit ee5eb06

Please sign in to comment.