Skip to content

Commit

Permalink
Merge branch 'develop' into feature/social
Browse files Browse the repository at this point in the history
  • Loading branch information
jcmundy committed May 16, 2019
2 parents d62c269 + a8b95e3 commit be72299
Show file tree
Hide file tree
Showing 20 changed files with 509 additions and 231 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ parsetab.py
media/
!apps/static/mirador/plugins/media

### django configs and files
db.cnf
staticfiles/*

### python gitignores auto-generated by github


Expand Down Expand Up @@ -122,6 +126,7 @@ static/plugins/annotator/scss/node_modules
local.py
venv
cert*
*.bk

# Sphinx documentation
docs/_build/
Expand Down
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ source venv/bin/activate
4. Install the dependencies.

~~~bash
pip install -r requirements/local
pip install -r requirements/local.txt
~~~

5. Copy and set up your local settings.
Expand Down Expand Up @@ -75,7 +75,15 @@ python manage.py runserver_plus --cert-file cert.crt 0.0.0.0:3000

[![Coverage Status](https://coveralls.io/repos/github/ecds/readux/badge.svg?branch=develop)](https://coveralls.io/github/ecds/readux?branch=develop)

Readux uses Django's default test framework, but is configured to use pytest. To run the tests, simply run:
Readux uses Django's default test framework, but is configured to use pytest.

The test runner creates (and later drops) a separate test database, so your database user needs permission to create databases:

~~~
alter user readux createdb;
~~~

To run the tests, simply run:

~~~bash
pytest
Expand Down
31 changes: 7 additions & 24 deletions apps/iiif/annotations/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,39 +103,22 @@ class Annotation(models.Model):
language = models.CharField(max_length=10, default='en')
owner = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, blank=True, null=True)
oa_annotation = JSONField(default=dict, blank=False)
# TODO should probably change svg to span
# TODO Should we keep this for annotations from Mirador, or just get rid of it?
svg = models.TextField()

ordering = ['order']

# @property
# def identifier(self):
# """
# http://example.org/iiif/book1/list/p1
# """
# return "%s/iiif/%s/canvas/%s" % (settings.HOSTNAME, self.manifest.pid, self.pid)

def parse_oa_annotation(self):
dimensions = self.oa_annotation['on'][0]['selector']['default']['value'].split('=')[-1].split(',')
dimensions = self.oa_annotation['on']['selector']['default']['value'].split('=')[-1].split(',')
self.x = dimensions[0]
self.y = dimensions[1]
self.w = dimensions[2]
self.h = dimensions[3]
# if isinstance(self.oa_annotation, dict):
# try:
# # canvas = Canvas.objects.get(pid=self.oa_annotation['on'][0]['full'].split('/')[-1])
# self.canvas = Canvas.objects.get(pid=page)
# except (KeyError, TypeError):
# print('Invalid IIIF OA Annotation: Cannot get canvas id.')
# pass


def __str__(self):
return str(self.pk)

# @receiver(signals.pre_save, sender=Annotation)
# def set_oa_annotation(sender, instance, **kwargs):
# instance.parse_oa_annotation()

@receiver(signals.pre_save, sender=Annotation)
def set_span_element(sender, instance, **kwargs):
if instance.resource_type in (sender.OCR,):
Expand All @@ -145,16 +128,16 @@ def set_span_element(sender, instance, **kwargs):
# (12*(17.697/1.618))/12
character_count = len(instance.content)
font_size = (character_count*(instance.h/1.618))/character_count
instance.svg = "<span id='{pk}' style='font-family: monospace; height: {h}px; width: {w}px; font-size: {f}px'>{content}</span>".format(pk=instance.pk, h=str(instance.h), w=str(instance.w), content=instance.content, f=str(font_size))
instance.content = "<span id='{pk}' style='font-family: monospace; height: {h}px; width: {w}px; font-size: {f}px'>{content}</span>".format(pk=instance.pk, h=str(instance.h), w=str(instance.w), content=instance.content, f=str(font_size))
except ValueError as error:
instance.svg = ""
instance.content = ""
print("WARNING: {e}".format(e=error))
else:
if (type(instance.oa_annotation) == str):
instance.oa_annotation = json.loads(instance.oa_annotation)
instance.svg = instance.oa_annotation['on'][0]['selector']['item']['value']
instance.svg = instance.oa_annotation['on']['selector']['item']['value']
instance.oa_annotation['annotatedBy'] = {'name': 'Me'}
instance.content = instance.oa_annotation['resource'][0]['chars']
instance.resource_type = Annotation.COMMENTING
instance.resource_type = instance.oa_annotation['resource'][0]['@type']
instance.parse_oa_annotation()

4 changes: 2 additions & 2 deletions apps/iiif/annotations/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class AnnotationTests(APITestCase):

valid_annotation = {
'oa_annotation': '''{
"on": [{
"on": {
"full": "https://digi.vatlib.it/iiif/MSS_Vat.lat.3225/canvas/p0007",
"@type": "oa:SpecificResource",
"selector": {
Expand All @@ -31,7 +31,7 @@ class AnnotationTests(APITestCase):
"@type": "sc:Manifest",
"@id": "https://ecds.emory.edu/iiif/MSS_Vat.lat.3225/manifest.json"
}
}],
},
"@type": "oa:Annotation",
"@context": "http://iiif.io/api/presentation/2/context.json",
"@id": "13d3b867-d668-4294-b56a-df3e8860016c",
Expand Down
23 changes: 22 additions & 1 deletion apps/iiif/canvases/fixtures/canvases.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,30 @@
"IIIF_IMAGE_SERVER_BASE": "https://loris.library.emory.edu"
}
},
{
"model": "canvases.iserver",
"pk": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff",
"fields": {
"IIIF_IMAGE_SERVER_BASE": "https://iiif.archivelab.org/iiif/"
}
},
{
"model": "canvases.canvas",
"pk": "7261fae2-a24e-4a1c-9743-516f6c4ea0c9",
"fields": {
"label": "",
"pid": "fedora:emory:5622",
"summary": null,
"manifest": "464d82f6-6ae5-4503-9afc-8e3cdd92a3f1",
"position": 6,
"height": 3608,
"width": 1976,
"IIIF_IMAGE_SERVER_BASE": "02ac58b2-2c62-4f67-807e-7927ff191ace"
}
},
{
"model": "canvases.canvas",
"pk": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff",
"fields": {
"label": "",
"pid": "15210893.5622.emory.edu$95",
Expand All @@ -16,6 +37,6 @@
"position": 96,
"height": 3608,
"width": 1976,
"IIIF_IMAGE_SERVER_BASE": "02ac58b2-2c62-4f67-807e-7927ff191ace"
"IIIF_IMAGE_SERVER_BASE": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff"
}
}]
26 changes: 15 additions & 11 deletions apps/iiif/canvases/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ def thumbnail_crop_landscape(self):
# landscape
return "%s/%s/pct:25,0,50,100/,250/0/default.jpg" % (self.IIIF_IMAGE_SERVER_BASE, self.pid)

@property
def result(self):
    """Placeholder for the response of an OCR data request.

    As written, this read-only property always evaluates to ``None``.
    """
    # NOTE(review): there is no setter, so assigning to ``result`` on an
    # instance would raise AttributeError -- confirm this is intended.
    return None

def __str__(self):
return str(self.pid)

Expand All @@ -82,24 +87,23 @@ def set_dimensions(sender, instance, **kwargs):

@receiver(signals.post_save, sender=Canvas)
def add_ocr(sender, instance, **kwargs):
ocr = services.add_positional_ocr(instance)
# What comes back from fedora is 8-bit bytes
# https://stackoverflow.com/a/9562196
result = services.fetch_positional_ocr(instance)
ocr = services.add_positional_ocr(instance, result)
word_order = 1
print(ocr)
print(type(ocr))
if ocr is not None:
for word in ocr.decode('UTF-8-sig').strip().split('\r\n'):
for word in ocr:
if word == '':
continue
a = Annotation()
a.canvas = instance
print('&&&')
print(word)
a.x = int(word.split('\t')[0])
a.y = int(word.split('\t')[1])
a.w = int(word.split('\t')[2])
a.h = int(word.split('\t')[3])
a.x = word['x']
a.y = word['y']
a.w = word['w']
a.h = word['h']
a.resource_type = a.OCR
a.content = word.split('\t')[4]
a.content = word['content']
a.order = word_order
a.save()
word_order += 1
Expand Down
39 changes: 36 additions & 3 deletions apps/iiif/canvases/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,39 @@ def get_canvas_info(canvas):
results = fetch_url(canvas.service_id, timeout=settings.HTTP_REQUEST_TIMEOUT, format='json')
return results

def add_positional_ocr(canvas):
ocr = fetch_url("{p}{c}{s}".format(p=settings.DATASTREAM_PREFIX, c=canvas.pid.replace('fedora:',''), s=settings.DATASTREAM_SUFFIX), format='text/plain')
return ocr
def fetch_positional_ocr(canvas):
    """Request the raw positional OCR data for ``canvas``.

    The endpoint is chosen by checking whether the canvas's image server
    base URL contains ``'archivelab'``.

    :param canvas: Object exposing ``pid``, ``manifest.pid`` and
        ``IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE``.
    :return: Whatever ``fetch_url`` returns for the selected endpoint.
    """
    server_base = canvas.IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE

    if 'archivelab' not in server_base:
        # Datastream URL is built from the configured prefix/suffix around
        # the pid with its 'fedora:' marker removed.
        datastream_url = "{p}{c}{s}".format(
            p=settings.DATASTREAM_PREFIX,
            c=canvas.pid.replace('fedora:', ''),
            s=settings.DATASTREAM_SUFFIX
        )
        return fetch_url(datastream_url, format='text/plain')

    # The page identifier is whatever follows the last '$' in the pid.
    words_url = "https://api.archivelab.org/books/{m}/pages/{p}/ocr?mode=words".format(
        m=canvas.manifest.pid,
        p=canvas.pid.split('$')[-1]
    )
    return fetch_url(words_url)

def add_positional_ocr(canvas, result):
    """Convert a raw OCR response into a list of positional word dicts.

    The parsing strategy is chosen by checking whether the canvas's image
    server base URL contains ``'archivelab'``.

    :param canvas: Object exposing
        ``IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE`` (the server URL).
    :param result: For archivelab canvases, a mapping whose ``'ocr'`` entry
        is a list of lines, each line being a list of ``[text, box]`` items.
        For every other canvas, UTF-8 (optionally BOM-prefixed) bytes holding
        CRLF-delimited rows of five tab-separated columns:
        ``x, y, w, h, content``. May be ``None`` when nothing was fetched.
    :return: A list of dicts with the keys ``content``, ``w``, ``h``, ``x``
        and ``y``, or ``None`` when no words could be extracted.
    :raises ValueError: If a five-column row contains a non-integer
        coordinate.
    """
    # Guard first: `'ocr' in None` would raise TypeError in the archivelab
    # branch, whereas a missing response should simply yield no OCR.
    if result is None:
        return None

    ocr = []
    if 'archivelab' in canvas.IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE:
        lines = result['ocr'] if 'ocr' in result else None
        for line in lines or []:
            for word in line:
                box = word[1]
                # NOTE(review): the arithmetic implies box is
                # [left, bottom, right, top, ...] -- confirm against the
                # archivelab OCR API.
                ocr.append({
                    'content': word[0],
                    'w': box[2] - box[0],
                    'h': box[1] - box[3],
                    'x': box[0],
                    'y': box[3],
                })
    else:
        # What comes back from fedora is 8-bit bytes
        for row in result.decode('UTF-8-sig').strip().split('\r\n'):
            fields = row.split('\t')
            # Skip rows that do not have exactly the five expected columns.
            if len(fields) != 5:
                continue
            ocr.append({
                'content': fields[4],
                'w': int(fields[2]),
                'h': int(fields[3]),
                'x': int(fields[0]),
                'y': int(fields[1]),
            })

    # Callers test for `None`, so an empty result is reported as None.
    return ocr or None
77 changes: 76 additions & 1 deletion apps/iiif/canvases/tests.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,78 @@
from django.test import TestCase
from .models import Canvas
from . import services

# Create your tests here.

class CanvasTests(TestCase):
    """Tests for converting raw OCR responses into positional word dicts."""

    fixtures = ['kollections.json', 'manifests.json', 'canvases.json', 'annotations.json']

    def _assert_positional_words(self, ocr, expected_count):
        """Check that ``ocr`` holds ``expected_count`` well-formed word dicts.

        Every word must carry integer ``w``, ``h``, ``x`` and ``y`` values
        and a string ``content``.
        """
        assert len(ocr) == expected_count
        for word in ocr:
            for key in ('w', 'h', 'x', 'y'):
                assert key in word
                assert isinstance(word[key], int)
            assert 'content' in word
            assert isinstance(word['content'], str)

    def test_ia_ocr_creation(self):
        """An archivelab-style response yields one dict per word."""
        # Each inner list is one line of [text, box] items.
        valid_ia_ocr_response = {
            'ocr': [
                [
                    ['III', [120, 1600, 180, 1494, 1597]]
                ],
                [
                    ['chambray', [78, 1734, 116, 1674, 1734]]
                ],
                [
                    ['tacos', [142, 1938, 188, 1854, 1938]]
                ],
                [
                    ['freegan', [114, 2246, 196, 2156, 2245]]
                ],
                [
                    ['Kombucha', [180, 2528, 220, 2444, 2528]]
                ],
                [
                    ['succulents', [558, 535, 588, 501, 535]],
                    ['Thundercats', [928, 534, 1497, 478, 527]]
                ],
                [
                    ['poke', [557, 617, 646, 575, 614]],
                    ['VHS', [700, 612, 1147, 555, 610]],
                    ['chartreuse ', [1191, 616, 1209, 589, 609]],
                    ['pabst', [1266, 603, 1292, 569, 603]],
                    ['8-bit', [1354, 602, 1419, 549, 600]],
                    ['narwhal', [1471, 613, 1566, 553, 592]],
                    ['XOXO', [1609, 604, 1670, 538, 596]],
                    ['post-ironic', [1713, 603, 1826, 538, 590]],
                    ['synth', [1847, 588, 1859, 574, 588]]
                ],
                [
                    ['lumbersexual', [1741, 2928, 1904, 2881, 2922]]
                ]
            ]
        }

        canvas = Canvas.objects.get(pid='15210893.5622.emory.edu$95')
        ocr = services.add_positional_ocr(canvas, valid_ia_ocr_response)
        # 17 is the total number of [text, box] items across all lines above.
        self._assert_positional_words(ocr, 17)

    def test_fedora_ocr_creation(self):
        """A tab-separated fedora response yields one dict per row."""
        valid_fedora_positional_response = """523\t 116\t 151\t 45\tDistillery\r\n 704\t 117\t 148\t 52\tplaid,"\r\n""".encode('UTF-8-sig')

        canvas = Canvas.objects.get(pid='fedora:emory:5622')
        ocr = services.add_positional_ocr(canvas, valid_fedora_positional_response)
        self._assert_positional_words(ocr, 2)
2 changes: 2 additions & 0 deletions apps/iiif/canvases/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ def post(self, request, *args, **kwargs):
payload = json.loads(request.body.decode('utf-8'))
oa_annotation = json.loads(payload['oa_annotation'])
canvas = Canvas.objects.get(pid=oa_annotation['on'][0]['full'].split('/')[-1])
user_id = request.user.id
annotation = Annotation()
annotation.canvas = canvas
annotation.oa_annotation = oa_annotation
annotation.owner_id = user_id
annotation.save()
return JsonResponse(oa_annotation, safe=False)

Expand Down
Loading

0 comments on commit be72299

Please sign in to comment.