Skip to content

Commit

Permalink
Even more code clean up and test coverage.
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Apr 13, 2020
1 parent dd9042b commit ed8e15c
Show file tree
Hide file tree
Showing 34 changed files with 947 additions and 766 deletions.
1 change: 1 addition & 0 deletions .coveragerc
Expand Up @@ -13,4 +13,5 @@ exclude_lines =
from
import
logger
LOGGER
pragma: no cover
2 changes: 1 addition & 1 deletion apps/iiif/annotations/admin.py
@@ -1,4 +1,4 @@
"""Django admin module for `apps.iiif.annotations`"""
"""Django admin module for :class:`apps.iiif.annotations`"""
from django.contrib import admin
from import_export import resources, fields
from import_export.admin import ImportExportModelAdmin
Expand Down
2 changes: 1 addition & 1 deletion apps/iiif/annotations/apps.py
@@ -1,4 +1,4 @@
"""Configuration for `apps.iiif.annotations`"""
"""Configuration for :class:`apps.iiif.annotations`"""
from django.apps import AppConfig

class AnnotationsConfig(AppConfig):
Expand Down
27 changes: 26 additions & 1 deletion apps/iiif/annotations/fixtures/annotations.json
Expand Up @@ -6,7 +6,7 @@
"y": 928,
"w": 22,
"h": 22,
"order": 54,
"order": 1,
"content": "a",
"resource_type": "cnt:ContentAsText",
"motivation": "sc:painting",
Expand Down Expand Up @@ -147,4 +147,29 @@
},
"svg": "<span id='f846587c-1e1c-44d3-b1ce-20c0f7104dc5' style='font-family: monospace; height: 28px; width: 17px; font-size: 17.305315203955498px'>,</span>"
}
},
{
"model": "annotations.annotation",
"pk": "f846588c-1e1c-44d3-b1ce-20c0f6109dc5",
"fields": {
"x": 1146,
"y": 928,
"w": 22,
"h": 22,
"order": 1,
"content": ",",
"resource_type": "cnt:ContentAsText",
"motivation": "sc:painting",
"format": "text/plain",
"canvas": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff",
"language": "en",
"owner": null,
"oa_annotation": {
"annotatedBy": {
"name": "ocr"
},
"@id": "f846587c-1e1c-44d3-b1ce-20c0f7104dc5"
},
"svg": "<span id='f846588c-1e1c-44d3-b1ce-20c0f6109dc5' style='font-family: monospace; height: 28px; width: 17px; font-size: 17.305315203955498px'>stankonia</span>"
}
}]
2 changes: 1 addition & 1 deletion apps/iiif/annotations/models.py
@@ -1,4 +1,4 @@
"""Django models for `apps.iiif.annotations`"""
"""Django models for :class:`apps.iiif.annotations`"""
from django.contrib.postgres.fields import JSONField
from django.db import models, IntegrityError
from django.conf import settings
Expand Down
2 changes: 1 addition & 1 deletion apps/iiif/annotations/tests/tests.py
@@ -1,5 +1,5 @@
# pylint: disable = missing-function-docstring, invalid-name, line-too-long
"""Test cases for `apps.iiif.annotations`."""
"""Test cases for :class:`apps.iiif.annotations`."""
from django.test import TestCase, Client
from django.test import RequestFactory
from django.conf import settings
Expand Down
2 changes: 1 addition & 1 deletion apps/iiif/annotations/urls.py
@@ -1,4 +1,4 @@
"""Url patterns for `apps.iiif.annotations`"""
"""Url patterns for :class:`apps.iiif.annotations`"""
from django.urls import path
from . import views

Expand Down
2 changes: 1 addition & 1 deletion apps/iiif/annotations/views.py
@@ -1,4 +1,4 @@
"""Django views for `apps.iiif.annotations`"""
"""Django views for :class:`apps.iiif.annotations`"""
import json
from django.views import View
from django.core.serializers import serialize
Expand Down
5 changes: 3 additions & 2 deletions apps/iiif/canvases/management/commands/rebuild_ocr.py
Expand Up @@ -49,7 +49,7 @@ def handle(self, *args, **options):
)
elif options['canvas']:
try:
canvas = Canvas.objects.get(pid=options['canvas'])
canvas = Canvas.objects.get(pid=options['canvas'])

self.__rebuild(canvas, options['testing'])
self.stdout.write(
Expand Down Expand Up @@ -107,7 +107,8 @@ def __rebuild(self, canvas, testing=False):
y=word['y'],
canvas=canvas,
owner=USER.objects.get(username='ocr'),
resource_type=Annotation.OCR
resource_type=Annotation.OCR,
order=word_order
)
word_order += 1
anno.content = word['content']
Expand Down
44 changes: 42 additions & 2 deletions apps/iiif/canvases/services.py
Expand Up @@ -28,6 +28,44 @@ def get_fake_canvas_info(canvas):
response = fetch_url(canvas.service_id, timeout=settings.HTTP_REQUEST_TIMEOUT, format='json')
return response

def get_fake_ocr():
    """Return a fixed list of word-level OCR dicts for use in tests.

    Each entry carries the bounding box keys ``h``, ``w``, ``x`` and ``y``.
    The ``content`` key is intentionally inconsistent across entries so that
    callers are exercised against normal text, an empty string, a missing
    ``content`` key and a whitespace-only string.

    :return: List of dicts of canned OCR data.
    :rtype: list
    """
    return [
        # Ordinary word.
        {
            "h": 22,
            "w": 22,
            "x": 1146,
            "y": 928,
            "content": "Dope"
        },
        # Empty content.
        {
            "h": 222,
            "w": 222,
            "x": 11462,
            "y": 9282,
            "content": ""
        },
        # Ordinary (long) word.
        {
            "h": 21,
            "w": 21,
            "x": 1141,
            "y": 9281,
            "content": "southernplayalisticadillacmuzik"
        },
        # No "content" key at all.
        {
            "h": 213,
            "w": 213,
            "x": 11413,
            "y": 92813
        },
        # Whitespace-only content.
        {
            "h": 214,
            "w": 214,
            "x": 11414,
            "y": 92814,
            "content": " "
        }
    ]

def get_ocr(canvas):
"""Function to determine method for fetching OCR for a canvas.
Expand All @@ -36,6 +74,8 @@ def get_ocr(canvas):
:return: List of dicts of parsed OCR data.
:rtype: list
"""
if 'fake.info' in canvas.IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE:
return get_fake_ocr()
if canvas.default_ocr == "line":
result = fetch_alto_ocr(canvas)
return add_alto_ocr(result)
Expand Down Expand Up @@ -178,8 +218,8 @@ def add_alto_ocr(result):
for zones in surface:
if 'zone' in zones.tag:
for line in zones:
if line[-1].text is None:
continue
# if line[-1].text is None:
# continue
ocr.append({
'content': line[-1].text,
'h': int(line.attrib['lry']) - int(line.attrib['uly']),
Expand Down
101 changes: 79 additions & 22 deletions apps/iiif/canvases/tests/tests.py
@@ -1,9 +1,10 @@
"""
Test cases for `apps.iiif.canvases`
Test cases for :class:`apps.iiif.canvases`
"""
import json
from io import StringIO
import httpretty
from bs4 import BeautifulSoup
from django.test import TestCase, Client
from django.urls import reverse
from django.core.management import call_command
Expand All @@ -29,7 +30,7 @@ def setUp(self):
def test_default_iiif_image_server_url(self):
    """A freshly constructed IServer uses the base URL from settings."""
    expected_base = settings.IIIF_IMAGE_SERVER_BASE
    assert IServer().IIIF_IMAGE_SERVER_BASE == expected_base

def test_app_config(self):
assert CanvasesConfig.verbose_name == 'Canvases'
assert CanvasesConfig.name == 'apps.iiif.canvases'
Expand Down Expand Up @@ -94,7 +95,7 @@ def test_ia_ocr_creation(self):

def test_fedora_ocr_creation(self):
valid_fedora_positional_response = """523\t 116\t 151\t 45\tDistillery\r\n 704\t 117\t 148\t 52\tplaid,"\r\n""".encode('UTF-8-sig')

ocr = services.add_positional_ocr(self.canvas, valid_fedora_positional_response)
assert len(ocr) == 2
for word in ocr:
Expand Down Expand Up @@ -145,6 +146,9 @@ def test_line_by_line_from_alto(self):
assert ocr.x == 916
assert ocr.y == 0

for num, anno in enumerate(updated_canvas.annotation_set.all(), start=1):
assert anno.order == num

@httpretty.activate
def test_ocr_from_tsv(self):
tsv = """content\tx\ty\tw\th\nJordan\t459\t391\t89\t43\t\n\t453\t397\t397\t3\n \t1\t2\t3\t4\n"""
Expand Down Expand Up @@ -173,7 +177,7 @@ def test_from_bad_alto(self):
assert ocr is None

def test_canvas_detail(self):
kwargs = { 'manifest': self.manifest.pid, 'pid': self.canvas.pid }
kwargs = {'manifest': self.manifest.pid, 'pid': self.canvas.pid}
url = reverse('RenderCanvasDetail', kwargs=kwargs)
response = self.client.get(url)
serialized_canvas = json.loads(response.content.decode('UTF-8-sig'))
Expand Down Expand Up @@ -211,7 +215,7 @@ def test_wide_image_crops(self):
assert canvas.thumbnail_crop_landscape == "%s/%s/pct:25,0,50,100/,250/0/default.jpg" % (canvas.IIIF_IMAGE_SERVER_BASE, pid)
assert canvas.thumbnail_crop_tallwide == "%s/%s/pct:5,5,90,90/250,/0/default.jpg" % (canvas.IIIF_IMAGE_SERVER_BASE, pid)
assert canvas.thumbnail_crop_volume == "%s/%s/pct:25,15,50,85/,600/0/default.jpg" % (canvas.IIIF_IMAGE_SERVER_BASE, pid)

def test_result_property(self):
    """The canvas ``result`` property equals the expected joined text."""
    expected_text = "a retto , dio Quef\u00eca de'"
    assert self.canvas.result == expected_text

Expand All @@ -221,12 +225,18 @@ def test_get_image_info(self):
updated_canvas = Canvas.objects.get(pk=self.canvas.pk)
assert updated_canvas.image_info['height'] == 3000
assert updated_canvas.image_info['width'] == 3000

def test_command_output_rebuild_canvas(self):
    """``rebuild_ocr`` reports success when given the first canvas's pid."""
    captured = StringIO()
    first_canvas = Canvas.objects.all().first()
    call_command('rebuild_ocr', canvas=first_canvas.pid, stdout=captured)
    assert 'OCR rebuilt for canvas' in captured.getvalue()

def test_command_output_rebuild_canvas_with_no_existing_annotations(self):
    """``rebuild_ocr`` reports success for a canvas freshly created by the factory."""
    new_canvas = CanvasFactory.create(manifest=self.manifest)
    captured = StringIO()
    call_command('rebuild_ocr', canvas=new_canvas.pid, stdout=captured)
    command_output = captured.getvalue()
    assert 'OCR rebuilt for canvas' in command_output

def test_command_output_rebuild_manifest(self):
out = StringIO()
call_command('rebuild_ocr', manifest=Manifest.objects.all().first().pid, stdout=out)
Expand All @@ -247,22 +257,69 @@ def test_command_output_rebuild_pid_not_given(self):
call_command('rebuild_ocr', stdout=out)
assert 'ERROR: your must provide a manifest or canvas pid' in out.getvalue()

# def test_command_rebuild_ocr(self):
# iiif_server = IServer.objects.get(IIIF_IMAGE_SERVER_BASE='https://images.readux.ecds.emory/')
# self.canvas.IIIF_IMAGE_SERVER_BASE = iiif_server
# self.canvas.save()
# out = StringIO()
# self.canvas.label = 'karl'
# call_command('rebuild_ocr', canvas=self.canvas.pid, testing=True, stdout=out)
# assert 'yup' in out.getvalue()
# # ocr = canvas.annotation_set.all().first()
# # assert ocr.h == 43
# # assert ocr.w == 89
# # assert ocr.x == 459
# # assert ocr.y == 391
# # assert 'Jordan' in ocr.content
# # assert len(canvas.annotation_set.all()) == 1

def test_command_rebuild_ocr_canvas(self):
    """Rebuild OCR for a single canvas and check the first annotation.

    The canvas is pointed at the ``http://fake.info`` image server before
    ``rebuild_ocr`` is run for its pid. Afterwards the first annotation
    must keep the same bounding box, contain ``Dope`` instead of the
    original text, and the annotation count must have grown by one.
    """
    expected_box = (22, 22, 1146, 928)
    count_before = self.canvas.annotation_set.all().count()

    # Check the OCR attributes before rebuilding.
    anno_before = self.canvas.annotation_set.all().first()
    assert (anno_before.h, anno_before.w, anno_before.x, anno_before.y) == expected_box
    markup_before = BeautifulSoup(anno_before.content, 'html.parser')
    assert 'Dope' not in markup_before.string
    # Exactly one level of <span>: present, but not nested.
    assert markup_before.span is not None
    assert markup_before.span.span is None

    fake_server = IServer.objects.get(IIIF_IMAGE_SERVER_BASE='http://fake.info')
    self.canvas.IIIF_IMAGE_SERVER_BASE = fake_server
    self.canvas.save()

    captured = StringIO()
    call_command('rebuild_ocr', canvas=self.canvas.pid, testing=True, stdout=captured)
    assert 'OCR rebuilt for canvas' in captured.getvalue()

    anno_after = self.canvas.annotation_set.all().first()
    assert (anno_after.h, anno_after.w, anno_after.x, anno_after.y) == expected_box
    markup_after = BeautifulSoup(anno_after.content, 'html.parser')
    assert 'Dope' in markup_after.string
    assert markup_before.string not in markup_after.string
    assert markup_after.span is not None
    assert markup_after.span.span is None
    assert len(self.canvas.annotation_set.all()) == count_before + 1

def test_command_rebuild_ocr_manifest(self):
    """Rebuild OCR for a whole manifest and check one canvas's first annotation.

    A known canvas is pointed at the ``http://fake.info`` image server
    before ``rebuild_ocr`` is run for its manifest's pid. Afterwards the
    canvas's first annotation must keep the same bounding box, contain
    ``Dope`` instead of the original text, and the annotation count must
    have grown by one.
    """
    expected_box = (22, 22, 1146, 928)
    target = Canvas.objects.get(pk='a7f1bd69-766c-4dd4-ab66-f4051fdd4cff')
    count_before = target.annotation_set.all().count()

    # Check the OCR attributes before rebuilding.
    anno_before = target.annotation_set.all().first()
    assert (anno_before.h, anno_before.w, anno_before.x, anno_before.y) == expected_box
    markup_before = BeautifulSoup(anno_before.content, 'html.parser')
    assert 'southernplayalisticadillacmuzik' not in markup_before.string
    # Exactly one level of <span>: present, but not nested.
    assert markup_before.span is not None
    assert markup_before.span.span is None

    fake_server = IServer.objects.get(IIIF_IMAGE_SERVER_BASE='http://fake.info')
    target.IIIF_IMAGE_SERVER_BASE = fake_server
    target.save()

    captured = StringIO()
    call_command('rebuild_ocr', manifest=target.manifest.pid, testing=True, stdout=captured)
    assert 'OCR rebuilt for manifest' in captured.getvalue()

    anno_after = target.annotation_set.all().first()
    assert (anno_after.h, anno_after.w, anno_after.x, anno_after.y) == expected_box
    markup_after = BeautifulSoup(anno_after.content, 'html.parser')
    assert 'Dope' in markup_after.string
    assert markup_before.string not in markup_after.string
    assert markup_after.span is not None
    assert markup_after.span.span is None
    assert len(target.annotation_set.all()) == count_before + 1

def test_no_alto_for_internet_archive(self):
iiif_server = IServer.objects.get(IIIF_IMAGE_SERVER_BASE='https://iiif.archivelab.org/iiif/')
canvas = CanvasFactory(IIIF_IMAGE_SERVER_BASE=iiif_server, manifest=self.canvas.manifest)
Expand Down
2 changes: 1 addition & 1 deletion apps/iiif/canvases/urls.py
@@ -1,5 +1,5 @@
"""
URL patterns for `apps.iiif.canvases`
URL patterns for :class:`apps.iiif.canvases`
"""
from django.urls import path
from .views import IIIFV2Detail, IIIFV2List
Expand Down
3 changes: 2 additions & 1 deletion apps/iiif/canvases/views.py
Expand Up @@ -70,5 +70,6 @@ def get(self, request, *args, **kwargs): # pylint: disable = unused-argument
'canvas',
self.get_queryset()
)
)
),
safe=False
)
2 changes: 1 addition & 1 deletion apps/iiif/kollections/tests/factories.py
Expand Up @@ -8,7 +8,7 @@

class CollectionFactory(DjangoModelFactory):
"""
Factory for mocking `apps.iiif.kollections.models.Collection` objects.
Factory for mocking :class:`apps.iiif.kollections.models.Collection` objects.
"""
pid = str(random.randrange(2000, 5000))
label = Faker("name")
Expand Down
9 changes: 9 additions & 0 deletions apps/iiif/kollections/tests/tests.py
Expand Up @@ -11,6 +11,7 @@
import config.settings.local as settings
from ..views import CollectionSitemap
from ..models import Collection
from ..admin import CollectionAdmin, ManifestInline
from ...manifests.models import Manifest

class KollectionTests(TestCase):
Expand Down Expand Up @@ -168,3 +169,11 @@ def test_serialize_single_object(self):
collection = json.loads(serialize('kollection', [Collection.objects.all().first()]))
assert collection['@type'] == 'sc:Collection'
assert isinstance(collection, dict)

def test_collection_admin_inlines(self):
    """The first ``CollectionAdmin`` inline reports the manifest's pid.

    ``manifest_pid`` is called unbound, with ``ManifestInline`` standing in
    for ``self``, on the first row of the manifest/collection through table.
    """
    # Fetch the through-table row once so the expected pid and the value
    # passed to the admin helper are guaranteed to come from the same object.
    through_row = Manifest.collections.through.objects.all().first()
    pid = through_row.manifest.pid
    admin_pid = CollectionAdmin.inlines[0].manifest_pid(ManifestInline, through_row)
    assert pid == admin_pid

0 comments on commit ed8e15c

Please sign in to comment.