Merge branch 'develop' into feature/social

ecds · May 16, 2019 · be72299 · be72299
2 parents d62c269 + a8b95e3
commit be72299
Show file tree

Hide file tree

Showing 20 changed files with 509 additions and 231 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,6 +9,10 @@ parsetab.py
 media/
 !apps/static/mirador/plugins/media
 
+### django configs and files
+db.cnf
+staticfiles/*
+
 ### python gitignores auto-generated by github
 
 
@@ -122,6 +126,7 @@ static/plugins/annotator/scss/node_modules
 local.py
 venv
 cert*
+*.bk
 
 # Sphinx documentation
 docs/_build/

diff --git a/README.md b/README.md
@@ -41,7 +41,7 @@ source venv/bin/activate
 4. Install the dependencies.
 
 ~~~bash
-pip install -r requirements/local
+pip install -r requirements/local.txt
 ~~~
 
 5. Copy and set up your local settings.
@@ -75,7 +75,15 @@ python manage.py runserver_plus --cert-file cert.crt  0.0.0.0:3000
 
 [![Coverage Status](https://coveralls.io/repos/github/ecds/readux/badge.svg?branch=develop)](https://coveralls.io/github/ecds/readux?branch=develop)
 
-Readux uses Django's default test framework, but is configured to use pytest. To run the tests, simply run:
+Readux uses Django's default test framework, but is configured to use pytest.
+
+Your database user will need to be able to create a database:
+
+~~~
+alter user readux createdb;
+~~~
+
+To run the tests, simply run:
 
 ~~~bash
 pytest

diff --git a/apps/iiif/annotations/models.py b/apps/iiif/annotations/models.py
@@ -103,39 +103,22 @@ class Annotation(models.Model):
     language = models.CharField(max_length=10, default='en')
     owner = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, blank=True, null=True)
     oa_annotation = JSONField(default=dict, blank=False)
-    # TODO should probably change svg to span
+    # TODO Should we keep this for annotations from Mirador, or just get rid of it?
     svg = models.TextField()
 
     ordering = ['order']
 
-    # @property
-    # def identifier(self):
-    #   """
-    #   http://example.org/iiif/book1/list/p1
-    #   """
-    #   return "%s/iiif/%s/canvas/%s" % (settings.HOSTNAME, self.manifest.pid, self.pid)
-
     def parse_oa_annotation(self):
-        dimensions = self.oa_annotation['on'][0]['selector']['default']['value'].split('=')[-1].split(',')
+        dimensions = self.oa_annotation['on']['selector']['default']['value'].split('=')[-1].split(',')
         self.x = dimensions[0]
         self.y = dimensions[1]
         self.w = dimensions[2]
         self.h = dimensions[3]
-        # if isinstance(self.oa_annotation, dict):
-        #     try:
-        #         # canvas = Canvas.objects.get(pid=self.oa_annotation['on'][0]['full'].split('/')[-1])
-        #         self.canvas = Canvas.objects.get(pid=page)
-        #     except (KeyError, TypeError):
-        #         print('Invalid IIIF OA Annotation: Cannot get canvas id.')
-        #         pass
+
 
     def __str__(self):
         return str(self.pk)
 
-# @receiver(signals.pre_save, sender=Annotation)
-# def set_oa_annotation(sender, instance, **kwargs):
-#     instance.parse_oa_annotation()
-
 @receiver(signals.pre_save, sender=Annotation)
 def set_span_element(sender, instance, **kwargs):
     if instance.resource_type in (sender.OCR,):
@@ -145,16 +128,16 @@ def set_span_element(sender, instance, **kwargs):
             # (12*(17.697/1.618))/12
             character_count = len(instance.content)
             font_size = (character_count*(instance.h/1.618))/character_count
-            instance.svg = "<span id='{pk}' style='font-family: monospace; height: {h}px; width: {w}px; font-size: {f}px'>{content}</span>".format(pk=instance.pk, h=str(instance.h), w=str(instance.w), content=instance.content, f=str(font_size))
+            instance.content = "<span id='{pk}' style='font-family: monospace; height: {h}px; width: {w}px; font-size: {f}px'>{content}</span>".format(pk=instance.pk, h=str(instance.h), w=str(instance.w), content=instance.content, f=str(font_size))
         except ValueError as error:
-            instance.svg = ""
+            instance.content = ""
             print("WARNING: {e}".format(e=error))
     else:
         if (type(instance.oa_annotation) == str):
             instance.oa_annotation = json.loads(instance.oa_annotation)
-        instance.svg = instance.oa_annotation['on'][0]['selector']['item']['value']
+        instance.svg = instance.oa_annotation['on']['selector']['item']['value']
         instance.oa_annotation['annotatedBy'] = {'name': 'Me'}
         instance.content = instance.oa_annotation['resource'][0]['chars']
-        instance.resource_type = Annotation.COMMENTING
+        instance.resource_type = instance.oa_annotation['resource'][0]['@type']
         instance.parse_oa_annotation()
 
diff --git a/apps/iiif/annotations/tests.py b/apps/iiif/annotations/tests.py
@@ -13,7 +13,7 @@ class AnnotationTests(APITestCase):
 
     valid_annotation = {
         'oa_annotation': '''{
-            "on": [{
+            "on": {
                 "full": "https://digi.vatlib.it/iiif/MSS_Vat.lat.3225/canvas/p0007",
                 "@type": "oa:SpecificResource",
                 "selector": {
@@ -31,7 +31,7 @@ class AnnotationTests(APITestCase):
                     "@type": "sc:Manifest",
                     "@id": "https://ecds.emory.edu/iiif/MSS_Vat.lat.3225/manifest.json"
                 }
-            }],
+            },
             "@type": "oa:Annotation",
             "@context": "http://iiif.io/api/presentation/2/context.json",
             "@id": "13d3b867-d668-4294-b56a-df3e8860016c",

diff --git a/apps/iiif/canvases/fixtures/canvases.json b/apps/iiif/canvases/fixtures/canvases.json
@@ -5,9 +5,30 @@
       "IIIF_IMAGE_SERVER_BASE": "https://loris.library.emory.edu"
   }
 },
+{
+  "model": "canvases.iserver",
+  "pk": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff",
+  "fields": {
+      "IIIF_IMAGE_SERVER_BASE": "https://iiif.archivelab.org/iiif/"
+  }
+},
 {
   "model": "canvases.canvas",
   "pk": "7261fae2-a24e-4a1c-9743-516f6c4ea0c9",
+  "fields": {
+      "label": "",
+      "pid": "fedora:emory:5622",
+      "summary": null,
+      "manifest": "464d82f6-6ae5-4503-9afc-8e3cdd92a3f1",
+      "position": 6,
+      "height": 3608,
+      "width": 1976,
+      "IIIF_IMAGE_SERVER_BASE": "02ac58b2-2c62-4f67-807e-7927ff191ace"
+  }
+},
+{
+  "model": "canvases.canvas",
+  "pk": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff",
   "fields": {
       "label": "",
       "pid": "15210893.5622.emory.edu$95",
@@ -16,6 +37,6 @@
       "position": 96,
       "height": 3608,
       "width": 1976,
-      "IIIF_IMAGE_SERVER_BASE": "02ac58b2-2c62-4f67-807e-7927ff191ace"
+      "IIIF_IMAGE_SERVER_BASE": "a7f1bd69-766c-4dd4-ab66-f4051fdd4cff"
   }
 }]
diff --git a/apps/iiif/canvases/models.py b/apps/iiif/canvases/models.py
@@ -68,6 +68,11 @@ def thumbnail_crop_landscape(self):
             # landscape
             return "%s/%s/pct:25,0,50,100/,250/0/default.jpg" % (self.IIIF_IMAGE_SERVER_BASE, self.pid)
 
+    @property
+    def result(self):
+        "Empty attribute to hold the result of requests to get OCR data."
+        return None
+
     def __str__(self):
         return str(self.pid)
 
@@ -82,24 +87,23 @@ def set_dimensions(sender, instance, **kwargs):
 
 @receiver(signals.post_save, sender=Canvas)
 def add_ocr(sender, instance, **kwargs):
-    ocr = services.add_positional_ocr(instance)
-    # What comes back from fedora is 8-bit bytes
-    # https://stackoverflow.com/a/9562196
+    result = services.fetch_positional_ocr(instance)
+    ocr = services.add_positional_ocr(instance, result)
     word_order = 1
+    print(ocr)
+    print(type(ocr))
     if ocr is not None:
-        for word in ocr.decode('UTF-8-sig').strip().split('\r\n'):
+        for word in ocr:
             if word == '':
                 continue
             a = Annotation()
             a.canvas = instance
-            print('&&&')
-            print(word)
-            a.x = int(word.split('\t')[0])
-            a.y = int(word.split('\t')[1])
-            a.w = int(word.split('\t')[2])
-            a.h = int(word.split('\t')[3])
+            a.x = word['x']
+            a.y = word['y']
+            a.w = word['w']
+            a.h = word['h']
             a.resource_type = a.OCR
-            a.content = word.split('\t')[4]
+            a.content = word['content']
             a.order = word_order
             a.save()
             word_order += 1

diff --git a/apps/iiif/canvases/services.py b/apps/iiif/canvases/services.py
@@ -11,6 +11,39 @@ def get_canvas_info(canvas):
     results = fetch_url(canvas.service_id, timeout=settings.HTTP_REQUEST_TIMEOUT, format='json')
     return results
 
-def add_positional_ocr(canvas):
-    ocr = fetch_url("{p}{c}{s}".format(p=settings.DATASTREAM_PREFIX, c=canvas.pid.replace('fedora:',''), s=settings.DATASTREAM_SUFFIX), format='text/plain')
-    return ocr
+def fetch_positional_ocr(canvas):
+    if 'archivelab' in canvas.IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE:
+        return fetch_url("https://api.archivelab.org/books/{m}/pages/{p}/ocr?mode=words".format(m=canvas.manifest.pid, p=canvas.pid.split('$')[-1]))
+    else:
+        return fetch_url("{p}{c}{s}".format(p=settings.DATASTREAM_PREFIX, c=canvas.pid.replace('fedora:',''), s=settings.DATASTREAM_SUFFIX), format='text/plain')
+
+def add_positional_ocr(canvas, result):
+    ocr = []
+    if 'archivelab' in canvas.IIIF_IMAGE_SERVER_BASE.IIIF_IMAGE_SERVER_BASE:
+        if 'ocr' in result and result['ocr'] is not None:
+            for index, word in enumerate(result['ocr']):
+                if len(word) > 0:
+                    for w in word:
+                        ocr.append({
+                            'content': w[0],
+                            'w': (w[1][2] - w[1][0]),
+                            'h': (w[1][1] - w[1][3]),
+                            'x': w[1][0],
+                            'y': w[1][3] 
+                        })
+    else:
+        if result is not None:
+            # What comes back from fedora is 8-bit bytes
+            for index, word in enumerate(result.decode('UTF-8-sig').strip().split('\r\n')):
+                if (len(word.split('\t')) == 5):
+                    ocr.append({
+                        'content': word.split('\t')[4],
+                        'w': int(word.split('\t')[2]),
+                        'h': int(word.split('\t')[3]),
+                        'x': int(word.split('\t')[0]),
+                        'y': int(word.split('\t')[1])
+                    })
+    if (ocr):
+        return ocr
+    else:
+        return None
diff --git a/apps/iiif/canvases/tests.py b/apps/iiif/canvases/tests.py
@@ -1,3 +1,78 @@
 from django.test import TestCase
+from .models import Canvas
+from . import services
 
-# Create your tests here.
+
+class CanvasTests(TestCase):
+    fixtures = ['kollections.json', 'manifests.json', 'canvases.json', 'annotations.json']
+    def test_ia_ocr_creation(self):
+        valid_ia_ocr_response = {
+        'ocr': [
+            [
+            ['III', [120, 1600, 180, 1494, 1597]]
+            ],
+            [
+            ['chambray', [78, 1734, 116, 1674, 1734]]
+            ],
+            [
+            ['tacos', [142, 1938, 188, 1854, 1938]]
+            ],
+            [
+            ['freegan', [114, 2246, 196, 2156, 2245]]
+            ],
+            [
+            ['Kombucha', [180, 2528, 220, 2444, 2528]]
+            ],
+            [
+            ['succulents', [558, 535, 588, 501, 535]],
+            ['Thundercats', [928, 534, 1497, 478, 527]]
+            ],
+            [
+            ['poke', [557, 617, 646, 575, 614]],
+            ['VHS', [700, 612, 1147, 555, 610]],
+            ['chartreuse ', [1191, 616, 1209, 589, 609]],
+            ['pabst', [1266, 603, 1292, 569, 603]],
+            ['8-bit', [1354, 602, 1419, 549, 600]],
+            ['narwhal', [1471, 613, 1566, 553, 592]],
+            ['XOXO', [1609, 604, 1670, 538, 596]],
+            ['post-ironic', [1713, 603, 1826, 538, 590]],
+            ['synth', [1847, 588, 1859, 574, 588]]
+            ],
+            [
+            ['lumbersexual', [1741, 2928, 1904, 2881, 2922]]
+            ]
+        ]
+        }
+
+        canvas = Canvas.objects.get(pid='15210893.5622.emory.edu$95')
+        ocr = services.add_positional_ocr(canvas, valid_ia_ocr_response)
+        assert len(ocr) == 17
+        for word in ocr:
+            assert 'w' in word
+            assert 'h' in word
+            assert 'x' in word
+            assert 'y' in word
+            assert 'content' in word
+            assert type(word['w']) == int
+            assert type(word['h']) == int
+            assert type(word['x']) == int
+            assert type(word['y']) == int
+            assert type(word['content']) == str
+
+    def test_fedora_ocr_creation(self):
+        valid_fedora_positional_response = """523\t 116\t 151\t  45\tDistillery\r\n 704\t 117\t 148\t  52\tplaid,"\r\n""".encode('UTF-8-sig')
+
+        canvas = Canvas.objects.get(pid='fedora:emory:5622')
+        ocr = services.add_positional_ocr(canvas, valid_fedora_positional_response)
+        assert len(ocr) == 2
+        for word in ocr:
+            assert 'w' in word
+            assert 'h' in word
+            assert 'x' in word
+            assert 'y' in word
+            assert 'content' in word
+            assert type(word['w']) == int
+            assert type(word['h']) == int
+            assert type(word['x']) == int
+            assert type(word['y']) == int
+            assert type(word['content']) == str
diff --git a/apps/iiif/canvases/views.py b/apps/iiif/canvases/views.py
@@ -30,9 +30,11 @@ def post(self, request, *args, **kwargs):
         payload = json.loads(request.body.decode('utf-8'))
         oa_annotation = json.loads(payload['oa_annotation'])
         canvas = Canvas.objects.get(pid=oa_annotation['on'][0]['full'].split('/')[-1])
+        user_id = request.user.id
         annotation = Annotation()
         annotation.canvas = canvas
         annotation.oa_annotation = oa_annotation
+        annotation.owner_id = user_id
         annotation.save()
         return JsonResponse(oa_annotation, safe=False)