Skip to content
Browse files

Implementação básica da função ``datacollector.identify_changes`` que…

… identifica documentos que necessitam ser visitados pelo utilitário de sincronização
  • Loading branch information...
1 parent 6856a1c commit f0cb9d730c0b8c19d10b777d5fc8535f411be1db @gustavofonseca committed Feb 28, 2013
Showing with 110 additions and 16 deletions.
  1. +14 −0 opac/catalog/models.py
  2. +27 −0 opac/utils/sync/datacollector.py
  3. +33 −16 opac/utils/tasks.py
  4. +36 −0 opac/utils/tests/tests_datacollector.py
View
14 opac/catalog/models.py
@@ -52,3 +52,17 @@ class Meta:
def __unicode__(self):
return self.name
+
@property
def resource_id(self):
    """
    Identifier of the resource: the last non-empty,
    slash-separated segment of ``resource_uri``.
    """
    # Dropping the empty segments makes leading/trailing slashes harmless.
    segments = list(filter(None, self.resource_uri.split('/')))
    return segments[-1]
+
+
class Sync(models.Model):
    """
    Represents an incremental sync event.
    """
    # Filled in automatically when the record is first saved.
    started_at = models.DateTimeField(auto_now_add=True)
    # Nullable in the database; ``blank=True`` makes validation accept the
    # empty value as well, so a record without an end time can pass
    # form/admin validation.
    ended_at = models.DateTimeField(null=True, blank=True)
    # Presumably the ``seq`` of the last entry consumed from the changes
    # API -- TODO confirm against the sync task.
    last_seq = models.IntegerField(default=0)
View
27 opac/utils/sync/datacollector.py
@@ -133,3 +133,30 @@ def get_all_collections(self):
Get all collections available at SciELO Manager.
"""
return self.iter_docs('collections')
+
+
def identify_changes(changes, collections, journals):
    """
    Returns a list of ``object_uri`` that must be
    synced.

    Each ``object_uri`` is listed once, in the order it first
    shows up in ``changes``. Entries lacking an ``object_uri``
    are ignored.

    ``changes`` is an iterable where each element is an
    entry in changes API (a dict-like object that is read
    through ``.get``).

    ``collections`` is an iterable of collections
    that must have all its journals synced. Only their
    ``resource_uri`` attribute is used.

    ``journals`` is an iterable of journals that must
    be synced. Only their ``resource_uri`` attribute is used.
    """
    colls_resource_uri = set(c.resource_uri for c in collections)
    jours_resource_uri = set(j.resource_uri for j in journals)

    changed = []
    seen = set()
    for change_rec in changes:
        object_uri = change_rec.get('object_uri')

        # Nothing to visit without a target, and an object that changed
        # several times still needs to be visited only once.
        if object_uri is None or object_uri in seen:
            continue

        if (change_rec.get('collection_uri') in colls_resource_uri or
                object_uri in jours_resource_uri):
            seen.add(object_uri)
            changed.append(object_uri)

    return changed
View
49 opac/utils/tasks.py
@@ -9,17 +9,12 @@
from catalog import models
-def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
+def _get_user_catalog_definitions():
"""
- Returns an iterator containing all journals that must be synced
- to build the catalog.
-
- If the collection is marked as member, and has some
- journals that are also marked as members, we assume
- only these journals must be synchronized. Else,
- sync all its journals.
+ It analyses the choices the user made, and returns a list
+ in the form:
+ [[<collection_name_slug>,], [<journal>,]]
"""
- scielo_api = managerapi_dep(settings=settings)
collections = models.CollectionMeta.objects.members()
full_collections = []
@@ -29,21 +24,43 @@ def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
# decide if the entire collection must be synced or only some
# journals.
if collection.journals.members().exists():
- for journal in collection.journals.members():
- # getting the resource_id
- cleaned = [seg for seg in journal.resource_uri.split('/') if seg]
- resource_id = cleaned[-1]
-
- journals_a_la_carte.append(resource_id)
+ journals_a_la_carte = collection.journals.members()
else:
- full_collections.append(collection.name_slug)
+ full_collections.append(collection)
+
+ return [full_collections, journals_a_la_carte]
+
+
def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
    """
    Returns an iterator over every journal that has to be synced
    in order to build the catalog.

    The journals come from two sources, chained in this order:
    all journals of the collections that must be synced entirely,
    followed by the journals that were picked one by one.

    ``managerapi_dep`` is the API client factory; it is called
    with ``settings=settings``.
    """
    api_client = managerapi_dep(settings=settings)
    whole_collections, picked_journals = _get_user_catalog_definitions()

    # Collections are addressed by slug, single journals by resource id.
    slugs = [coll.name_slug for coll in whole_collections]
    from_collections = api_client.get_all_journals(*slugs)

    ids = [jour.resource_id for jour in picked_journals]
    from_selection = api_client.get_journals(*ids)

    return itertools.chain(from_collections, from_selection)
def _what_have_changed(managerapi_dep=datacollector.SciELOManagerAPI):
    """
    Meant to return an iterator over the journals that must be
    created or updated in order to keep the catalog up to date.

    NOTE: still a stub. It only loads the user's catalog
    definitions and implicitly returns ``None``; ``managerapi_dep``
    is not used yet.
    """
    whole_collections, picked_journals = _get_user_catalog_definitions()
+
+
@task(name='utils.tasks.build_catalog')
def build_catalog():
"""
View
36 opac/utils/tests/tests_datacollector.py
@@ -1,5 +1,8 @@
# coding: utf-8
import mocker
+from django.test import TestCase
+
+from catalog.test import modelfactories
class SciELOManagerAPITests(mocker.MockerTestCase):
@@ -80,3 +83,36 @@ def test_single_document_of_an_endpoint(self):
res = api.fetch_data('journals', resource_id=1)
self.assertIn('title', res)
+
+
class ChangesIdentificationTests(mocker.MockerTestCase, TestCase):
    """
    Tests for ``utils.sync.datacollector.identify_changes``.
    """

    # Two sample entries of the changes API: a journal that belongs to
    # collection 1 and an issue that belongs to collection 2.
    changes = [
        {
            "changed_at": "2013-01-23T15:11:33.409478",
            "collection_uri": "/api/v1/collections/1/",
            "event_type": "updated",
            "object_uri": "/api/v1/journals/31/",
            "resource_uri": "/api/v1/changes/8/",
            "seq": 8
        },
        {
            "changed_at": "2013-01-23T15:12:33.409478",
            "collection_uri": "/api/v1/collections/2/",
            "event_type": "added",
            "object_uri": "/api/v1/issues/2840/",
            "resource_uri": "/api/v1/changes/2/",
            "seq": 9
        },
    ]

    def test_identify_journals_given_collections(self):
        from utils.sync.datacollector import identify_changes

        # NOTE(review): relies on the factory producing a collection whose
        # ``resource_uri`` is "/api/v1/collections/1/" -- confirm.
        c = modelfactories.CollectionMetaFactory.create(is_member=True)

        docs = identify_changes(self.changes, collections=[c], journals=[])
        self.assertEqual(docs[0], "/api/v1/journals/31/")

    def test_identify_journals_given_journals(self):
        from utils.sync.datacollector import identify_changes

        # ``identify_changes`` only reads ``resource_uri`` from the
        # journals, so a plain stub is enough here.
        class JournalStub(object):
            resource_uri = "/api/v1/journals/31/"

        docs = identify_changes(
            self.changes, collections=[], journals=[JournalStub()])
        self.assertEqual(docs, ["/api/v1/journals/31/"])

0 comments on commit f0cb9d7

Please sign in to comment.
Something went wrong with that request. Please try again.