Permalink
Browse files

Implementação básica da classe ChangesList

  • Loading branch information...
1 parent f6e2e82 commit 17de104ec803f99f484ae09330620b22d96dcf8f @gustavofonseca committed Mar 1, 2013
Showing with 263 additions and 13 deletions.
  1. +18 −7 opac/utils/functions.py
  2. +91 −0 opac/utils/sync/datacollector.py
  3. +11 −6 opac/utils/tasks.py
  4. +143 −0 opac/utils/tests/tests_datacollector.py
@@ -3,10 +3,20 @@
from django.conf import settings
from .sync import datacollector
+from .sync import pipes
from catalog import models
from catalog import mongomodels
def make_journal_pipeline():
    """
    Build the pipeline used to transform journal data.

    Returns a ``pipes.Pipeline`` composed of ``PIssue``, ``PMission``,
    ``PSection``, ``PNormalizeJournalTitle`` and ``PCleanup``, passed to
    the constructor in that order.
    """
    stages = (
        pipes.PIssue,
        pipes.PMission,
        pipes.PSection,
        pipes.PNormalizeJournalTitle,
        pipes.PCleanup,
    )
    return pipes.Pipeline(*stages)
+
+
def _get_user_catalog_definitions():
"""
It analyses the choices the user made, and returns a list
@@ -51,22 +61,23 @@ def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
)
-def _what_have_changed(managerapi_dep=datacollector.SciELOManagerAPI):
+def _what_have_changed(since=0, managerapi_dep=datacollector.SciELOManagerAPI):
"""
- Returns an iterator containing all journals that must be created
+ Returns a dict with the keys ``issues`` and ``journals``, where
+ each one is an iterator containing all data that must be created
or updated in order to keep the catalog updated.
"""
scielo_api = managerapi_dep(settings=settings)
full_collections, journals_a_la_carte = _get_user_catalog_definitions()
- data = scielo_api.get_changes()
+ data = scielo_api.get_changes(since=since)
changed = identify_changes(data, full_collections, journals_a_la_carte)
- return itertools.chain(
- scielo_api.get_issues(*changed['issues']),
- scielo_api.get_journals(*changed['journals'])
- )
+ return {
+ 'issues': scielo_api.get_issues(*changed['issues']),
+ 'journals': scielo_api.get_journals(*changed['journals'])
+ }
def _list_issues_uri(journal_meta, journal_dep=mongomodels.Journal):
@@ -2,11 +2,14 @@
import logging
import time
import itertools
+import copy
from django.conf import settings
import slumber
import requests
+from catalog import mongomodels
+
logger = logging.getLogger(__name__)
ITEMS_PER_REQUEST = 50
@@ -146,3 +149,91 @@ def get_issues(self, *issues):
"""
for i in issues:
yield self.fetch_data('issues', resource_id=i)
+
+
def _list_issues_uri(journal_meta, journal_dep=mongomodels.Journal):
    """
    Return a generator with the ``resource_uri`` of every issue listed by
    the journal whose id is ``journal_meta.resource_id``.

    The journal is looked up with ``journal_dep.objects.find_one`` and
    ``ValueError`` is raised when that lookup returns nothing.
    """
    journal_id = journal_meta.resource_id

    # TODO: This instantiation logic must be at Journal.get_journal
    raw_journal = journal_dep.objects.find_one({'id': journal_id})
    if not raw_journal:
        raise ValueError('invalid id for Journals: %s' % journal_id)

    issues = journal_dep(**raw_journal).list_issues()
    return (issue.resource_uri for issue in issues)
+
+
class ChangeListIterator(object):
    """
    Iterator over a snapshot of change events, ordered by ``seq``.

    The events are deep-copied on construction, so the items yielded are
    independent copies and mutating them does not affect the list the
    iterator was built from.
    """

    def __init__(self, data):
        """``data`` is a sequence of objects exposing a ``seq`` attribute."""
        self._data = sorted(copy.deepcopy(data), key=lambda x: x.seq)
        self._index = -1

    def __iter__(self):
        return self

    def next(self):
        """Return the next change, raising ``StopIteration`` at the end."""
        next_index = self._index + 1
        try:
            item = self._data[next_index]
        except IndexError:
            raise StopIteration()
        # Only advance on success so an exhausted iterator stays put
        # instead of growing its index on every extra call.
        self._index = next_index
        return item

    # Python 3 spells the iterator protocol method ``__next__``; keep
    # ``next`` for Python 2 callers and expose both.
    __next__ = next
+
+
class ChangesList(object):
    """
    Collection of ``Change`` events that can be filtered and iterated.

    Iterating an instance goes through ``ChangeListIterator``, which
    yields the changes ordered by their ``seq`` attribute.
    """

    def __init__(self, data, list_issues_uri_dep=_list_issues_uri):
        """
        ``data`` is an iterable whose items are either dicts (converted
        with ``Change(**item)``) or ``Change`` instances. Any other item
        type raises ``TypeError``.

        ``list_issues_uri_dep`` is the callable ``filter`` uses to list
        the issue URIs that belong to a journal.
        """
        self._changes = []

        for event in data:
            if isinstance(event, dict):
                self._changes.append(Change(**event))
            elif isinstance(event, Change):
                self._changes.append(event)
            else:
                raise TypeError(
                    'expected dict or Change, got %s' % type(event).__name__)

        self.list_issues_uri = list_issues_uri_dep

    def filter(self, collections=None, journals=None):
        """
        Produces another ChangesList instance containing only
        Changes that match the expectations.

        A change is kept when its ``collection_uri`` or its ``object_uri``
        equals the ``resource_uri`` of one of the given ``collections`` or
        ``journals``, or one of the issue URIs listed for those journals.
        The returned list keeps using this instance's issue-listing
        callable.
        """
        wanted_uris = set()

        # URIs of every requested journal plus the URIs of its issues.
        for journal in journals or ():
            wanted_uris.add(journal.resource_uri)
            wanted_uris.update(self.list_issues_uri(journal))

        for collection in collections or ():
            wanted_uris.add(collection.resource_uri)

        matching = [
            change for change in self
            if change.collection_uri in wanted_uris
            or change.object_uri in wanted_uris
        ]

        # Propagate the injected dependency; otherwise a chained filter()
        # would silently fall back to the default implementation.
        return ChangesList(matching, list_issues_uri_dep=self.list_issues_uri)

    def __iter__(self):
        return ChangeListIterator(self._changes)
+
+
class Change(object):
    """
    Plain record of a single change event.

    Every keyword argument given to the constructor becomes an instance
    attribute with the same name.
    """

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __repr__(self):
        # Attributes are sorted so the output is stable regardless of the
        # order in which they were set.
        fields = ', '.join(
            '%s=%r' % item for item in sorted(vars(self).items()))
        return '%s(%s)' % (type(self).__name__, fields)
View
@@ -3,7 +3,6 @@
from .sync import datacollector
from .sync import dataloader
-from .sync import pipes
from . import functions
from catalog import models
@@ -15,11 +14,7 @@ def build_catalog():
Important! All catalog public data are erased and reconstructed
when you perform this operation.
"""
- ppl = pipes.Pipeline(pipes.PIssue,
- pipes.PMission,
- pipes.PSection,
- pipes.PNormalizeJournalTitle,
- pipes.PCleanup)
+ ppl = functions.make_journal_pipeline()
data = functions._what_to_sync()
transformed_data = ppl.run(data)
@@ -28,6 +23,16 @@ def build_catalog():
marreta.rebuild_collection('journals', transformed_data)
def update_catalog():
    """
    Update the ``journals`` collection with the journals reported as
    changed.

    Only the ``journals`` entry returned by
    ``functions._what_have_changed`` is consumed here; it is run through
    the journal pipeline and handed to ``Marreta.update_collection``.
    """
    pipeline = functions.make_journal_pipeline()

    changed = functions._what_have_changed()
    journals = pipeline.run(changed['journals'])

    loader = dataloader.Marreta(settings=settings)
    loader.update_collection('journals', journals)
+
+
def sync_collections_meta(managerapi_dep=datacollector.SciELOManagerAPI):
"""
Fetches the metadata about available Collections in order for the
@@ -1,6 +1,8 @@
# coding: utf-8
import mocker
+from catalog.test import modelfactories
+
class SciELOManagerAPITests(mocker.MockerTestCase):
valid_full_microset = {
@@ -80,3 +82,144 @@ def test_single_document_of_an_endpoint(self):
res = api.fetch_data('journals', resource_id=1)
self.assertIn('title', res)
+
+
class ChangesListTests(mocker.MockerTestCase):
    """Tests for ``ChangesList``: instantiation, filtering and iteration."""

    # Raw change events used as fixtures. Each test works on fresh copies.
    JOURNAL_31_UPDATED = {
        "changed_at": "2013-01-23T15:11:33.409478",
        "collection_uri": "/api/v1/collections/1/",
        "event_type": "updated",
        "object_uri": "/api/v1/journals/31/",
        "resource_uri": "/api/v1/changes/8/",
        "seq": 8,
    }
    ISSUE_2840_ADDED = {
        "changed_at": "2013-01-23T15:12:33.409478",
        "collection_uri": "/api/v1/collections/2/",
        "event_type": "added",
        "object_uri": "/api/v1/issues/2840/",
        "resource_uri": "/api/v1/changes/2/",
        "seq": 9,
    }
    JOURNAL_1_UPDATED = {
        "changed_at": "2013-01-23T15:12:33.409478",
        "collection_uri": "/api/v1/collections/2/",
        "event_type": "updated",
        "object_uri": "/api/v1/journals/1/",
        "resource_uri": "/api/v1/changes/2/",
        "seq": 9,
    }

    def test_raw_data_from_scielo_api_is_accepted_on_instantiation(self):
        from utils.sync.datacollector import ChangesList, Change

        raw_changes = [
            dict(self.JOURNAL_31_UPDATED),
            dict(self.ISSUE_2840_ADDED),
        ]

        ch_list = ChangesList(raw_changes)

        for item in ch_list._changes:
            self.assertIsInstance(item, Change)

    def test_generate_another_changes_list_based_on_filtering_by_collections(self):
        from utils.sync.datacollector import ChangesList

        raw_changes = [
            dict(self.JOURNAL_31_UPDATED),
            dict(self.ISSUE_2840_ADDED),
        ]

        collection = modelfactories.CollectionMetaFactory.create()

        ch_list = ChangesList(raw_changes)
        filtered_list = ch_list.filter(collections=[collection])

        self.assertEqual(len(filtered_list._changes), 1)

    def test_generate_another_changes_list_based_on_filtering_by_journals(self):
        from utils.sync.datacollector import ChangesList

        # Stand-in for the issue lister so no journal lookup is performed.
        mocker_list_issues = self.mocker.mock()
        mocker_list_issues(mocker.ANY)
        self.mocker.result([u'/api/v1/issues/1/'])
        self.mocker.replay()

        raw_changes = [
            dict(self.JOURNAL_31_UPDATED),
            dict(self.JOURNAL_1_UPDATED),
        ]

        journal = modelfactories.JournalMetaFactory.create()

        ch_list = ChangesList(raw_changes,
                              list_issues_uri_dep=mocker_list_issues)
        filtered_list = ch_list.filter(journals=[journal])

        self.assertEqual(len(filtered_list._changes), 1)

    def test_changeslist_is_iterable(self):
        from utils.sync.datacollector import ChangesList

        raw_changes = [
            dict(self.JOURNAL_31_UPDATED),
            dict(self.JOURNAL_1_UPDATED),
        ]

        ch_list = ChangesList(raw_changes)
        self.assertTrue(iter(ch_list))

    def test_changeslist_is_sorted_by_seq(self):
        from utils.sync.datacollector import ChangesList

        # Deliberately given out of order: seq 9 first, then seq 8.
        raw_changes = [
            dict(self.JOURNAL_1_UPDATED),
            dict(self.JOURNAL_31_UPDATED),
        ]

        ch_list = ChangesList(raw_changes)
        self.assertEqual([ch.seq for ch in ch_list], [8, 9])

0 comments on commit 17de104

Please sign in to comment.