Permalink
Browse files

Alteração dos nomes de algumas funções do módulo ``utils.functions``

  • Loading branch information...
1 parent 1c9cb0f commit 7d6b6c0f73ba2df7dfb9fd8008fd4ed81b6baebc @gustavofonseca committed Mar 5, 2013
Showing with 65 additions and 290 deletions.
  1. +10 −0 opac/catalog/admin.py
  2. +34 −68 opac/utils/functions.py
  3. +4 −1 opac/utils/sync/datacollector.py
  4. +11 −6 opac/utils/tasks.py
  5. +6 −215 opac/utils/tests/tests_functions.py
View
@@ -119,5 +119,15 @@ def has_delete_permission(self, request, obj=None):
return False
+class SyncAdmin(admin.ModelAdmin):
+ list_display = ('started_at', 'ended_at', 'last_seq', 'status')
+
+ def has_add_permission(self, request):
+ return False
+
+ def has_delete_permission(self, request, obj=None):
+ return False
+
admin.site.register(models.CollectionMeta, CollectionMetaAdmin)
admin.site.register(models.JournalMeta, JournalMetaAdmin)
+admin.site.register(models.Sync, SyncAdmin)
View
@@ -9,6 +9,10 @@
def make_journal_pipeline():
+ """
+ Returns a ``pipes.Pipeline`` instance wired with
+ all the pipes needed to transform journals data.
+ """
ppl = pipes.Pipeline(pipes.PIssue,
pipes.PMission,
pipes.PSection,
@@ -17,11 +21,11 @@ def make_journal_pipeline():
return ppl
-def _get_user_catalog_definitions():
+def get_user_catalog_definitions():
"""
- It analyses the choices the user made, and returns a list
+ Analyses the choices the user made, and returns a list
in the form:
- [[<collection_name_slug>,], [<journal>,]]
+ [[<collection>,], [<journal>,]]
"""
collections = models.CollectionMeta.objects.members()
@@ -39,7 +43,7 @@ def _get_user_catalog_definitions():
return [full_collections, journals_a_la_carte]
-def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
+def get_all_data_for_build(managerapi_dep=datacollector.SciELOManagerAPI):
"""
Returns an iterator containing all journals that must be synced
to build the catalog.
@@ -50,7 +54,7 @@ def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
sync all its journals.
"""
scielo_api = managerapi_dep(settings=settings)
- full_collections, journals_a_la_carte = _get_user_catalog_definitions()
+ full_collections, journals_a_la_carte = get_user_catalog_definitions()
full_collections = (c.name_slug for c in full_collections)
journals_a_la_carte = (j.resource_id for j in journals_a_la_carte)
@@ -61,14 +65,14 @@ def _what_to_sync(managerapi_dep=datacollector.SciELOManagerAPI):
)
-def _what_have_changed(since=0, managerapi_dep=datacollector.SciELOManagerAPI):
+def get_all_changes(since=0, managerapi_dep=datacollector.SciELOManagerAPI):
"""
- Returns a dict with the keys ``issues`` and ``journals``, where
- each one is an iterator containing all data that must be created
- or updated in order to keep the catalog updated.
+ Returns a ``utils.sync.datacollector.ChangesList`` instance
+ containing all data that must be created or updated in
+ order to keep the catalog updated.
"""
scielo_api = managerapi_dep(settings=settings)
- full_collections, journals_a_la_carte = _get_user_catalog_definitions()
+ full_collections, journals_a_la_carte = get_user_catalog_definitions()
data = scielo_api.get_changes(since=since)
@@ -80,72 +84,34 @@ def _what_have_changed(since=0, managerapi_dep=datacollector.SciELOManagerAPI):
def _list_issues_uri(journal_meta, journal_dep=mongomodels.Journal):
- # TODO: This instantiation logic must be at Journal.get_journal
- journal_data = journal_dep.objects.find_one({'id': journal_meta.resource_id})
- journal_doc = journal_dep(**journal_data)
- return (issue.resource_uri for issue in journal_doc.list_issues())
+ # TODO: This instantiation logic must be at Journal.get_journal
+ journal_data = journal_dep.objects.find_one({'id': journal_meta.resource_id})
+ journal_doc = journal_dep(**journal_data)
+ return (issue.resource_uri for issue in journal_doc.list_issues())
-def identify_changes(changes,
- collections,
- journals,
- list_issues_uri_dep=_list_issues_uri):
+def get_last_seq():
"""
- Returns a dict where the keys are ``journals`` and ``issues``
- both containing a list of ``object_uri`` that must be
- synced.
-
- ``changes`` is an iterable where each element is an
- entry in changes API.
-
- ``collections`` is an iterable of collections
- that must have all its journals synced.
- ``journals`` is an iterable of journals that must
- be synced.
"""
- journals_list = []
- issues_list = []
-
- # list uris from all journals and its issues
- for j in journals:
- journals_list.append(j.resource_uri)
- issues_list.append(list_issues_uri_dep(j))
-
- collections_uris = set(c.resource_uri for c in collections)
- journals_uris = set(journals_list)
- issues_uris = set(itertools.chain(*issues_list))
-
- changed_journals = set()
- changed_issues = set()
-
- for change_rec in changes:
- _collection_uri = change_rec.get('collection_uri')
- _object_uri = change_rec.get('object_uri')
-
- if _collection_uri in collections_uris:
- # identify the endpoint to know how to classify the uri
- endpoint = [seg for seg in _object_uri.split('/') if seg][-2]
- if endpoint == 'journals':
- changed_journals.add(_object_uri)
- elif endpoint == 'issues':
- changed_issues.add(_object_uri)
- else:
- continue
- elif _object_uri in journals_uris:
- changed_journals.add(_object_uri)
- elif _object_uri in issues_uris:
- changed_issues.add(_object_uri)
- else:
- continue
-
- return {'journals': list(changed_journals), 'issues': list(changed_issues)}
-
-
-def get_last_seq():
last_sync = models.Sync.objects.all()[0]
if last_sync:
return last_sync.last_seq
else:
return 0
+
+
+def get_remote_last_seq(managerapi_dep=datacollector.SciELOManagerAPI):
+ """
+ Gets the last seq shown at the SciELO Manager's Changes API.
+
+ This function should not be used on a frequent basis, as its
+ complexity grows linearly with the number of Changes entries.
+ """
+ scielo_api = managerapi_dep(settings=settings)
+
+ data = scielo_api.get_changes()
+
+ last_change = list(data)[-1]
+ return last_change.get('seq', 0)
@@ -2,7 +2,6 @@
import logging
import time
import itertools
-import copy
from django.conf import settings
import slumber
@@ -280,6 +279,10 @@ def show(self, endpoint, unique=False):
@property
def last_seq(self):
+ """
+ Returns the seq of the last Change instance
+ in this ChangeList.
+ """
return self._changes[-1].seq
View
@@ -1,4 +1,4 @@
-from datetime import datetime
+import datetime
from django.conf import settings
from django.db import transaction
@@ -19,24 +19,29 @@ def build_catalog():
"""
ppl = functions.make_journal_pipeline()
- data = functions._what_to_sync()
+ data = functions.get_all_data_for_build()
transformed_data = ppl.run(data)
marreta = dataloader.Marreta(settings=settings)
marreta.rebuild_collection('journals', transformed_data)
- models.Sync.objects.create(ended_at=datetime.now(),
- last_seq=60, status='finished')
+ models.Sync.objects.create(ended_at=datetime.datetime.now(),
+ last_seq=functions.get_remote_last_seq(), status='finished')
+@task(name='utils.tasks.update_catalog')
def update_catalog(managerapi_dep=datacollector.SciELOManagerAPI):
+ """
+ Scans the SciELO Manager's changes API looking for
+ changes on Journals that are part of this catalog.
+ """
scielo_api = managerapi_dep(settings=settings)
journal_ppl = functions.make_journal_pipeline()
with transaction.commit_on_success():
sync = models.Sync.objects.create()
- changes = functions._what_have_changed(since=functions.get_last_seq())
+ changes = functions.get_all_changes(since=functions.get_last_seq())
changed_journals = changes.show('journals', unique=True)
# changed_issues = changes.show('issues', unique=True)
@@ -53,7 +58,7 @@ def update_catalog(managerapi_dep=datacollector.SciELOManagerAPI):
sync.last_seq = changes.last_seq
sync.status = 'finished'
- sync.ended_at = datetime.now()
+ sync.ended_at = datetime.datetime.now()
sync.save()
Oops, something went wrong.

0 comments on commit 7d6b6c0

Please sign in to comment.