Permalink
Browse files

Improve logging, error handling and comment typos.

  • Loading branch information...
1 parent b202615 commit f372e633644786670c820d6f47cf4345ce834e2f @davidread davidread committed Jun 27, 2012
Showing with 67 additions and 24 deletions.
  1. +37 −10 ckanext/qa/commands.py
  2. +30 −14 ckanext/qa/tasks.py
View
@@ -4,11 +4,11 @@
import urlparse
from pylons import config
from ckan.lib.cli import CkanCommand
-from ckan.logic import get_action
-from ckan import model
-from ckan.model.types import make_uuid
+
import logging
-logger = logging.getLogger()
+
+class CkanApiError(Exception):
+ pass
class QACommand(CkanCommand):
"""QA analysis of CKAN resources
@@ -44,6 +44,14 @@ def command(self):
cmd = self.args[0]
self._load_config()
+ # Now we can import ckan and create logger, knowing that loggers
+ # won't get disabled
+ self.log = logging.getLogger('qa')
+
+ from ckan.logic import get_action
+ from ckan import model
+ from ckan.model.types import make_uuid
+
#import tasks after load config so CKAN_CONFIG evironment variable can be set
import tasks
@@ -57,10 +65,11 @@ def command(self):
if cmd == 'update':
for package in self._package_list():
- logger.info("Updating QA on dataset: %s (%d resources)" %
+ self.log.info("QA on dataset being added to Celery queue: %s (%d resources)" %
(package.get('name'), len(package.get('resources', []))))
for resource in package.get('resources', []):
+ resource['package'] = package['name']
data = json.dumps(resource)
task_id = make_uuid()
task_status = {
@@ -81,10 +90,10 @@ def command(self):
tasks.update.apply_async(args=[context, data], task_id=task_id)
elif cmd == 'clean':
- logger.error('Command "%s" not implemented' % (cmd,))
+ self.log.error('Command "%s" not implemented' % (cmd,))
else:
- logger.error('Command "%s" not recognized' % (cmd,))
+ self.log.error('Command "%s" not recognized' % (cmd,))
def _package_list(self):
"""
@@ -100,18 +109,36 @@ def _package_list(self):
if len(self.args) > 1:
for id in self.args[1:]:
data = json.dumps({'id': unicode(id)})
- response = requests.post(api_url + '/package_show', data)
+ url = api_url + '/package_show'
+ response = requests.post(url, data)
+ if not response.ok:
+ err = ('Failed to get package %s from url %r: %s' %
+ (id, url, response.error))
+ self.log.error(err)
+ raise CkanApiError(err)
yield json.loads(response.content).get('result')
else:
page, limit = 1, 100
- response = requests.post(api_url + '/current_package_list_with_resources',
+ url = api_url + '/current_package_list_with_resources'
+ response = requests.post(url,
json.dumps({'page': page, 'limit': limit}))
+ if not response.ok:
+ err = ('Failed to get package list with resources from url %r: %s' %
+ (url, response.error))
+ self.log.error(err)
+ raise CkanApiError(err)
chunk = json.loads(response.content).get('result')
while(chunk):
page += 1
for p in chunk:
yield p
- response = requests.post(api_url + '/current_package_list_with_resources',
+ url = api_url + '/current_package_list_with_resources'
+ response = requests.post(url,
json.dumps({'page': page, 'limit': limit}))
+ if not response.ok:
+ err = ('Failed to get package list with resources from url %r: %s' %
+ (url, response.error))
+ self.log.error(err)
+ raise CkanApiError(err)
chunk = json.loads(response.content).get('result')
View
@@ -1,7 +1,7 @@
"""
-Score datasets on Sir Tim Bernes-Lee's five stars of openness based on mime-type.
+Score datasets on Sir Tim Berners-Lee's five stars of openness based on mime-type.
"""
-from datetime import datetime
+import datetime
import mimetypes
import json
import requests
@@ -74,43 +74,48 @@ def _task_status_data(id, result):
'task_type': 'qa',
'key': u'openness_score',
'value': result['openness_score'],
- 'last_updated': datetime.now().isoformat()
+ 'last_updated': datetime.datetime.now().isoformat()
},
{
'entity_id': id,
'entity_type': u'resource',
'task_type': 'qa',
'key': u'openness_score_reason',
'value': result['openness_score_reason'],
- 'last_updated': datetime.now().isoformat()
+ 'last_updated': datetime.datetime.now().isoformat()
},
{
'entity_id': id,
'entity_type': u'resource',
'task_type': 'qa',
'key': u'openness_score_failure_count',
'value': result['openness_score_failure_count'],
- 'last_updated': datetime.now().isoformat()
- }
+ 'last_updated': datetime.datetime.now().isoformat()
+ },
]
@celery.task(name = "qa.update")
def update(context, data):
"""
- Score resources on Sir Tim Bernes-Lee's five stars of openness based on mime-type.
+ Score resources on Sir Tim Berners-Lee's five stars of openness based on mime-type.
Returns a JSON dict with keys:
'openness_score': score (int)
'openness_score_reason': the reason for the score (string)
'openness_score_failure_count': the number of consecutive times this resource has returned a score of 0
"""
+ log = update.get_logger()
try:
data = json.loads(data)
context = json.loads(context)
result = resource_score(context, data)
+ log.info('Openness score for dataset %s (res#%s): %r (%s)',
+ data['package'], data['position'],
+ result['openness_score'], result['openness_score_reason'])
+
task_status_data = _task_status_data(data['id'], result)
api_url = urlparse.urljoin(context['site_url'], 'api/action')
@@ -120,37 +125,47 @@ def update(context, data):
headers = {'Authorization': context['apikey'],
'Content-Type': 'application/json'}
)
- if response.status_code != 200:
- raise CkanError('ckan failed to update task_status, status_code (%s), error %s'
- % (response.status_code, response.content))
+ if not response.ok:
+ err = 'ckan failed to update task_status, error %s' \
+ % response.error
+ log.error(err)
+ raise CkanError(err)
+ elif response.status_code != 200:
+ err = 'ckan failed to update task_status, status_code (%s), error %s' \
+ % (response.status_code, response.content)
+ log.error(err)
+ raise CkanError(err)
return json.dumps(result)
except Exception, e:
+ log.error('Exception occurred during QA update: %s: %s', e.__class__.__name__, unicode(e))
_update_task_status(context, {
'entity_id': data['id'],
'entity_type': u'resource',
'task_type': 'qa',
'key': u'celery_task_id',
'value': unicode(update.request.id),
'error': '%s: %s' % (e.__class__.__name__, unicode(e)),
- 'last_updated': datetime.now().isoformat()
+ 'last_updated': datetime.datetime.now().isoformat()
})
raise
def resource_score(context, data):
"""
- Score resources on Sir Tim Bernes-Lee's five stars of openness based on mime-type.
+ Score resources on Sir Tim Berners-Lee's five stars of openness based on mime-type.
returns a dict with keys:
'openness_score': score (int)
'openness_score_reason': the reason for the score (string)
'openness_score_failure_count': the number of consecutive times this resource has returned a score of 0
-
+
Raises the following exceptions:
"""
+ log = update.get_logger()
+
score = 0
score_reason = ""
score_failure_count = 0
@@ -214,6 +229,7 @@ def resource_score(context, data):
except LinkCheckerError, e:
score_reason = str(e)
except Exception, e:
+ log.error('Unexpected error while calculating openness score %s: %s', e.__class__.__name__, unicode(e))
score_reason = "Unknown error: %s" % str(e)
if score == 0:
@@ -224,6 +240,6 @@ def resource_score(context, data):
return {
'openness_score': score,
'openness_score_reason': score_reason,
- 'openness_score_failure_count': score_failure_count
+ 'openness_score_failure_count': score_failure_count,
}

0 comments on commit f372e63

Please sign in to comment.