Skip to content
This repository has been archived by the owner on Jun 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #438 from datagovuk/publisher-activity-report
Browse files Browse the repository at this point in the history
Publisher activity report index
  • Loading branch information
David Read committed Jun 16, 2016
2 parents 5d8329a + 90b8996 commit d46a6ae
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 36 deletions.
130 changes: 96 additions & 34 deletions ckanext/dgu/lib/reports.py
Expand Up @@ -187,16 +187,94 @@ def get_quarter_dates(datetime_now):
'last': (last_q_started, last_q_ended)}


def get_quarter_dates_merged(datetime_now):
'''Returns the dates for the period including this (current) quarter and
the last quarter. Uses calendar year, so 1 Jan to 31 Mar etc.'''
now = datetime_now
month_this_q_started = (now.month - 1) // 3 * 3 + 1
this_q_started = datetime.datetime(now.year, month_this_q_started, 1)
this_q_ended = datetime.datetime(now.year, now.month, now.day)
last_q_started = datetime.datetime(
this_q_started.year + (this_q_started.month-3)/12,
(this_q_started.month-4) % 12 + 1,
1)
last_q_ended = this_q_started - datetime.timedelta(days=1)
return {'this_and_last': (last_q_started, this_q_ended)}


def publisher_activity(organization, include_sub_organizations=False):
"""
Contains information about the datasets a specific organization has
released in this and last quarter (calendar year). This is needed by
departments for their quarterly transparency reports.
"""
import datetime

now = datetime.datetime.now()

if organization:
quarters = get_quarter_dates(now)

created, modified = _get_activity(
organization, include_sub_organizations, quarters)

datasets = []
for quarter_name in quarters:
datasets += sorted(created[quarter_name], key=lambda x: x[1])
datasets += sorted(modified[quarter_name], key=lambda x: x[1])
columns = ('Dataset name', 'Dataset title', 'Dataset notes', 'Modified or created', 'Quarter', 'Timestamp', 'Author', 'Published')

quarters_iso = dict(
[(last_or_this, [date_.isoformat() for date_ in q_list])
for last_or_this, q_list in quarters.iteritems()])

return {'table': datasets, 'columns': columns,
'quarters': quarters_iso}
else:
# index
periods = get_quarter_dates_merged(now)

stats_by_org = []
totals = collections.defaultdict(int)
import ckan.model as model
all_orgs = model.Session.query(model.Group).\
filter(model.Group.type=='organization').\
filter(model.Group.state=='active').order_by('name').\
all()
for organization in all_orgs:
created, modified = _get_activity(
organization.name, include_sub_organizations, periods)
created_names = [dataset[0] for dataset in created.values()[0]]
modified_names = [dataset[0] for dataset in modified.values()[0]]
num_created = len(created_names)
num_modified = len(modified_names)
num_total = len(set(created_names) | set(modified_names))
stats_by_org.append(OrderedDict((
('organization name', organization.name),
('organization title', organization.title),
('num created', num_created),
('num modified', num_modified),
('total', num_total),
)))
totals['num created'] += num_created
totals['num modified'] += num_modified
totals['total'] += num_total

period_iso = [date_.isoformat()
for date_ in periods.values()[0]]

return {'table': stats_by_org,
'totals': totals,
'period': period_iso}


def _get_activity(organization_name, include_sub_organizations, periods):
import ckan.model as model
from paste.deploy.converters import asbool

created = dict((period_name, []) for period_name in periods)
modified = dict((period_name, []) for period_name in periods)

# These are the authors whose revisions we ignore, as they are trivial
# changes. NB we do want to know about revisions by:
# * harvest (harvested metadata)
Expand All @@ -211,20 +289,14 @@ def publisher_activity(organization, include_sub_organizations=False):
)
system_author_template = 'script%' # "%" is a wildcard

created = {'this': [], 'last': []}
modified = {'this': [], 'last': []}

now = datetime.datetime.now()
quarters = get_quarter_dates(now)

if organization:
organization = model.Group.by_name(organization)
if organization_name:
organization = model.Group.by_name(organization_name)
if not organization:
raise p.toolkit.ObjectNotFound()

if not organization:
if not organization_name:
pkgs = model.Session.query(model.Package)\
.all()
.all()
else:
pkgs = model.Session.query(model.Package)
pkgs = lib.filter_by_organizations(pkgs, organization,
Expand Down Expand Up @@ -259,26 +331,26 @@ def publisher_activity(organization, include_sub_organizations=False):
.filter(~model.Revision.author.in_(system_authors))\
.filter(~model.Revision.author.like(system_author_template))

for quarter_name in quarters:
quarter = quarters[quarter_name]
for period_name in periods:
period = periods[period_name]
# created
if quarter[0] < created_.revision_timestamp < quarter[1]:
if period[0] < created_.revision_timestamp < period[1]:
published = not asbool(pkg.extras.get('unpublished'))
created[quarter_name].append(
created[period_name].append(
(created_.name, created_.title, lib.dataset_notes(pkg),
'created', quarter_name,
'created', period_name,
created_.revision_timestamp.isoformat(),
created_.revision.author, published))

# modified
# exclude the creation revision
period_start = max(quarter[0], created_.revision_timestamp)
period_start = max(period[0], created_.revision_timestamp)
prs = pr_q.filter(model.PackageRevision.revision_timestamp > period_start)\
.filter(model.PackageRevision.revision_timestamp < quarter[1])
.filter(model.PackageRevision.revision_timestamp < period[1])
rrs = rr_q.filter(model.ResourceRevision.revision_timestamp > period_start)\
.filter(model.ResourceRevision.revision_timestamp < quarter[1])
.filter(model.ResourceRevision.revision_timestamp < period[1])
pes = pe_q.filter(model.PackageExtraRevision.revision_timestamp > period_start)\
.filter(model.PackageExtraRevision.revision_timestamp < quarter[1])
.filter(model.PackageExtraRevision.revision_timestamp < period[1])
authors = ' '.join(set([r[1].author for r in prs] +
[r[2].author for r in rrs] +
[r[2].author for r in pes]))
Expand All @@ -289,33 +361,23 @@ def publisher_activity(organization, include_sub_organizations=False):
for date in sorted(dates)])
if authors:
published = not asbool(pkg.extras.get('unpublished'))
modified[quarter_name].append(
modified[period_name].append(
(pkg.name, pkg.title, lib.dataset_notes(pkg),
'modified', quarter_name,
'modified', period_name,
dates_formatted, authors, published))
return created, modified

datasets = []
for quarter_name in quarters:
datasets += sorted(created[quarter_name], key=lambda x: x[1])
datasets += sorted(modified[quarter_name], key=lambda x: x[1])
columns = ('Dataset name', 'Dataset title', 'Dataset notes', 'Modified or created', 'Quarter', 'Timestamp', 'Author', 'Published')

quarters_iso = dict([(last_or_this, [date_.isoformat() for date_ in q_list])
for last_or_this, q_list in quarters.iteritems()])

return {'table': datasets, 'columns': columns,
'quarters': quarters_iso}

def publisher_activity_combinations():
for org in lib.all_organizations(include_none=False):
for org in lib.all_organizations(include_none=True):
for include_sub_organizations in (False, True):
yield {'organization': org,
'include_sub_organizations': include_sub_organizations}

publisher_activity_report_info = {
'name': 'publisher-activity',
'description': 'A quarterly list of datasets created and edited by a publisher.',
'option_defaults': OrderedDict((('organization', 'cabinet-office'),
'option_defaults': OrderedDict((('organization', None),
('include_sub_organizations', False),
)),
'option_combinations': publisher_activity_combinations,
Expand Down
41 changes: 39 additions & 2 deletions ckanext/dgu/theme/templates/report/publisher_activity.html
@@ -1,5 +1,40 @@
{% if options['organization'] == None %}

{% for quarter in data['quarters'] %}
<ul>
<li>Period: Current and previous quarter
({{h.render_datetime(data['period'][0])}} -
{{h.render_datetime(data['period'][1])}})</li>
<li>Datasets created: {{data['totals']['num created']}}</li>
<li>Datasets modified: {{data['totals']['num modified']}}</li>
<li>Datasets created or modified: {{data['totals']['total']}}</li>
</ul>
<p>Note: some datasets will be created and modified during the period, hence why 'created or modified' is often less than the sum of the 'created' and 'modified' values.</p>
<table class="table table-bordered table-condensed tablesorter" id="report-table" style="width: 100%; margin-top: 8px;">
<thead>
<tr>
<th style="width: 500px">Publisher</th>
<th style="width: 100px">Datasets created</th>
<th style="width: 100px">Datasets modified</th>
<th style="width: 100px">Created or modified</th>
</tr>
</thead>
<tbody>
{% for row in table %}
<tr>
<td>{{ h.link_to(row['organization title'], h.relative_url_for(organization=row['organization name'])) }}
{{ h.link_to('(publisher)', h.url_for(controller='organization',action='read',id=row['organization name'])) }}
</td>
<td>{{ row['num created'] }}</td>
<td>{{ row['num modified'] }}</td>
<td>{{ row['total'] }}</td>
</tr>
{% endfor %}
</tbody>
</table>

{% else %}

{% for quarter in data['quarters'] %}
<div style="margin-top: 10px; margin-bottom:10px;">
<h3>
{{quarter.capitalize()}} Quarter
Expand Down Expand Up @@ -44,4 +79,6 @@ <h3>
</div>
{% endfor %}
</div>
{% endfor %}
{% endfor %}

{% endif %}

0 comments on commit d46a6ae

Please sign in to comment.