From 0135b1aeb7299e7246881ca8f6c4dec2285e8aa4 Mon Sep 17 00:00:00 2001
From: Daniel Friedman
Date: Wed, 6 Apr 2016 15:20:12 -0400
Subject: [PATCH] Fix engagement timeline metrics

Now distinguishes between metrics which are aggregated by unique entity
id (such as problems or videos) and those which simply add up the total
number of interactions, regardless of which module they were acted upon.
---
Reviewer note: EngagementType now resolves metrics through a lookup table
so that every unsupported entity/event pair raises ValueError (previously a
known entity type with an unsupported event left the instance without its
attributes). The blob id on the engagement_types.py "index" line was not
recomputed after this revision.

 .../constants/engagement_entity_types.py      |   6 -
 .../constants/engagement_events.py            |   4 +-
 .../constants/engagement_types.py             |  44 ++++++
 analytics_data_api/v0/models.py               |  39 +++---
 .../tests/views/test_engagement_timelines.py  | 128 +++++++++++-------
 analytics_data_api/v0/views/learners.py       |   5 +-
 6 files changed, 148 insertions(+), 78 deletions(-)
 create mode 100644 analytics_data_api/constants/engagement_types.py

diff --git a/analytics_data_api/constants/engagement_entity_types.py b/analytics_data_api/constants/engagement_entity_types.py
index 3a67f8dd..41260ab9 100644
--- a/analytics_data_api/constants/engagement_entity_types.py
+++ b/analytics_data_api/constants/engagement_entity_types.py
@@ -6,9 +6,3 @@
 PROBLEMS = 'problems'
 VIDEOS = 'videos'
 AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS]
-
-# useful for agregating ModuleEngagement to ModuleEngagementTimeline
-SINGULAR_TO_PLURAL = {
-    PROBLEM: PROBLEMS,
-    VIDEO: VIDEOS,
-}
diff --git a/analytics_data_api/constants/engagement_events.py b/analytics_data_api/constants/engagement_events.py
index ba78e131..0939f56a 100644
--- a/analytics_data_api/constants/engagement_events.py
+++ b/analytics_data_api/constants/engagement_events.py
@@ -2,12 +2,12 @@
 
 ATTEMPTED = 'attempted'
 COMPLETED = 'completed'
-CONTRIBUTIONS = 'contributions'
+CONTRIBUTED = 'contributed'
 VIEWED = 'viewed'
 
 # map entity types to events
 EVENTS = {
-    engagement_entity_types.DISCUSSION: [CONTRIBUTIONS],
+    engagement_entity_types.DISCUSSION: [CONTRIBUTED],
     engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED],
     engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED],
     engagement_entity_types.VIDEO: [VIEWED],
diff --git a/analytics_data_api/constants/engagement_types.py b/analytics_data_api/constants/engagement_types.py
new file mode 100644
index 00000000..97d99cd7
--- /dev/null
+++ b/analytics_data_api/constants/engagement_types.py
@@ -0,0 +1,44 @@
+from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
+from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
+
+
+class EngagementType(object):
+    """
+    Encapsulates:
+        - The API consumer-facing display name for engagement types
+        - The internal question of whether the metric should be counted in terms
+          of the entity type or the raw number of events.
+    """
+    # Maps each supported (entity type, event type) pair to a tuple of
+    # (display name, whether the metric is counted by entity).
+    METRICS = {
+        (PROBLEM, ATTEMPTED): ('problems_attempted', True),
+        (PROBLEM, COMPLETED): ('problems_completed', True),
+        (VIDEO, VIEWED): ('videos_viewed', True),
+        # Note that the discussion contribution metric counts
+        # total discussion contributions, not number of
+        # discussions contributed to.
+        (DISCUSSION, CONTRIBUTED): ('discussion_contributions', False),
+    }
+
+    def __init__(self, entity_type, event_type):
+        """
+        Initializes an EngagementType for a particular entity and event type.
+
+        Arguments:
+            entity_type (str): the type of module interacted with
+            event_type (str): the type of interaction on that entity
+
+        Raises:
+            ValueError: if the combination of entity type and event type is
+                not a supported engagement metric.
+        """
+        try:
+            self.name, self.is_counted_by_entity = self.METRICS[(entity_type, event_type)]
+        except KeyError:
+            raise ValueError(
+                'No display name found for entity type "{entity_type}" and event type "{event_type}"'.format(
+                    entity_type=entity_type,
+                    event_type=event_type,
+                )
+            )
diff --git a/analytics_data_api/v0/models.py b/analytics_data_api/v0/models.py
index 1b1fc28a..42a22d2a 100644
--- a/analytics_data_api/v0/models.py
+++ b/analytics_data_api/v0/models.py
@@ -2,11 +2,12 @@
 
 from django.conf import settings
 from django.db import models
-from django.db.models import Sum
+from django.db.models import Count, Sum
 # some fields (e.g. Float, Integer) are dynamic and your IDE may highlight them as unavailable
-from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String
+from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String  # pylint: disable=no-name-in-module
 
-from analytics_data_api.constants import country, engagement_entity_types, genders, learner
+from analytics_data_api.constants import country, genders, learner
+from analytics_data_api.constants.engagement_types import EngagementType
 
 
 class CourseActivityWeekly(models.Model):
@@ -394,24 +395,28 @@ class ModuleEngagementTimelineManager(models.Manager):
     def get_timelines(self, course_id, username):
         queryset = ModuleEngagement.objects.all().filter(course_id=course_id, username=username) \
             .values('date', 'entity_type', 'event') \
-            .annotate(count=Sum('count')) \
+            .annotate(total_count=Sum('count')) \
+            .annotate(distinct_entity_count=Count('entity_id')) \
             .order_by('date')
 
         timelines = []
 
-        for key, group in groupby(queryset, lambda x: (x['date'])):
-            # Iterate over groups and create a single item with engagement data
-            item = {
-                u'date': key,
+        for date, engagements in groupby(queryset, lambda x: (x['date'])):
+            # Iterate over engagements for this day and create a single day with
+            # engagement data.
+            day = {
+                u'date': date,
             }
-            for engagement in group:
-                entity_type = engagement_entity_types.SINGULAR_TO_PLURAL.get(engagement['entity_type'],
-                                                                             engagement['entity_type'])
-                engagement_type = '{}_{}'.format(entity_type, engagement['event'])
-                count = item.get(engagement_type, 0)
-                count += engagement['count']
-                item[engagement_type] = count
-            timelines.append(item)
+            for engagement in engagements:
+                engagement_type = EngagementType(engagement['entity_type'], engagement['event'])
+
+                if engagement_type.is_counted_by_entity:
+                    count_delta = engagement['distinct_entity_count']
+                else:
+                    count_delta = engagement['total_count']
+
+                day[engagement_type.name] = day.get(engagement_type.name, 0) + count_delta
+            timelines.append(day)
 
         return timelines
 
@@ -422,7 +427,7 @@ class ModuleEngagement(models.Model):
     course_id = models.CharField(db_index=True, max_length=255)
     username = models.CharField(max_length=255)
     date = models.DateTimeField()
-    # This will be one of "problem", "video" or "forum"
+    # This will be one of "problem", "video" or "discussion"
     entity_type = models.CharField(max_length=255)
     # For problems this will be the usage key, for videos it will be the html encoded module ID,
     # for forums it will be the commentable_id
diff --git a/analytics_data_api/v0/tests/views/test_engagement_timelines.py b/analytics_data_api/v0/tests/views/test_engagement_timelines.py
index 32de90b5..356970ac 100644
--- a/analytics_data_api/v0/tests/views/test_engagement_timelines.py
+++ b/analytics_data_api/v0/tests/views/test_engagement_timelines.py
@@ -1,93 +1,119 @@
 import datetime
 import json
 
+import ddt
+
 from django.utils.http import urlquote
 from django_dynamic_fixture import G
 import pytz
 from rest_framework import status
 
 from analyticsdataserver.tests import TestCaseWithAuthentication
-from analytics_data_api.constants import engagement_entity_types, engagement_events
+from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
+from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
 from analytics_data_api.v0 import models
 from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin
 
 
+@ddt.ddt
 class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
     DEFAULT_USERNAME = 'ed_xavier'
     path_template = '/api/v0/engagement_timelines/{}/?course_id={}'
 
-    def _create_engagement(self):
-        """ Create module engagement data for testing. """
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
-          entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=100)
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
-          entity_id='some-type-of-id', event=engagement_events.COMPLETED, count=12)
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.DISCUSSION,
-          entity_id='some-type-of-id', event=engagement_events.CONTRIBUTIONS, count=10)
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO,
-          entity_id='some-type-of-id', event=engagement_events.VIEWED, count=44)
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
-          entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=8)
-
-    def test_timeline(self):
-        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
-        self._create_engagement()
-        response = self.authenticated_get(path)
-        self.assertEquals(response.status_code, 200)
+    def create_engagement(self, entity_type, event_type, entity_id, count, date=None):
+        """Create a ModuleEngagement model"""
+        if date is None:
+            date = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
+        G(
+            models.ModuleEngagement,
+            course_id=self.course_id,
+            username=self.DEFAULT_USERNAME,
+            date=date,
+            entity_type=entity_type,
+            entity_id=entity_id,
+            event=event_type,
+            count=count,
+        )
 
-        expected = {
+    @ddt.data(
+        (PROBLEM, ATTEMPTED, 'problems_attempted', True),
+        (PROBLEM, COMPLETED, 'problems_completed', True),
+        (VIDEO, VIEWED, 'videos_viewed', True),
+        (DISCUSSION, CONTRIBUTED, 'discussion_contributions', False),
+    )
+    @ddt.unpack
+    def test_metric_aggregation(self, entity_type, event_type, metric_display_name, expect_id_aggregation):
+        """
+        Verify that some metrics are counted by unique ID, while some are
+        counted by total interactions.
+        """
+        self.create_engagement(entity_type, event_type, 'entity-id', count=5)
+        self.create_engagement(entity_type, event_type, 'entity-id', count=5)
+        expected_data = {
             'days': [
                 {
                     'date': '2015-01-01',
                     'discussion_contributions': 0,
-                    'problems_attempted': 100,
-                    'problems_completed': 12,
-                    'videos_viewed': 0
-                },
-                {
-                    'date': '2015-01-02',
-                    'discussion_contributions': 10,
-                    'problems_attempted': 8,
+                    'problems_attempted': 0,
                     'problems_completed': 0,
-                    'videos_viewed': 44
-                },
+                    'videos_viewed': 0,
+                }
             ]
         }
-        self.assertEquals(response.data, expected)
+        if expect_id_aggregation:
+            expected_data['days'][0][metric_display_name] = 2
+        else:
+            expected_data['days'][0][metric_display_name] = 10
+        path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
+        response = self.authenticated_get(path)
+        self.assertEquals(response.status_code, 200)
+        self.assertEquals(
+            response.data,
+            expected_data
+        )
 
-    def test_one(self):
+    def test_timeline(self):
+        """
+        Smoke test the learner engagement timeline.
+        """
         path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
-          entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
+        day_one = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
+        day_two = datetime.datetime(2015, 1, 2, tzinfo=pytz.utc)
+        self.create_engagement(PROBLEM, ATTEMPTED, 'id-1', count=100, date=day_one)
+        self.create_engagement(PROBLEM, COMPLETED, 'id-2', count=12, date=day_one)
+        self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-3', count=6, date=day_one)
+        self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-4', count=10, date=day_two)
+        self.create_engagement(VIDEO, VIEWED, 'id-5', count=44, date=day_two)
+        self.create_engagement(PROBLEM, ATTEMPTED, 'id-6', count=8, date=day_two)
+        self.create_engagement(PROBLEM, ATTEMPTED, 'id-7', count=4, date=day_two)
         response = self.authenticated_get(path)
         self.assertEquals(response.status_code, 200)
         expected = {
             'days': [
                 {
-                    'date': '2015-05-28',
-                    'discussion_contributions': 0,
-                    'problems_attempted': 6923,
-                    'problems_completed': 0,
+                    'date': '2015-01-01',
+                    'discussion_contributions': 6,
+                    'problems_attempted': 1,
+                    'problems_completed': 1,
                     'videos_viewed': 0
                 },
+                {
+                    'date': '2015-01-02',
+                    'discussion_contributions': 10,
+                    'problems_attempted': 2,
+                    'problems_completed': 0,
+                    'videos_viewed': 1
+                },
             ]
         }
         self.assertEquals(response.data, expected)
 
     def test_day_gap(self):
         path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 5, 26, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO,
-          entity_id='some-type-of-id', event=engagement_events.VIEWED, count=1)
-        G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
-          date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
-          entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
+        first_day = datetime.datetime(2015, 5, 26, tzinfo=pytz.utc)
+        last_day = datetime.datetime(2015, 5, 28, tzinfo=pytz.utc)
+        self.create_engagement(VIDEO, VIEWED, 'id-1', count=1, date=first_day)
+        self.create_engagement(PROBLEM, ATTEMPTED, entity_id='id-2', count=1, date=last_day)
         response = self.authenticated_get(path)
         self.assertEquals(response.status_code, 200)
         expected = {
@@ -102,7 +128,7 @@ def test_day_gap(self):
                 {
                     'date': '2015-05-28',
                     'discussion_contributions': 0,
-                    'problems_attempted': 6923,
+                    'problems_attempted': 1,
                     'problems_completed': 0,
                     'videos_viewed': 0
                 },
diff --git a/analytics_data_api/v0/views/learners.py b/analytics_data_api/v0/views/learners.py
index 290bd6ce..d8470986 100644
--- a/analytics_data_api/v0/views/learners.py
+++ b/analytics_data_api/v0/views/learners.py
@@ -286,8 +286,9 @@ class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
                 a maximum of 1.
             * problems_completed: Number of unique problems the learner
                 answered correctly.
-            * discussions_contributed: Number of posts, responses, or
-                comments the learner contributed to course discussions.
+            * discussion_contributions: Number of times the learner
+                contributed to course discussions through posts, responses,
+                or comments.
             * videos_viewed: Number of times any course video was played.
             * problem_attempts_per_completed: Number of attempts per
                 correctly answered problem. If no problems were answered