Skip to content

Commit

Permalink
Fix engagement timeline metrics
Browse files Browse the repository at this point in the history
Now distinguishes between metrics which are aggregated by unique entity
id (such as problems or videos) and those which simply add up the total
number of interactions, regardless of which module they were acted upon.
  • Loading branch information
dan-f committed Apr 7, 2016
1 parent 4752d4c commit 0135b1a
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 78 deletions.
6 changes: 0 additions & 6 deletions analytics_data_api/constants/engagement_entity_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,3 @@
PROBLEMS = 'problems'
VIDEOS = 'videos'
AGGREGATE_TYPES = [DISCUSSION, PROBLEMS, VIDEOS]

# useful for aggregating ModuleEngagement to ModuleEngagementTimeline
SINGULAR_TO_PLURAL = {
PROBLEM: PROBLEMS,
VIDEO: VIDEOS,
}
4 changes: 2 additions & 2 deletions analytics_data_api/constants/engagement_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

ATTEMPTED = 'attempted'
COMPLETED = 'completed'
CONTRIBUTIONS = 'contributions'
CONTRIBUTED = 'contributed'
VIEWED = 'viewed'

# map entity types to events
EVENTS = {
engagement_entity_types.DISCUSSION: [CONTRIBUTIONS],
engagement_entity_types.DISCUSSION: [CONTRIBUTED],
engagement_entity_types.PROBLEM: [ATTEMPTED, COMPLETED],
engagement_entity_types.PROBLEMS: [ATTEMPTED, COMPLETED],
engagement_entity_types.VIDEO: [VIEWED],
Expand Down
44 changes: 44 additions & 0 deletions analytics_data_api/constants/engagement_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED


class EngagementType(object):
    """
    Describes how one (entity type, event type) pair is reported.

    Encapsulates:
        - The API consumer-facing display name for engagement types
        - The internal question of whether the metric should be counted in
          terms of the entity type or the raw number of events.

    Attributes:
        name (str): display name of the metric, e.g. 'problems_attempted'.
        is_counted_by_entity (bool): True when the metric counts entities
            interacted with, False when it adds up the raw event counts.
    """

    def __init__(self, entity_type, event_type):
        """
        Initializes an EngagementType for a particular entity and event type.

        Arguments:
            entity_type (str): the type of module interacted with
            event_type (str): the type of interaction on that entity

        Raises:
            ValueError: if the combination of entity type and event type is
                not a supported engagement metric.
        """
        # Maps (entity type, event type) -> (display name, counted by entity).
        # Built here rather than at class level so that the constants are
        # only looked up when an instance is created.
        metrics = {
            (PROBLEM, ATTEMPTED): ('problems_attempted', True),
            (PROBLEM, COMPLETED): ('problems_completed', True),
            (VIDEO, VIEWED): ('videos_viewed', True),
            # Note that the discussion contribution metric counts
            # total discussion contributions, not number of
            # discussions contributed to.
            (DISCUSSION, CONTRIBUTED): ('discussion_contributions', False),
        }
        try:
            self.name, self.is_counted_by_entity = metrics[(entity_type, event_type)]
        except (KeyError, TypeError):
            # Reject *every* unsupported pair, including a known entity type
            # with an event that does not apply to it; otherwise the instance
            # would be left without `name`/`is_counted_by_entity` and fail
            # later with an AttributeError. TypeError covers unhashable input.
            raise ValueError(
                'No display name found for entity type "{entity_type}" and event type "{event_type}"'.format(
                    entity_type=entity_type,
                    event_type=event_type,
                )
            )
39 changes: 22 additions & 17 deletions analytics_data_api/v0/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

from django.conf import settings
from django.db import models
from django.db.models import Sum
from django.db.models import Count, Sum
# some fields (e.g. Float, Integer) are dynamic and your IDE may highlight them as unavailable
from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String
from elasticsearch_dsl import Date, DocType, Float, Integer, Q, String # pylint: disable=no-name-in-module

from analytics_data_api.constants import country, engagement_entity_types, genders, learner
from analytics_data_api.constants import country, genders, learner
from analytics_data_api.constants.engagement_types import EngagementType


class CourseActivityWeekly(models.Model):
Expand Down Expand Up @@ -394,24 +395,28 @@ class ModuleEngagementTimelineManager(models.Manager):
def get_timelines(self, course_id, username):
queryset = ModuleEngagement.objects.all().filter(course_id=course_id, username=username) \
.values('date', 'entity_type', 'event') \
.annotate(count=Sum('count')) \
.annotate(total_count=Sum('count')) \
.annotate(distinct_entity_count=Count('entity_id')) \
.order_by('date')

timelines = []

for key, group in groupby(queryset, lambda x: (x['date'])):
# Iterate over groups and create a single item with engagement data
item = {
u'date': key,
for date, engagements in groupby(queryset, lambda x: (x['date'])):
# Iterate over engagements for this day and create a single day with
# engagement data.
day = {
u'date': date,
}
for engagement in group:
entity_type = engagement_entity_types.SINGULAR_TO_PLURAL.get(engagement['entity_type'],
engagement['entity_type'])
engagement_type = '{}_{}'.format(entity_type, engagement['event'])
count = item.get(engagement_type, 0)
count += engagement['count']
item[engagement_type] = count
timelines.append(item)
for engagement in engagements:
engagement_type = EngagementType(engagement['entity_type'], engagement['event'])

if engagement_type.is_counted_by_entity:
count_delta = engagement['distinct_entity_count']
else:
count_delta = engagement['total_count']

day[engagement_type.name] = day.get(engagement_type.name, 0) + count_delta
timelines.append(day)

return timelines

Expand All @@ -422,7 +427,7 @@ class ModuleEngagement(models.Model):
course_id = models.CharField(db_index=True, max_length=255)
username = models.CharField(max_length=255)
date = models.DateTimeField()
# This will be one of "problem", "video" or "forum"
# This will be one of "problem", "video" or "discussion"
entity_type = models.CharField(max_length=255)
# For problems this will be the usage key, for videos it will be the html encoded module ID,
# for discussions it will be the commentable_id
Expand Down
128 changes: 77 additions & 51 deletions analytics_data_api/v0/tests/views/test_engagement_timelines.py
Original file line number Diff line number Diff line change
@@ -1,93 +1,119 @@
import datetime
import json

import ddt

from django.utils.http import urlquote
from django_dynamic_fixture import G
import pytz
from rest_framework import status

from analyticsdataserver.tests import TestCaseWithAuthentication
from analytics_data_api.constants import engagement_entity_types, engagement_events
from analytics_data_api.constants.engagement_entity_types import DISCUSSION, PROBLEM, VIDEO
from analytics_data_api.constants.engagement_events import ATTEMPTED, COMPLETED, CONTRIBUTED, VIEWED
from analytics_data_api.v0 import models
from analytics_data_api.v0.tests.views import DemoCourseMixin, VerifyCourseIdMixin


@ddt.ddt
class EngagementTimelineTests(DemoCourseMixin, VerifyCourseIdMixin, TestCaseWithAuthentication):
DEFAULT_USERNAME = 'ed_xavier'
path_template = '/api/v0/engagement_timelines/{}/?course_id={}'

def _create_engagement(self):
""" Create module engagement data for testing. """
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=100)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 1, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.COMPLETED, count=12)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.DISCUSSION,
entity_id='some-type-of-id', event=engagement_events.CONTRIBUTIONS, count=10)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO,
entity_id='some-type-of-id', event=engagement_events.VIEWED, count=44)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 1, 2, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=8)

def test_timeline(self):
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
self._create_engagement()
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
def create_engagement(self, entity_type, event_type, entity_id, count, date=None):
"""Create a ModuleEngagement model"""
if date is None:
date = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
G(
models.ModuleEngagement,
course_id=self.course_id,
username=self.DEFAULT_USERNAME,
date=date,
entity_type=entity_type,
entity_id=entity_id,
event=event_type,
count=count,
)

expected = {
@ddt.data(
(PROBLEM, ATTEMPTED, 'problems_attempted', True),
(PROBLEM, COMPLETED, 'problems_completed', True),
(VIDEO, VIEWED, 'videos_viewed', True),
(DISCUSSION, CONTRIBUTED, 'discussion_contributions', False),
)
@ddt.unpack
def test_metric_aggregation(self, entity_type, event_type, metric_display_name, expect_id_aggregation):
"""
Verify that some metrics are counted by unique ID, while some are
counted by total interactions.
"""
self.create_engagement(entity_type, event_type, 'entity-id', count=5)
self.create_engagement(entity_type, event_type, 'entity-id', count=5)
expected_data = {
'days': [
{
'date': '2015-01-01',
'discussion_contributions': 0,
'problems_attempted': 100,
'problems_completed': 12,
'videos_viewed': 0
},
{
'date': '2015-01-02',
'discussion_contributions': 10,
'problems_attempted': 8,
'problems_attempted': 0,
'problems_completed': 0,
'videos_viewed': 44
},
'videos_viewed': 0,
}
]
}
self.assertEquals(response.data, expected)
if expect_id_aggregation:
expected_data['days'][0][metric_display_name] = 2
else:
expected_data['days'][0][metric_display_name] = 10
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
self.assertEquals(
response.data,
expected_data
)

def test_one(self):
def test_timeline(self):
"""
Smoke test the learner engagement timeline.
"""
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
day_one = datetime.datetime(2015, 1, 1, tzinfo=pytz.utc)
day_two = datetime.datetime(2015, 1, 2, tzinfo=pytz.utc)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-1', count=100, date=day_one)
self.create_engagement(PROBLEM, COMPLETED, 'id-2', count=12, date=day_one)
self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-3', count=6, date=day_one)
self.create_engagement(DISCUSSION, CONTRIBUTED, 'id-4', count=10, date=day_two)
self.create_engagement(VIDEO, VIEWED, 'id-5', count=44, date=day_two)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-6', count=8, date=day_two)
self.create_engagement(PROBLEM, ATTEMPTED, 'id-7', count=4, date=day_two)
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
expected = {
'days': [
{
'date': '2015-05-28',
'discussion_contributions': 0,
'problems_attempted': 6923,
'problems_completed': 0,
'date': '2015-01-01',
'discussion_contributions': 6,
'problems_attempted': 1,
'problems_completed': 1,
'videos_viewed': 0
},
{
'date': '2015-01-02',
'discussion_contributions': 10,
'problems_attempted': 2,
'problems_completed': 0,
'videos_viewed': 1
},
]
}
self.assertEquals(response.data, expected)

def test_day_gap(self):
path = self.path_template.format(self.DEFAULT_USERNAME, urlquote(self.course_id))
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 5, 26, tzinfo=pytz.utc), entity_type=engagement_entity_types.VIDEO,
entity_id='some-type-of-id', event=engagement_events.VIEWED, count=1)
G(models.ModuleEngagement, course_id=self.course_id, username=self.DEFAULT_USERNAME,
date=datetime.datetime(2015, 5, 28, tzinfo=pytz.utc), entity_type=engagement_entity_types.PROBLEM,
entity_id='some-type-of-id', event=engagement_events.ATTEMPTED, count=6923)
first_day = datetime.datetime(2015, 5, 26, tzinfo=pytz.utc)
last_day = datetime.datetime(2015, 5, 28, tzinfo=pytz.utc)
self.create_engagement(VIDEO, VIEWED, 'id-1', count=1, date=first_day)
self.create_engagement(PROBLEM, ATTEMPTED, entity_id='id-2', count=1, date=last_day)
response = self.authenticated_get(path)
self.assertEquals(response.status_code, 200)
expected = {
Expand All @@ -102,7 +128,7 @@ def test_day_gap(self):
{
'date': '2015-05-28',
'discussion_contributions': 0,
'problems_attempted': 6923,
'problems_attempted': 1,
'problems_completed': 0,
'videos_viewed': 0
},
Expand Down
5 changes: 3 additions & 2 deletions analytics_data_api/v0/views/learners.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,9 @@ class EngagementTimelineView(CourseViewMixin, generics.ListAPIView):
a maximum of 1.
* problems_completed: Number of unique problems the learner
answered correctly.
* discussions_contributed: Number of posts, responses, or
comments the learner contributed to course discussions.
* discussion_contributions: Number of times the learner
contributed to course discussions through posts, responses,
or comments.
* videos_viewed: Number of unique course videos the learner played.
* problem_attempts_per_completed: Number of attempts per
correctly answered problem. If no problems were answered
Expand Down

0 comments on commit 0135b1a

Please sign in to comment.