Skip to content

Commit

Permalink
Merge pull request openedx-unsupported#145 from edx/dsjen/course-list…
Browse files Browse the repository at this point in the history
…-metadata

Adds the course metadata enrollment summary endpoint.
  • Loading branch information
dsjen committed Dec 2, 2016
2 parents d2fcb7b + 8103ad0 commit a4fd62a
Show file tree
Hide file tree
Showing 11 changed files with 487 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import datetime
import logging
from optparse import make_option
import math
import random
from optparse import make_option
from tqdm import tqdm

from django.core.management.base import BaseCommand
from django.utils import timezone
Expand Down Expand Up @@ -87,7 +89,8 @@ def generate_daily_data(self, course_id, start_date, end_date):
models.CourseEnrollmentByGender,
models.CourseEnrollmentByEducation,
models.CourseEnrollmentByBirthYear,
models.CourseEnrollmentByCountry]:
models.CourseEnrollmentByCountry,
models.CourseMetaSummaryEnrollment]:
model.objects.all().delete()

logger.info("Deleted all daily course enrollment data.")
Expand All @@ -98,6 +101,7 @@ def generate_daily_data(self, course_id, start_date, end_date):
date = start_date
cumulative_count = 0

progress = tqdm(total=(end_date - date).days + 2)
while date <= end_date:
daily_total = get_count(daily_total)
models.CourseEnrollmentDaily.objects.create(course_id=course_id, date=date, count=daily_total)
Expand Down Expand Up @@ -128,8 +132,21 @@ def generate_daily_data(self, course_id, start_date, end_date):
models.CourseEnrollmentByBirthYear.objects.create(course_id=course_id, date=date, count=count,
birth_year=birth_year)

progress.update(1)
date = date + datetime.timedelta(days=1)

for mode, ratio in enrollment_mode_ratios.iteritems():
count = int(ratio * daily_total)
cumulative_count = count + random.randint(0, 100)
models.CourseMetaSummaryEnrollment.objects.create(
course_id=course_id, catalog_course_title='Demo Course', catalog_course='Demo_Course',
start_date=timezone.now() - datetime.timedelta(weeks=6),
end_date=timezone.now() + datetime.timedelta(weeks=10),
pacing_type='self_paced', availability='Current', mode=mode, count=count,
cumulative_count=cumulative_count, count_change_7_days=random.randint(-50, 50))

progress.update(1)
progress.close()
logger.info("Done!")

def generate_weekly_data(self, course_id, start_date, end_date):
Expand All @@ -144,6 +161,7 @@ def generate_weekly_data(self, course_id, start_date, end_date):

logger.info("Generating new weekly course activity data...")

progress = tqdm(total=math.ceil((end_date - start).days / 7.0) + 1)
while start < end_date:
active_students = random.randint(100, 4000)
# End date should occur on Saturday at 23:59:59
Expand All @@ -159,8 +177,10 @@ def generate_weekly_data(self, course_id, start_date, end_date):
count=active_students,
interval_start=start, interval_end=end)

progress.update(1)
start = end

progress.close()
logger.info("Done!")

def generate_video_timeline_data(self, video_id):
Expand Down Expand Up @@ -193,6 +213,7 @@ def generate_learner_engagement_data(self, course_id, username, start_date, end_

logger.info("Generating learner engagement module data...")
current = start_date
progress = tqdm(total=(end_date - start_date).days + 1)
while current < end_date:
current = current + datetime.timedelta(days=1)
for metric in engagement_events.INDIVIDUAL_EVENTS:
Expand All @@ -206,7 +227,9 @@ def generate_learner_engagement_data(self, course_id, username, start_date, end_
models.ModuleEngagement.objects.create(
course_id=course_id, username=username, date=current,
entity_type=entity_type, entity_id=entity_id, event=event, count=count)
logger.info("Done!")
progress.update(1)
progress.close()
logger.info("Done!")

def generate_learner_engagement_range_data(self, course_id, start_date, end_date, max_value=100):
logger.info("Deleting engagement range data...")
Expand Down Expand Up @@ -256,7 +279,7 @@ def handle(self, *args, **options):
username = options['username']
video_id = '0fac49ba'
video_module_id = 'i4x-edX-DemoX-video-5c90cffecd9b48b188cbfea176bf7fe9'
start_date = datetime.datetime(year=2016, month=1, day=1, tzinfo=timezone.utc)
start_date = timezone.now() - datetime.timedelta(weeks=10)

num_weeks = options['num_weeks']
if num_weeks:
Expand Down
72 changes: 42 additions & 30 deletions analytics_data_api/v0/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,39 @@
from analytics_data_api.utils import date_range


class CourseActivityWeekly(models.Model):
"""A count of unique users who performed a particular action during a week."""
class BaseCourseModel(models.Model):
    """Abstract base model carrying the columns shared by course-keyed tables.

    Declares the indexed ``course_id`` column and the ``created`` timestamp.
    """

    # Indexed course identifier (db_index=True).
    course_id = models.CharField(max_length=255, db_index=True)
    # Populated automatically when the row is first created (auto_now_add).
    created = models.DateTimeField(auto_now_add=True)

    class Meta(object):
        # Marks this model as abstract.
        abstract = True


class CourseActivityWeekly(BaseCourseModel):
"""A count of unique users who performed a particular action during a week."""

class Meta(BaseCourseModel.Meta):
db_table = 'course_activity'
index_together = [['course_id', 'activity_type']]
ordering = ('interval_end', 'interval_start', 'course_id')
get_latest_by = 'interval_end'

course_id = models.CharField(db_index=True, max_length=255)
interval_start = models.DateTimeField()
interval_end = models.DateTimeField(db_index=True)
activity_type = models.CharField(db_index=True, max_length=255, db_column='label')
count = models.IntegerField()
created = models.DateTimeField(auto_now_add=True)

@classmethod
def get_most_recent(cls, course_id, activity_type):
    """
    Return the row with the latest ``interval_end`` among those matching
    the given ``course_id`` and ``activity_type``.
    """
    matching = cls.objects.filter(course_id=course_id, activity_type=activity_type)
    return matching.latest('interval_end')


class BaseCourseEnrollment(models.Model):
course_id = models.CharField(max_length=255)
class BaseCourseEnrollment(BaseCourseModel):
date = models.DateField(null=False, db_index=True)
count = models.IntegerField(null=False)
created = models.DateTimeField(auto_now_add=True)

class Meta(object):
class Meta(BaseCourseModel.Meta):
abstract = True
get_latest_by = 'date'
index_together = [('course_id', 'date',)]
Expand All @@ -63,6 +67,24 @@ class Meta(BaseCourseEnrollment.Meta):
unique_together = [('course_id', 'date', 'mode')]


class CourseMetaSummaryEnrollment(BaseCourseModel):
    """Course metadata together with enrollment counts for one enrollment mode.

    ``course_id`` and ``mode`` are unique together, so there is at most one
    row per mode for each course.
    """

    # Descriptive course metadata.
    catalog_course_title = models.CharField(max_length=255, db_index=True)
    catalog_course = models.CharField(max_length=255, db_index=True)
    start_date = models.DateTimeField()
    end_date = models.DateTimeField()
    pacing_type = models.CharField(max_length=255, db_index=True)
    availability = models.CharField(max_length=255, db_index=True)

    # Enrollment figures for a single mode.
    mode = models.CharField(max_length=255)
    count = models.IntegerField(null=False)
    cumulative_count = models.IntegerField(null=False)
    count_change_7_days = models.IntegerField(default=0)

    class Meta(BaseCourseModel.Meta):
        db_table = 'course_meta_summary_enrollment'
        ordering = ('course_id',)
        unique_together = [('course_id', 'mode')]


class CourseEnrollmentByBirthYear(BaseCourseEnrollment):
birth_year = models.IntegerField(null=False)

Expand Down Expand Up @@ -103,14 +125,13 @@ class Meta(BaseCourseEnrollment.Meta):
unique_together = [('course_id', 'date', 'gender')]


class BaseProblemResponseAnswerDistribution(models.Model):
class BaseProblemResponseAnswerDistribution(BaseCourseModel):
""" Base model for the answer_distribution table. """

class Meta(object):
class Meta(BaseCourseModel.Meta):
db_table = 'answer_distribution'
abstract = True

course_id = models.CharField(db_index=True, max_length=255)
module_id = models.CharField(db_index=True, max_length=255)
part_id = models.CharField(db_index=True, max_length=255)
correct = models.NullBooleanField()
Expand All @@ -119,7 +140,6 @@ class Meta(object):
variant = models.IntegerField(null=True)
problem_display_name = models.TextField(null=True)
question_text = models.TextField(null=True)
created = models.DateTimeField(auto_now_add=True)


class ProblemResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
Expand All @@ -131,19 +151,17 @@ class Meta(BaseProblemResponseAnswerDistribution.Meta):
count = models.IntegerField()


class ProblemsAndTags(models.Model):
class ProblemsAndTags(BaseCourseModel):
""" Model for the tags_distribution table """

class Meta(object):
class Meta(BaseCourseModel.Meta):
db_table = 'tags_distribution'

course_id = models.CharField(db_index=True, max_length=255)
module_id = models.CharField(db_index=True, max_length=255)
tag_name = models.CharField(max_length=255)
tag_value = models.CharField(max_length=255)
total_submissions = models.IntegerField(default=0)
correct_submissions = models.IntegerField(default=0)
created = models.DateTimeField(auto_now_add=True)


class ProblemFirstLastResponseAnswerDistribution(BaseProblemResponseAnswerDistribution):
Expand Down Expand Up @@ -172,30 +190,26 @@ class Meta(BaseCourseEnrollment.Meta):
unique_together = [('course_id', 'date', 'country_code')]


class GradeDistribution(models.Model):
class GradeDistribution(BaseCourseModel):
""" Each row stores the count of a particular grade on a module for a given course. """

class Meta(object):
class Meta(BaseCourseModel.Meta):
db_table = 'grade_distribution'

module_id = models.CharField(db_index=True, max_length=255)
course_id = models.CharField(db_index=True, max_length=255)
grade = models.IntegerField()
max_grade = models.IntegerField()
count = models.IntegerField()
created = models.DateTimeField(auto_now_add=True)


class SequentialOpenDistribution(models.Model):
class SequentialOpenDistribution(BaseCourseModel):
""" Each row stores the count of views a particular module has had in a given course. """

class Meta(object):
class Meta(BaseCourseModel.Meta):
db_table = 'sequential_open_distribution'

module_id = models.CharField(db_index=True, max_length=255)
course_id = models.CharField(db_index=True, max_length=255)
count = models.IntegerField()
created = models.DateTimeField(auto_now_add=True)


class BaseVideo(models.Model):
Expand Down Expand Up @@ -465,10 +479,9 @@ def get_timeline(self, course_id, username):
return full_timeline


class ModuleEngagement(models.Model):
class ModuleEngagement(BaseCourseModel):
"""User interactions with entities within the courseware."""

course_id = models.CharField(db_index=True, max_length=255)
username = models.CharField(max_length=255)
date = models.DateField()
# This will be one of "problem", "video" or "discussion"
Expand All @@ -483,18 +496,17 @@ class ModuleEngagement(models.Model):

objects = ModuleEngagementTimelineManager()

class Meta(object):
class Meta(BaseCourseModel.Meta):
db_table = 'module_engagement'


class ModuleEngagementMetricRanges(models.Model):
class ModuleEngagementMetricRanges(BaseCourseModel):
"""
Represents the low and high values for a module engagement entity and event
pair, known as the metric. The range_type will either be low, normal, or
high, bounded by low_value and high_value.
"""

course_id = models.CharField(db_index=True, max_length=255)
start_date = models.DateField()
# This is a left-closed interval. No data from the end_date is included in the analysis.
end_date = models.DateField()
Expand All @@ -505,5 +517,5 @@ class ModuleEngagementMetricRanges(models.Model):
high_value = models.FloatField()
low_value = models.FloatField()

class Meta(object):
class Meta(BaseCourseModel.Meta):
db_table = 'module_engagement_metric_ranges'
47 changes: 47 additions & 0 deletions analytics_data_api/v0/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,3 +507,50 @@ def get_engagement_ranges(self, obj):
})

return engagement_ranges


class DynamicFieldsModelSerializer(serializers.ModelSerializer):
    """
    ModelSerializer that accepts an optional ``fields`` keyword argument
    limiting which of its fields are kept.

    Adapted from the "Dynamically modifying fields" example at
    http://www.django-rest-framework.org/api-guide/serializers/#dynamically-modifying-fields
    """

    def __init__(self, *args, **kwargs):
        # 'fields' is consumed here and never forwarded to the parent constructor.
        requested = kwargs.pop('fields', None)

        super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)

        # No restriction given: keep every field.
        if requested is None:
            return

        # Remove every field that was not explicitly requested.
        keep = set(requested)
        unwanted = set(self.fields.keys()) - keep
        for name in unwanted:
            self.fields.pop(name)


class CourseMetaSummaryEnrollmentSerializer(ModelSerializerWithCreatedField, DynamicFieldsModelSerializer):
    """
    Serializes course metadata along with its enrollment counts and a
    ``modes`` entry; the model's ``id`` and ``mode`` fields are excluded.
    """

    class Meta(object):
        model = models.CourseMetaSummaryEnrollment
        exclude = ('id', 'mode')

    # Course metadata.
    course_id = serializers.CharField()
    catalog_course_title = serializers.CharField()
    catalog_course = serializers.CharField()
    start_date = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
    end_date = serializers.DateTimeField(format=settings.DATETIME_FORMAT)
    pacing_type = serializers.CharField()
    availability = serializers.CharField()

    # Enrollment figures; each falls back to 0 via the field default.
    count = serializers.IntegerField(default=0)
    cumulative_count = serializers.IntegerField(default=0)
    count_change_7_days = serializers.IntegerField(default=0)

    # Filled in by get_modes() below.
    modes = serializers.SerializerMethodField()

    def get_modes(self, obj):
        """Return ``obj``'s ``modes`` entry, or None when it has none."""
        # NOTE(review): relies on obj offering a dict-style .get(); presumably the
        # caller passes a mapping rather than a model instance -- confirm.
        return obj.get('modes')

0 comments on commit a4fd62a

Please sign in to comment.