Skip to content

Commit

Permalink
Merge 4a17c73 into fee33f4
Browse files Browse the repository at this point in the history
  • Loading branch information
haikuginger committed Aug 17, 2016
2 parents fee33f4 + 4a17c73 commit 733988d
Show file tree
Hide file tree
Showing 12 changed files with 394 additions and 5 deletions.
146 changes: 146 additions & 0 deletions analytics_data_api/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,36 @@
import datetime
from importlib import import_module
import re

from django.db.models import Q
from django.conf import settings
from django.core.files.storage import default_storage
from django.core.exceptions import SuspiciousFileOperation, SuspiciousOperation
from rest_framework.authtoken.models import Token
from opaque_keys.edx.locator import CourseKey
from opaque_keys import InvalidKeyError

from analytics_data_api.v0.exceptions import (
ReportFileNotFoundError,
CannotCreateReportDownloadLinkError
)


def get_filename_safe_course_id(course_id, replacement_char='_'):
    """
    Build a version of ``course_id`` that can be embedded safely in a file path.

    A course ID that parses as a course key is reduced to its org, course and
    run parts joined by ``replacement_char``; one that does not parse is used
    as given. Either way, every character matched by ``[^\\w\\.\\-]`` is then
    replaced with ``replacement_char``.
    """
    separator = unicode(replacement_char)
    try:
        parsed_key = CourseKey.from_string(course_id)
        key_parts = (parsed_key.org, parsed_key.course, parsed_key.run)
        unsafe_name = separator.join(key_parts)
    except InvalidKeyError:
        # An unparseable ID is not an error here: it is sanitized as-is below.
        unsafe_name = course_id

    # Only A-Z, a-z, 0-9, underscore, period and hyphen are kept; \w stands
    # for the first four of those.
    # TODO: Once we support courses with unicode characters, we will need to revisit this.
    return re.sub(r'[^\w\.\-]', separator, unsafe_name)


def delete_user_auth_token(username):
Expand Down Expand Up @@ -84,3 +112,121 @@ def date_range(start_date, end_date, delta=datetime.timedelta(days=1)):
while cur_date < end_date:
yield cur_date
cur_date += delta


def get_course_report_download_details(course_id, report_name):
    """
    Locate a course report in the default storage and describe how to download it.

    Returns a dict holding ``course_id`` (in its filename-safe form),
    ``report_name`` and ``download_url``. ``last_modified``,
    ``expiration_date`` and ``file_size`` are added only when a value is
    available for them.

    Raises ReportFileNotFoundError when storage reports that the file does
    not exist, and CannotCreateReportDownloadLinkError when the existence
    check itself cannot be carried out.
    """
    location_template = getattr(
        settings,
        'COURSE_REPORT_FILE_LOCATION_TEMPLATE',
        '{course_id}_{report_name}.csv'
    )
    # Course IDs may contain characters that are invalid on some filesystems,
    # so the sanitized form is used both to find the file and to name the download.
    safe_course_id = get_filename_safe_course_id(course_id)
    report_location = location_template.format(course_id=safe_course_id, report_name=report_name)

    # Failures of the existence check that mean "no link can be produced":
    # - there is no method to determine file existence, or it isn't implemented
    # - the configured storage class can't be imported
    # - we lack privileges for the requested file location
    existence_check_failures = (
        AttributeError,
        NotImplementedError,
        ImportError,
        SuspiciousFileOperation,
        SuspiciousOperation,
    )
    try:
        if not default_storage.exists(report_location):
            raise ReportFileNotFoundError(course_id=safe_course_id, report_name=report_name)
    except existence_check_failures:
        raise CannotCreateReportDownloadLinkError

    def optional_storage_value(method_name):
        # Optional metadata: a backend that can't provide it yields None.
        try:
            return getattr(default_storage, method_name)(report_location)
        except (NotImplementedError, AttributeError):
            return None

    last_modified = optional_storage_value('modified_time')
    download_size = optional_storage_value('size')

    # The download name needs a timestamp; when the modification time is
    # unknown, the current UTC date and time is stamped instead.
    filename_stamp = last_modified or datetime.datetime.utcnow()
    download_filename = '{}-{}-{}.csv'.format(
        safe_course_id,
        report_name,
        filename_stamp.strftime('%Y%m%dT%H%M%SZ')
    )
    url, expiration_date = get_file_object_url(report_location, download_filename)

    details = {
        'course_id': safe_course_id,
        'report_name': report_name,
        'download_url': url
    }
    # The remaining entries are optional. The URL has no guarantee either,
    # but its absence has already raised by this point.
    if last_modified is not None:
        details['last_modified'] = last_modified.strftime(settings.DATETIME_FORMAT)
    if expiration_date is not None:
        details['expiration_date'] = expiration_date.strftime(settings.DATETIME_FORMAT)
    if download_size is not None:
        details['file_size'] = download_size
    return details


def get_file_object_url(filename, download_filename):
    """
    Ask the default storage for a download URL and report when that URL expires.

    The first attempt passes extra arguments to ``default_storage.url()``:
    response headers setting content disposition, content type and expiry,
    plus an ``expire`` lifetime (the optional parameters S3 storage accepts).
    If that call raises TypeError, the URL is requested again with only the
    file name, and the expiry is reported as None.

    Returns a ``(url, expires_at)`` tuple. Raises
    CannotCreateReportDownloadLinkError when neither attempt yields a URL
    because of an AttributeError, TypeError or NotImplementedError.
    """
    # Links expire after two minutes unless the setting says otherwise.
    lifetime_seconds = getattr(settings, 'COURSE_REPORT_DOWNLOAD_EXPIRY_TIME', 120)
    expires_at = get_expiration_date(lifetime_seconds)

    try:
        signed_url = default_storage.url(
            name=filename,
            response_headers={
                'response-content-disposition': 'attachment; filename={}'.format(download_filename),
                'response-content-type': 'text/csv',
                # The Expires header requires a very particular timestamp format
                'response-expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT')
            },
            expire=lifetime_seconds
        )
        return signed_url, expires_at
    except TypeError:
        # Typically this means the extra arguments aren't accepted;
        # fall through and retry with the file name alone.
        pass
    except (AttributeError, NotImplementedError):
        # There is no .url() method, or it can't be used.
        raise CannotCreateReportDownloadLinkError

    try:
        plain_url = default_storage.url(name=filename)
    except (AttributeError, TypeError, NotImplementedError):
        # The bare call failed as well; nothing left to try, so fail fast.
        raise CannotCreateReportDownloadLinkError
    return plain_url, None


def get_expiration_date(seconds):
    """
    Return the naive UTC datetime lying ``seconds`` seconds after the current time.
    """
    lifetime = datetime.timedelta(seconds=seconds)
    return datetime.datetime.utcnow() + lifetime
23 changes: 23 additions & 0 deletions analytics_data_api/v0/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,26 @@ class ParameterValueError(BaseError):
# Initializer: forwards any extra positional and keyword arguments to the
# parent initializer, then records the caller-supplied message on the instance.
def __init__(self, message, *args, **kwargs):
super(ParameterValueError, self).__init__(*args, **kwargs)
# Assigned after the parent initializer has run.
self.message = message


class ReportFileNotFoundError(BaseError):
    """
    Signals that the file needed to produce a report could not be found.

    Must be constructed with ``course_id`` and ``report_name`` keyword
    arguments; both are interpolated into ``message``.
    """

    def __init__(self, *args, **kwargs):
        # Take our own keywords out of kwargs before the rest is forwarded
        # to the base class.
        template_fields = {
            'course_id': kwargs.pop('course_id'),
            'report_name': kwargs.pop('report_name'),
        }
        super(ReportFileNotFoundError, self).__init__(*args, **kwargs)
        self.message = self.message_template.format(**template_fields)

    @property
    def message_template(self):
        """
        Format string from which ``message`` is built.
        """
        return 'Could not find report \'{report_name}\' for course {course_id}.'


class CannotCreateReportDownloadLinkError(BaseError):
    """
    Signals that no download link could be created for a report file.
    """

    # Static message; no per-instance formatting is performed in this class.
    message = 'Could not create a downloadable link to the report.'
38 changes: 38 additions & 0 deletions analytics_data_api/v0/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
LearnerEngagementTimelineNotFoundError,
LearnerNotFoundError,
ParameterValueError,
ReportFileNotFoundError,
CannotCreateReportDownloadLinkError,
)


Expand Down Expand Up @@ -129,3 +131,39 @@ def error_code(self):
# HTTP status reported by this handler: 400 Bad Request.
@property
def status_code(self):
return status.HTTP_400_BAD_REQUEST


class ReportFileNotFoundErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Pairs ReportFileNotFoundError with an HTTP 404 status and the error code
    ``report_file_not_found``, for when the report file isn't present.
    """

    @property
    def error(self):
        """
        The exception class associated with this middleware.
        """
        return ReportFileNotFoundError

    @property
    def error_code(self):
        """
        Short string identifier for this kind of error.
        """
        return 'report_file_not_found'

    @property
    def status_code(self):
        """
        HTTP status for this error: 404 Not Found.
        """
        return status.HTTP_404_NOT_FOUND


class CannotCreateDownloadLinkErrorMiddleware(BaseProcessErrorMiddleware):
    """
    Pairs CannotCreateReportDownloadLinkError with an HTTP 501 status and the
    error code ``cannot_create_report_download_link``, for when the filesystem
    doesn't support creating download links.
    """

    @property
    def error(self):
        """
        The exception class associated with this middleware.
        """
        return CannotCreateReportDownloadLinkError

    @property
    def error_code(self):
        """
        Short string identifier for this kind of error.
        """
        return 'cannot_create_report_download_link'

    @property
    def status_code(self):
        """
        HTTP status for this error: 501 Not Implemented.
        """
        return status.HTTP_501_NOT_IMPLEMENTED
2 changes: 1 addition & 1 deletion analytics_data_api/v0/tests/test_connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_signing(self):
self.assertTrue('my_access_key' in auth_header)

def test_timeout(self):
def fake_connection(_address):
def fake_connection(_address, _timeout):
raise socket.timeout('fake error')
socket.create_connection = fake_connection
connection = ESConnection('mockservice.cc-zone-1.amazonaws.com',
Expand Down
3 changes: 3 additions & 0 deletions analytics_data_api/v0/tests/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from opaque_keys.edx.keys import CourseKey
from rest_framework import status

from analytics_data_api.utils import get_filename_safe_course_id

DEMO_COURSE_ID = u'course-v1:edX+DemoX+Demo_2014'
SANITIZED_DEMO_COURSE_ID = get_filename_safe_course_id(DEMO_COURSE_ID)


class DemoCourseMixin(object):
Expand Down
119 changes: 118 additions & 1 deletion analytics_data_api/v0/tests/views/test_courses.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@
from django.conf import settings
from django_dynamic_fixture import G
import pytz
from mock import patch, Mock

from analytics_data_api.constants.country import get_country
from analytics_data_api.v0 import models
from analytics_data_api.constants import country, enrollment_modes, genders
from analytics_data_api.v0.models import CourseActivityWeekly
from analytics_data_api.v0.tests.utils import flatten
from analytics_data_api.v0.tests.views import DemoCourseMixin, DEMO_COURSE_ID
from analytics_data_api.v0.tests.views import DemoCourseMixin, DEMO_COURSE_ID, SANITIZED_DEMO_COURSE_ID
from analyticsdataserver.tests import TestCaseWithAuthentication


Expand Down Expand Up @@ -781,3 +782,119 @@ def test_get(self):
def test_get_404(self):
    """
    A request made with the course ID 'foo/bar/course' must come back as HTTP 404.
    """
    response = self._get_data('foo/bar/course')
    # assertEqual is the supported name; assertEquals is a deprecated alias,
    # and the other test classes in this module already use assertEqual.
    self.assertEqual(response.status_code, 404)


class CourseReportDownloadViewTests(DemoCourseMixin, TestCaseWithAuthentication):
    """
    Tests for the course report download endpoint, run with the storage
    methods it relies on patched out.
    """

    path = '/api/v0/courses/{course_id}/reports/{report_name}'

    # Timestamp handed back by the patched modified_time / get_expiration_date.
    _FIXED_TIME = datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)

    def _fetch_report(self, report_name='problem_response'):
        """
        Perform an authenticated GET for ``report_name`` of the demo course.
        """
        report_url = self.path.format(course_id=DEMO_COURSE_ID, report_name=report_name)
        return self.authenticated_get(report_url)

    def _formatted_fixed_time(self):
        """
        Render the fixed timestamp using the configured DATETIME_FORMAT.
        """
        return datetime.datetime(2014, 1, 1, tzinfo=pytz.utc).strftime(settings.DATETIME_FORMAT)

    def _expected_details(self, **optional_fields):
        """
        Build the expected payload: the always-present entries plus ``optional_fields``.
        """
        payload = {
            'course_id': SANITIZED_DEMO_COURSE_ID,
            'report_name': 'problem_response',
            'download_url': 'http://fake',
            'expiration_date': self._formatted_fixed_time()
        }
        payload.update(optional_fields)
        return payload

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=False))
    def test_report_file_not_found(self):
        response = self._fetch_report()
        self.assertEqual(response.status_code, 404)

    def test_report_not_supported(self):
        response = self._fetch_report(report_name='fake_problem_that_we_dont_support')
        self.assertEqual(response.status_code, 404)

    @patch('analytics_data_api.utils.default_storage', object())
    def test_incompatible_storage_provider(self):
        # A bare object() has none of the storage methods.
        response = self._fetch_report()
        self.assertEqual(response.status_code, 501)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch(
        'django.core.files.storage.default_storage.modified_time',
        Mock(return_value=_FIXED_TIME)
    )
    @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000))
    @patch(
        'analytics_data_api.utils.get_expiration_date',
        Mock(return_value=_FIXED_TIME)
    )
    def test_make_working_link(self):
        response = self._fetch_report()
        self.assertEqual(response.status_code, 200)
        expected = self._expected_details(
            last_modified=self._formatted_fixed_time(),
            file_size=1000
        )
        self.assertEqual(response.data, expected)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch(
        'django.core.files.storage.default_storage.modified_time',
        Mock(return_value=_FIXED_TIME)
    )
    @patch('django.core.files.storage.default_storage.size', Mock(side_effect=NotImplementedError()))
    @patch(
        'analytics_data_api.utils.get_expiration_date',
        Mock(return_value=_FIXED_TIME)
    )
    def test_make_working_link_with_missing_size(self):
        response = self._fetch_report()
        self.assertEqual(response.status_code, 200)
        # No 'file_size' entry: the patched size() raises NotImplementedError.
        expected = self._expected_details(last_modified=self._formatted_fixed_time())
        self.assertEqual(response.data, expected)

    @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True))
    @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake'))
    @patch('django.core.files.storage.default_storage.modified_time', Mock(side_effect=NotImplementedError()))
    @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000))
    @patch(
        'analytics_data_api.utils.get_expiration_date',
        Mock(return_value=_FIXED_TIME)
    )
    def test_make_working_link_with_missing_last_modified_date(self):
        response = self._fetch_report()
        self.assertEqual(response.status_code, 200)
        # No 'last_modified' entry: the patched modified_time() raises NotImplementedError.
        expected = self._expected_details(file_size=1000)
        self.assertEqual(response.data, expected)
Loading

0 comments on commit 733988d

Please sign in to comment.