From 8377d3cdf9b7b665d5201a6efb43423f4b8952bb Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Tue, 2 Aug 2016 08:26:06 -0400 Subject: [PATCH 1/6] Basic implementation of report download API endpoint --- analytics_data_api/utils.py | 91 ++++++++++++++++++++++++++ analytics_data_api/v0/urls/courses.py | 1 + analytics_data_api/v0/views/courses.py | 39 ++++++++++- requirements/base.txt | 1 + 4 files changed, 131 insertions(+), 1 deletion(-) diff --git a/analytics_data_api/utils.py b/analytics_data_api/utils.py index 5d917d48..d7ab4174 100644 --- a/analytics_data_api/utils.py +++ b/analytics_data_api/utils.py @@ -1,9 +1,17 @@ import datetime from importlib import import_module +from collections import defaultdict from django.db.models import Q +from django.conf import settings +from django.core.files.storage import default_storage from rest_framework.authtoken.models import Token +from analytics_data_api.v0.models import ProblemResponseAnswerDistribution + +AWS_S3_FILE_STORAGE_CLASS = 'storages.backends.s3boto.S3BotoStorage' +ISO_8601_FORMAT_STRING = '%Y-%m-%dT%H:%M:%SZ' + def delete_user_auth_token(username): """ @@ -84,3 +92,86 @@ def date_range(start_date, end_date, delta=datetime.timedelta(days=1)): while cur_date < end_date: yield cur_date cur_date += delta + + +def get_course_report_download_details(course_id, report_name): + """ + Determine the path that the report file should be located at, + then return metadata sufficient for downloading it. + """ + report_location = settings.COURSE_REPORT_FILE_LOCATION_TEMPLATE.format( + course_id=course_id, + report_name=report_name + ) + + if not default_storage.exists(report_location): + raise IOError('Report file does not exist') + + try: + last_modified = default_storage.modified_time(report_location) + except NotImplementedError: + last_modified = None + + try: + download_size = default_storage.size(report_location) + except NotImplementedError: + download_size = None + + # Note that course IDs contain characters that aren't valid for filenames in + # Windows. However, we'll trust the client to save them with the appropriate + # alternate naming scheme (replace ':' with '_' and so on) + download_filename = '{}_{}_{}.csv'.format( + course_id, + report_name, + # We need a date for the filename; if we don't know when it was last modified, + # use the current date and time to stamp the filename. + (last_modified or datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%S.%f.csv') + ) + url, expiration_time = get_file_object_url(report_location, download_filename) + + details = { + 'course_id': course_id, + 'report_name': report_name + 'download_url': url + } + # These are all optional items that aren't guaranteed. The URL isn't guaranteed + # either, but we'll raise an exception earlier if we don't have it. + if last_modified is not None: + details.update({'last_modified': last_modified.strftime(ISO_8601_FORMAT_STRING)}) + if expiration_time is not None: + details.update({'expiration_date': expiration_time.strftime(ISO_8601_FORMAT_STRING)}) + if download_size is not None: + details.update({'file_size': download_size}) + return details + + +def get_file_object_url(filename, download_filename): + """ + Retrieve a download URL for the file, as well as a datetime object + indicating when the URL expires. + + If the file is an AWS S3 file, we need to pass extra details to the + URL method to give us what we need; otherwise, we just call .url() + """ + expire_length = settings.COURSE_REPORT_DOWNLOAD_EXPIRY_TIME + expires_at = datetime.datetime.utcnow() + datetime.timedelta(expires_length) + # S3 is currently the only scheme we can set an expiration date and other details for + if settings.DEFAULT_FILE_STORAGE != AWS_S3_FILE_STORAGE_CLASS: + if hasattr(default_storage, 'url'): + return default_storage.url(filename), None + else: + # Catch this higher in the stack to emit an appropriate error - this + # fundamentally means we can't produce a report download URL. + raise NotImplementedError + url = default_storage.url( + name=filename, + response_headers={ + 'Content-Disposition': 'attachment; filename="{}"'.format(download_filename), + 'Content-Type': 'text/csv', + # The Expires header requires a very particular timestamp format + 'Expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT') + }, + expire=expire_length + ) + return url, expires_at + diff --git a/analytics_data_api/v0/urls/courses.py b/analytics_data_api/v0/urls/courses.py index d2190958..7008b237 100644 --- a/analytics_data_api/v0/urls/courses.py +++ b/analytics_data_api/v0/urls/courses.py @@ -15,6 +15,7 @@ ('problems', views.ProblemsListView, 'problems'), ('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'), ('videos', views.VideosListView, 'videos') + ('reports/(?P[a-zA-Z0-9_]+)', views.ReportDownloadView, 'reports') ] urlpatterns = [] diff --git a/analytics_data_api/v0/views/courses.py b/analytics_data_api/v0/views/courses.py index 08be069f..50d83107 100644 --- a/analytics_data_api/v0/views/courses.py +++ b/analytics_data_api/v0/views/courses.py @@ -9,10 +9,11 @@ from django.http import Http404 from django.utils.timezone import make_aware, utc from rest_framework import generics +from rest_framework.views import APIView from opaque_keys.edx.keys import CourseKey from analytics_data_api.constants import enrollment_modes -from analytics_data_api.utils import dictfetchall +from analytics_data_api.utils import dictfetchall, get_course_report_download_details from analytics_data_api.v0 import models, serializers @@ -763,3 +764,39 @@ class VideosListView(BaseCourseView): def apply_date_filtering(self, queryset): # no date filtering for videos -- just return the queryset return queryset + + +class ReportDownloadView(APIView): + """ + Get information needed to download a CSV report + + **Example request** + + GET /api/v0/courses/{course_id}/reports/{report_name}/ + + **Response Values** + + Returns a single object with data about the report, with the following data: + + * course_id: The ID of the course + * report_name: The name of the report + * download_url: The Internet location from which the report can be downloaded + + The object may also return these items, if supported by the storage backend: + + * last_modified: The date the report was last updated + * expiration_date: The date through which the link will be valid + * file_size: The size in bytes of the CSV download + """ + + def get(request, course_id, report_name): + try: + response = get_course_report_download_details(course_id, report_name) + except NotImplementedError: + # We want a custom error here to indicate that the filesystem that's + # being used doesn't support the creation of an external URL to the file + # I think the right error for the job is a 501 Not Implemented. Http404 for now + raise Http404 + except IOError: + raise Http404 + return Reponse(response) diff --git a/requirements/base.txt b/requirements/base.txt index cc90d90e..dce20921 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -12,3 +12,4 @@ elasticsearch-dsl==0.0.11 # Apache 2.0 Markdown==2.6 # BSD -e git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys +django-storages==1.4.1 # BSD From 97af24c41f94cb8c3f0ed2cb15df2f243b0e5f0c Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Tue, 2 Aug 2016 09:55:45 -0400 Subject: [PATCH 2/6] Adding custom exceptions --- analytics_data_api/utils.py | 9 ++++++--- analytics_data_api/v0/exceptions.py | 23 +++++++++++++++++++++++ analytics_data_api/v0/views/courses.py | 7 ------- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/analytics_data_api/utils.py b/analytics_data_api/utils.py index d7ab4174..67992996 100644 --- a/analytics_data_api/utils.py +++ b/analytics_data_api/utils.py @@ -8,6 +8,10 @@ from rest_framework.authtoken.models import Token from analytics_data_api.v0.models import ProblemResponseAnswerDistribution +from analytics_data_api.v0.exceptions import ( + ReportFileNotFoundError, + CannotCreateReportDownloadLinkError +) AWS_S3_FILE_STORAGE_CLASS = 'storages.backends.s3boto.S3BotoStorage' ISO_8601_FORMAT_STRING = '%Y-%m-%dT%H:%M:%SZ' @@ -105,7 +109,7 @@ def get_course_report_download_details(course_id, report_name): ) if not default_storage.exists(report_location): - raise IOError('Report file does not exist') + raise ReportFileNotFoundError(course_id=course_id, report_name=report_name) try: last_modified = default_storage.modified_time(report_location) @@ -162,7 +166,7 @@ def get_file_object_url(filename, download_filename): else: # Catch this higher in the stack to emit an appropriate error - this # fundamentally means we can't produce a report download URL. - raise NotImplementedError + raise CannotCreateReportDownloadLinkError url = default_storage.url( name=filename, response_headers={ @@ -174,4 +178,3 @@ def get_file_object_url(filename, download_filename): expire=expire_length ) return url, expires_at - diff --git a/analytics_data_api/v0/exceptions.py b/analytics_data_api/v0/exceptions.py index 7dfd1448..2b671050 100644 --- a/analytics_data_api/v0/exceptions.py +++ b/analytics_data_api/v0/exceptions.py @@ -72,3 +72,26 @@ class ParameterValueError(BaseError): def __init__(self, message, *args, **kwargs): super(ParameterValueError, self).__init__(*args, **kwargs) self.message = message + + +class ReportFileNotFoundError(BaseError): + """ + Raise if we couldn't find the file we need to produce the report + """ + def __init__(self, *args, **kwargs): + course_id = kwargs.pop('course_id') + report_name = kwargs.pop('report_name') + super(ReportFileNotFoundError, self).__init__(*args, **kwargs) + self.message = self.message_template.format(course_id=course_id, report_name=report_name) + + @property + def message_template(self): + return 'Could not find report "{report_name}" for course {course_id}.' + + +class CannotCreateReportDownloadLinkError(BaseError): + """ + Raise if we cannot create a link for the file to be downloaded + """ + + message = 'Could not create a downloadable link to the report.' \ No newline at end of file diff --git a/analytics_data_api/v0/views/courses.py b/analytics_data_api/v0/views/courses.py index 50d83107..97c7467c 100644 --- a/analytics_data_api/v0/views/courses.py +++ b/analytics_data_api/v0/views/courses.py @@ -792,11 +792,4 @@ class ReportDownloadView(APIView): def get(request, course_id, report_name): try: response = get_course_report_download_details(course_id, report_name) - except NotImplementedError: - # We want a custom error here to indicate that the filesystem that's - # being used doesn't support the creation of an external URL to the file - # I think the right error for the job is a 501 Not Implemented. Http404 for now - raise Http404 - except IOError: - raise Http404 return Reponse(response) From 6704c624f35b1aa04c9d72459ae006a68a5c364d Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Tue, 2 Aug 2016 10:14:30 -0400 Subject: [PATCH 3/6] Bugfixes; custom exception middleware --- analytics_data_api/utils.py | 37 ++++++++++++------------- analytics_data_api/v0/exceptions.py | 4 +-- analytics_data_api/v0/middleware.py | 38 ++++++++++++++++++++++++++ analytics_data_api/v0/urls/courses.py | 2 +- analytics_data_api/v0/views/courses.py | 13 +++++++-- analyticsdataserver/settings/base.py | 3 ++ requirements/base.txt | 2 +- 7 files changed, 73 insertions(+), 26 deletions(-) diff --git a/analytics_data_api/utils.py b/analytics_data_api/utils.py index 67992996..0dfb663a 100644 --- a/analytics_data_api/utils.py +++ b/analytics_data_api/utils.py @@ -1,13 +1,11 @@ import datetime from importlib import import_module -from collections import defaultdict from django.db.models import Q from django.conf import settings from django.core.files.storage import default_storage from rest_framework.authtoken.models import Token -from analytics_data_api.v0.models import ProblemResponseAnswerDistribution from analytics_data_api.v0.exceptions import ( ReportFileNotFoundError, CannotCreateReportDownloadLinkError @@ -124,22 +122,28 @@ def get_course_report_download_details(course_id, report_name): # Note that course IDs contain characters that aren't valid for filenames in # Windows. However, we'll trust the client to save them with the appropriate # alternate naming scheme (replace ':' with '_' and so on) - download_filename = '{}_{}_{}.csv'.format( + download_filename = '{}-{}-{}.csv'.format( course_id, report_name, # We need a date for the filename; if we don't know when it was last modified, # use the current date and time to stamp the filename. - (last_modified or datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%S.%f.csv') + (last_modified or datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%S') + ).replace( + ':', '-' + ).replace( + '+', '-' ) url, expiration_time = get_file_object_url(report_location, download_filename) details = { 'course_id': course_id, - 'report_name': report_name + 'report_name': report_name, 'download_url': url } # These are all optional items that aren't guaranteed. The URL isn't guaranteed - # either, but we'll raise an exception earlier if we don't have it. + # either, but we'll raise an exception earlier if we don't have it. Currently, support + # is only present for S3, which has all these data types, but this future-proofs + # us for scenarios where we might have a storage provider that doesn't support them. if last_modified is not None: details.update({'last_modified': last_modified.strftime(ISO_8601_FORMAT_STRING)}) if expiration_time is not None: @@ -154,26 +158,21 @@ def get_file_object_url(filename, download_filename): Retrieve a download URL for the file, as well as a datetime object indicating when the URL expires. - If the file is an AWS S3 file, we need to pass extra details to the - URL method to give us what we need; otherwise, we just call .url() + We need to pass extra details to the URL method, above and beyond just the + file location, to give us what we need. """ expire_length = settings.COURSE_REPORT_DOWNLOAD_EXPIRY_TIME - expires_at = datetime.datetime.utcnow() + datetime.timedelta(expires_length) - # S3 is currently the only scheme we can set an expiration date and other details for + expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=expire_length) if settings.DEFAULT_FILE_STORAGE != AWS_S3_FILE_STORAGE_CLASS: - if hasattr(default_storage, 'url'): - return default_storage.url(filename), None - else: - # Catch this higher in the stack to emit an appropriate error - this - # fundamentally means we can't produce a report download URL. - raise CannotCreateReportDownloadLinkError + # At present, we do not support linking from storage other than S3. + raise CannotCreateReportDownloadLinkError url = default_storage.url( name=filename, response_headers={ - 'Content-Disposition': 'attachment; filename="{}"'.format(download_filename), - 'Content-Type': 'text/csv', + 'response-content-disposition': 'attachment; filename={}'.format(download_filename), + 'response-content-type': 'text/csv', # The Expires header requires a very particular timestamp format - 'Expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT') + 'response-expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT') }, expire=expire_length ) diff --git a/analytics_data_api/v0/exceptions.py b/analytics_data_api/v0/exceptions.py index 2b671050..75037b13 100644 --- a/analytics_data_api/v0/exceptions.py +++ b/analytics_data_api/v0/exceptions.py @@ -86,7 +86,7 @@ def __init__(self, *args, **kwargs): @property def message_template(self): - return 'Could not find report "{report_name}" for course {course_id}.' + return 'Could not find report \'{report_name}\' for course {course_id}.' class CannotCreateReportDownloadLinkError(BaseError): @@ -94,4 +94,4 @@ class CannotCreateReportDownloadLinkError(BaseError): Raise if we cannot create a link for the file to be downloaded """ - message = 'Could not create a downloadable link to the report.' \ No newline at end of file + message = 'Could not create a downloadable link to the report.' diff --git a/analytics_data_api/v0/middleware.py b/analytics_data_api/v0/middleware.py index eeda260f..b1ce0b12 100644 --- a/analytics_data_api/v0/middleware.py +++ b/analytics_data_api/v0/middleware.py @@ -8,6 +8,8 @@ LearnerEngagementTimelineNotFoundError, LearnerNotFoundError, ParameterValueError, + ReportFileNotFoundError, + CannotCreateReportDownloadLinkError, ) @@ -129,3 +131,39 @@ def error_code(self): @property def status_code(self): return status.HTTP_400_BAD_REQUEST + + +class ReportFileNotFoundErrorMiddleware(BaseProcessErrorMiddleware): + """ + Raise 404 if the report file isn't present + """ + + @property + def error(self): + return ReportFileNotFoundError + + @property + def error_code(self): + return 'report_file_not_found' + + @property + def status_code(self): + return status.HTTP_404_NOT_FOUND + + +class CannotCreateDownloadLinkErrorMiddleware(BaseProcessErrorMiddleware): + """ + Raise 501 if the filesystem doesn't support creating download links + """ + + @property + def error(self): + return CannotCreateReportDownloadLinkError + + @property + def error_code(self): + return 'cannot_create_report_download_link' + + @property + def status_code(self): + return status.HTTP_501_NOT_IMPLEMENTED diff --git a/analytics_data_api/v0/urls/courses.py b/analytics_data_api/v0/urls/courses.py index 7008b237..27753439 100644 --- a/analytics_data_api/v0/urls/courses.py +++ b/analytics_data_api/v0/urls/courses.py @@ -14,7 +14,7 @@ ('enrollment/location', views.CourseEnrollmentByLocationView, 'enrollment_by_location'), ('problems', views.ProblemsListView, 'problems'), ('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'), - ('videos', views.VideosListView, 'videos') + ('videos', views.VideosListView, 'videos'), ('reports/(?P[a-zA-Z0-9_]+)', views.ReportDownloadView, 'reports') ] diff --git a/analytics_data_api/v0/views/courses.py b/analytics_data_api/v0/views/courses.py index 97c7467c..cc45b72f 100644 --- a/analytics_data_api/v0/views/courses.py +++ b/analytics_data_api/v0/views/courses.py @@ -11,10 +11,12 @@ from rest_framework import generics from rest_framework.views import APIView from opaque_keys.edx.keys import CourseKey +from rest_framework.response import Response from analytics_data_api.constants import enrollment_modes from analytics_data_api.utils import dictfetchall, get_course_report_download_details from analytics_data_api.v0 import models, serializers +from analytics_data_api.v0.exceptions import ReportFileNotFoundError class BaseCourseView(generics.ListAPIView): @@ -788,8 +790,13 @@ class ReportDownloadView(APIView): * expiration_date: The date through which the link will be valid * file_size: The size in bytes of the CSV download """ + enabled_reports = ( + 'problem_response' + ) - def get(request, course_id, report_name): - try: + def get(self, request, course_id, report_name, *args, **kwargs): + if report_name in self.enabled_reports: response = get_course_report_download_details(course_id, report_name) - return Reponse(response) + return Response(response) + else: + raise ReportFileNotFoundError(course_id=course_id, report_name=report_name) diff --git a/analyticsdataserver/settings/base.py b/analyticsdataserver/settings/base.py index 7dcd4044..8550bee9 100644 --- a/analyticsdataserver/settings/base.py +++ b/analyticsdataserver/settings/base.py @@ -180,6 +180,8 @@ 'analytics_data_api.v0.middleware.CourseNotSpecifiedErrorMiddleware', 'analytics_data_api.v0.middleware.CourseKeyMalformedErrorMiddleware', 'analytics_data_api.v0.middleware.ParameterValueErrorMiddleware', + 'analytics_data_api.v0.middleware.ReportFileNotFoundErrorMiddleware', + 'analytics_data_api.v0.middleware.CannotCreateDownloadLinkErrorMiddleware', ) ########## END MIDDLEWARE CONFIGURATION @@ -207,6 +209,7 @@ 'rest_framework.authtoken', 'rest_framework_swagger', 'django_countries', + 'storages' ) LOCAL_APPS = ( diff --git a/requirements/base.txt b/requirements/base.txt index dce20921..15d267fc 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,4 +1,4 @@ -boto==2.22.1 # MIT +boto==2.42.0 # MIT Django==1.8.13 # BSD License django-model-utils==2.2 # BSD djangorestframework==2.4.4 # BSD From 532a6e1c4c239aab01451a44ea5eaee919569ea0 Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Wed, 3 Aug 2016 11:19:57 -0400 Subject: [PATCH 4/6] Adding tests --- analytics_data_api/utils.py | 28 +++-- .../v0/tests/test_connections.py | 2 +- .../v0/tests/views/test_courses.py | 110 ++++++++++++++++++ analyticsdataserver/settings/test.py | 10 ++ 4 files changed, 138 insertions(+), 12 deletions(-) diff --git a/analytics_data_api/utils.py b/analytics_data_api/utils.py index 0dfb663a..27a8c800 100644 --- a/analytics_data_api/utils.py +++ b/analytics_data_api/utils.py @@ -101,22 +101,26 @@ def get_course_report_download_details(course_id, report_name): Determine the path that the report file should be located at, then return metadata sufficient for downloading it. """ - report_location = settings.COURSE_REPORT_FILE_LOCATION_TEMPLATE.format( + report_location_template = getattr(settings, 'COURSE_REPORT_FILE_LOCATION_TEMPLATE') + report_location = report_location_template.format( course_id=course_id, report_name=report_name ) + if getattr(settings, 'DEFAULT_FILE_STORAGE', None) != AWS_S3_FILE_STORAGE_CLASS: + # At present, we do not support linking from storage other than S3. + raise CannotCreateReportDownloadLinkError if not default_storage.exists(report_location): raise ReportFileNotFoundError(course_id=course_id, report_name=report_name) try: last_modified = default_storage.modified_time(report_location) - except NotImplementedError: + except (NotImplementedError, AttributeError): last_modified = None try: download_size = default_storage.size(report_location) - except NotImplementedError: + except (NotImplementedError, AttributeError): download_size = None # Note that course IDs contain characters that aren't valid for filenames in @@ -133,7 +137,7 @@ def get_course_report_download_details(course_id, report_name): ).replace( '+', '-' ) - url, expiration_time = get_file_object_url(report_location, download_filename) + url, expiration_date = get_file_object_url(report_location, download_filename) details = { 'course_id': course_id, @@ -146,8 +150,8 @@ def get_course_report_download_details(course_id, report_name): # us for scenarios where we might have a storage provider that doesn't support them. if last_modified is not None: details.update({'last_modified': last_modified.strftime(ISO_8601_FORMAT_STRING)}) - if expiration_time is not None: - details.update({'expiration_date': expiration_time.strftime(ISO_8601_FORMAT_STRING)}) + if expiration_date is not None: + details.update({'expiration_date': expiration_date.strftime(ISO_8601_FORMAT_STRING)}) if download_size is not None: details.update({'file_size': download_size}) return details @@ -161,11 +165,9 @@ def get_file_object_url(filename, download_filename): We need to pass extra details to the URL method, above and beyond just the file location, to give us what we need. """ - expire_length = settings.COURSE_REPORT_DOWNLOAD_EXPIRY_TIME - expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=expire_length) - if settings.DEFAULT_FILE_STORAGE != AWS_S3_FILE_STORAGE_CLASS: - # At present, we do not support linking from storage other than S3. - raise CannotCreateReportDownloadLinkError + # Default to expiring the link after one day + expire_length = getattr(settings, 'COURSE_REPORT_DOWNLOAD_EXPIRY_TIME', 86400) + expires_at = get_expiration_date(expire_length) url = default_storage.url( name=filename, response_headers={ @@ -177,3 +179,7 @@ def get_file_object_url(filename, download_filename): expire=expire_length ) return url, expires_at + + +def get_expiration_date(seconds): + return datetime.datetime.utcnow() + datetime.timedelta(seconds=seconds) diff --git a/analytics_data_api/v0/tests/test_connections.py b/analytics_data_api/v0/tests/test_connections.py index 3c58737f..7cfacc6a 100644 --- a/analytics_data_api/v0/tests/test_connections.py +++ b/analytics_data_api/v0/tests/test_connections.py @@ -33,7 +33,7 @@ def test_signing(self): self.assertTrue('my_access_key' in auth_header) def test_timeout(self): - def fake_connection(_address): + def fake_connection(_address, timeout): raise socket.timeout('fake error') socket.create_connection = fake_connection connection = ESConnection('mockservice.cc-zone-1.amazonaws.com', diff --git a/analytics_data_api/v0/tests/views/test_courses.py b/analytics_data_api/v0/tests/views/test_courses.py index 2441df53..39f43011 100644 --- a/analytics_data_api/v0/tests/views/test_courses.py +++ b/analytics_data_api/v0/tests/views/test_courses.py @@ -12,6 +12,7 @@ from django.conf import settings from django_dynamic_fixture import G import pytz +from mock import patch, Mock from analytics_data_api.constants.country import get_country from analytics_data_api.v0 import models @@ -781,3 +782,112 @@ def test_get(self): def test_get_404(self): response = self._get_data('foo/bar/course') self.assertEquals(response.status_code, 404) + + +class CourseReportDownloadViewTests(DemoCourseMixin, TestCaseWithAuthentication): + + path = '/api/v0/courses/{course_id}/reports/{report_name}' + + def wrap_get_file_object_url(function): + def replace_second_return(*args, **kwargs): + x, y = function(*args, **kwargs) + y = datetime.datetime(2014, 1, 1, tzinfo=pytz.utc) + return x, y + return replace_second_return + + @patch('django.core.files.storage.default_storage.exists', Mock(return_value=False)) + def test_report_file_not_found(self): + response = self.authenticated_get( + self.path.format( + course_id=DEMO_COURSE_ID, + report_name='problem_response' + ) + ) + self.assertEqual(response.status_code, 404) + + def test_report_not_supported(self): + response = self.authenticated_get( + self.path.format( + course_id=DEMO_COURSE_ID, + report_name='fake_problem_that_we_dont_support' + ) + ) + self.assertEqual(response.status_code, 404) + + @patch('django.conf.settings.DEFAULT_FILE_STORAGE', 'storages.backends.ftp.FTPStorage') + def test_storage_provider_other_than_s3(self): + response = self.authenticated_get( + self.path.format( + course_id=DEMO_COURSE_ID, + report_name='problem_response' + ) + ) + print(response) + self.assertEqual(response.status_code, 501) + + @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True)) + @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake')) + @patch('django.core.files.storage.default_storage.modified_time', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000)) + @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + def test_make_working_link(self): + response = self.authenticated_get( + self.path.format( + course_id=DEMO_COURSE_ID, + report_name='problem_response' + ) + ) + self.assertEqual(response.status_code, 200) + expected = { + 'course_id': DEMO_COURSE_ID, + 'report_name': 'problem_response', + 'download_url': 'http://fake', + 'last_modified': '2014-01-01T00:00:00Z', + 'expiration_date': datetime.datetime(2014, 1, 1, tzinfo=pytz.utc).strftime('%Y-%m-%dT%H:%M:%SZ'), + 'file_size': 1000 + } + self.assertEqual(response.data, expected) + + @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True)) + @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake')) + @patch('django.core.files.storage.default_storage.modified_time', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch('django.core.files.storage.default_storage.size', Mock(side_effect=NotImplementedError())) + @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + def test_make_working_link_with_missing_size(self): + response = self.authenticated_get( + self.path.format( + course_id=DEMO_COURSE_ID, + report_name='problem_response' + ) + ) + self.assertEqual(response.status_code, 200) + expected = { + 'course_id': DEMO_COURSE_ID, + 'report_name': 'problem_response', + 'download_url': 'http://fake', + 'last_modified': '2014-01-01T00:00:00Z', + 'expiration_date': datetime.datetime(2014, 1, 1, tzinfo=pytz.utc).strftime('%Y-%m-%dT%H:%M:%SZ') + } + self.assertEqual(response.data, expected) + + @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True)) + @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake')) + @patch('django.core.files.storage.default_storage.modified_time', Mock(side_effect=NotImplementedError())) + @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000)) + @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + def test_make_working_link_with_missing_last_modified_date(self): + response = self.authenticated_get( + self.path.format( + course_id=DEMO_COURSE_ID, + report_name='problem_response' + ) + ) + self.assertEqual(response.status_code, 200) + expected = { + 'course_id': DEMO_COURSE_ID, + 'report_name': 'problem_response', + 'download_url': 'http://fake', + 'file_size': 1000, + 'expiration_date': datetime.datetime(2014, 1, 1, tzinfo=pytz.utc).strftime('%Y-%m-%dT%H:%M:%SZ') + } + self.assertEqual(response.data, expected) diff --git a/analyticsdataserver/settings/test.py b/analyticsdataserver/settings/test.py index e854a832..d23fd844 100644 --- a/analyticsdataserver/settings/test.py +++ b/analyticsdataserver/settings/test.py @@ -29,3 +29,13 @@ ELASTICSEARCH_LEARNERS_HOST = 'http://localhost:9223/' ELASTICSEARCH_LEARNERS_INDEX = 'roster_test' ELASTICSEARCH_LEARNERS_UPDATE_INDEX = 'index_update_test' + +# Default the django-storage settings so we can test easily +DEFAULT_FILE_STORAGE = 'storages.backends.s3boto.S3BotoStorage' +AWS_ACCESS_KEY_ID = 'xxxxx' +AWS_SECRET_ACCESS_KEY = 'xxxxx' +AWS_STORAGE_BUCKET_NAME = 'fake-bucket' + +# Default settings for report download endpoint +COURSE_REPORT_FILE_LOCATION_TEMPLATE = '{course_id}/{report_name}.csv' +COURSE_REPORT_DOWNLOAD_EXPIRY_TIME = 86400 \ No newline at end of file From 819a744021a527ca6295416f6379cd5ceab1ab6d Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Wed, 3 Aug 2016 16:48:48 -0400 Subject: [PATCH 5/6] Fixing nits --- .../v0/tests/test_connections.py | 2 +- .../v0/tests/views/test_courses.py | 33 +++++++++++-------- analytics_data_api/v0/views/courses.py | 4 +-- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/analytics_data_api/v0/tests/test_connections.py b/analytics_data_api/v0/tests/test_connections.py index 7cfacc6a..44e7b061 100644 --- a/analytics_data_api/v0/tests/test_connections.py +++ b/analytics_data_api/v0/tests/test_connections.py @@ -33,7 +33,7 @@ def test_signing(self): self.assertTrue('my_access_key' in auth_header) def test_timeout(self): - def fake_connection(_address, timeout): + def fake_connection(_address, _timeout): raise socket.timeout('fake error') socket.create_connection = fake_connection connection = ESConnection('mockservice.cc-zone-1.amazonaws.com', diff --git a/analytics_data_api/v0/tests/views/test_courses.py b/analytics_data_api/v0/tests/views/test_courses.py index 39f43011..11acf08e 100644 --- a/analytics_data_api/v0/tests/views/test_courses.py +++ b/analytics_data_api/v0/tests/views/test_courses.py @@ -788,13 +788,6 @@ class CourseReportDownloadViewTests(DemoCourseMixin, TestCaseWithAuthentication) path = '/api/v0/courses/{course_id}/reports/{report_name}' - def wrap_get_file_object_url(function): - def replace_second_return(*args, **kwargs): - x, y = function(*args, **kwargs) - y = datetime.datetime(2014, 1, 1, tzinfo=pytz.utc) - return x, y - return replace_second_return - @patch('django.core.files.storage.default_storage.exists', Mock(return_value=False)) def test_report_file_not_found(self): response = self.authenticated_get( @@ -822,14 +815,19 @@ def test_storage_provider_other_than_s3(self): report_name='problem_response' ) ) - print(response) self.assertEqual(response.status_code, 501) @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True)) @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake')) - @patch('django.core.files.storage.default_storage.modified_time', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch( + 'django.core.files.storage.default_storage.modified_time', + Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)) + ) @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000)) - @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch( + 'analytics_data_api.utils.get_expiration_date', + Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)) + ) def test_make_working_link(self): response = self.authenticated_get( self.path.format( @@ -850,9 +848,15 @@ def test_make_working_link(self): @patch('django.core.files.storage.default_storage.exists', Mock(return_value=True)) @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake')) - @patch('django.core.files.storage.default_storage.modified_time', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch( + 'django.core.files.storage.default_storage.modified_time', + Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)) + ) @patch('django.core.files.storage.default_storage.size', Mock(side_effect=NotImplementedError())) - @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch( + 'analytics_data_api.utils.get_expiration_date', + Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)) + ) def test_make_working_link_with_missing_size(self): response = self.authenticated_get( self.path.format( @@ -874,7 +878,10 @@ def test_make_working_link_with_missing_size(self): @patch('django.core.files.storage.default_storage.url', Mock(return_value='http://fake')) @patch('django.core.files.storage.default_storage.modified_time', Mock(side_effect=NotImplementedError())) @patch('django.core.files.storage.default_storage.size', Mock(return_value=1000)) - @patch('analytics_data_api.utils.get_expiration_date', Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc))) + @patch( + 'analytics_data_api.utils.get_expiration_date', + Mock(return_value=datetime.datetime(2014, 1, 1, tzinfo=pytz.utc)) + ) def test_make_working_link_with_missing_last_modified_date(self): response = self.authenticated_get( self.path.format( diff --git a/analytics_data_api/v0/views/courses.py b/analytics_data_api/v0/views/courses.py index cc45b72f..b46075bf 100644 --- a/analytics_data_api/v0/views/courses.py +++ b/analytics_data_api/v0/views/courses.py @@ -9,9 +9,9 @@ from django.http import Http404 from django.utils.timezone import make_aware, utc from rest_framework import generics +from rest_framework.response import Response from rest_framework.views import APIView from opaque_keys.edx.keys import CourseKey -from rest_framework.response import Response from analytics_data_api.constants import enrollment_modes from analytics_data_api.utils import dictfetchall, get_course_report_download_details @@ -794,7 +794,7 @@ class ReportDownloadView(APIView): 'problem_response' ) - def get(self, request, course_id, report_name, *args, **kwargs): + def get(self, _request, course_id, report_name): if report_name in self.enabled_reports: response = get_course_report_download_details(course_id, report_name) return Response(response) From b0012d4abd384d53bec66d0b01d60200af2f358c Mon Sep 17 00:00:00 2001 From: Jesse Shapiro Date: Thu, 4 Aug 2016 05:42:49 -0400 Subject: [PATCH 6/6] Addressing review comments --- analytics_data_api/utils.py | 64 +++++++++++++++++++-------- analytics_data_api/v0/urls/courses.py | 2 +- 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/analytics_data_api/utils.py b/analytics_data_api/utils.py index 27a8c800..3d5b8db6 100644 --- a/analytics_data_api/utils.py +++ b/analytics_data_api/utils.py @@ -4,6 +4,7 @@ from django.db.models import Q from django.conf import settings from django.core.files.storage import default_storage +from django.core.exceptions import SuspiciousFileOperation, SuspiciousOperation from rest_framework.authtoken.models import Token from analytics_data_api.v0.exceptions import ( @@ -101,18 +102,32 @@ def get_course_report_download_details(course_id, report_name): Determine the path that the report file should be located at, then return metadata sufficient for downloading it. """ - report_location_template = getattr(settings, 'COURSE_REPORT_FILE_LOCATION_TEMPLATE') + report_location_template = getattr( + settings, + 'COURSE_REPORT_FILE_LOCATION_TEMPLATE', + '/{course_id}/{report_name}' + ) report_location = report_location_template.format( course_id=course_id, report_name=report_name ) - if getattr(settings, 'DEFAULT_FILE_STORAGE', None) != AWS_S3_FILE_STORAGE_CLASS: - # At present, we do not support linking from storage other than S3. + try: + if not default_storage.exists(report_location): + raise ReportFileNotFoundError(course_id=course_id, report_name=report_name) + except ( + AttributeError, + NotImplementedError, + ImportError, + SuspiciousFileOperation, + SuspiciousOperation + ): + # Error out if: + # - We don't have a method to determine file existence + # - Such a method isn't implemented + # - We can't import the specified storage class + # - We don't have privileges for the specified file location raise CannotCreateReportDownloadLinkError - if not default_storage.exists(report_location): - raise ReportFileNotFoundError(course_id=course_id, report_name=report_name) - try: last_modified = default_storage.modified_time(report_location) except (NotImplementedError, AttributeError): @@ -131,8 +146,9 @@ def get_course_report_download_details(course_id, report_name): report_name, # We need a date for the filename; if we don't know when it was last modified, # use the current date and time to stamp the filename. - (last_modified or datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%S') + (last_modified or datetime.datetime.utcnow()).strftime('%Y%m%dT%H%M%SZ') ).replace( + # Remove characters that aren't filename-friendly ':', '-' ).replace( '+', '-' @@ -164,22 +180,32 @@ def get_file_object_url(filename, download_filename): We need to pass extra details to the URL method, above and beyond just the file location, to give us what we need. + + Currently, this method only supports S3 storage, and will need to be changed to + support building URLs on other storage providers. """ - # Default to expiring the link after one day - expire_length = getattr(settings, 'COURSE_REPORT_DOWNLOAD_EXPIRY_TIME', 86400) + # Default to expiring the link after two minutes + expire_length = getattr(settings, 'COURSE_REPORT_DOWNLOAD_EXPIRY_TIME', 120) expires_at = get_expiration_date(expire_length) - url = default_storage.url( - name=filename, - response_headers={ - 'response-content-disposition': 'attachment; filename={}'.format(download_filename), - 'response-content-type': 'text/csv', - # The Expires header requires a very particular timestamp format - 'response-expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT') - }, - expire=expire_length - ) + try: + url = default_storage.url( + name=filename, + response_headers={ + 'response-content-disposition': 'attachment; filename={}'.format(download_filename), + 'response-content-type': 'text/csv', + # The Expires header requires a very particular timestamp format + 'response-expires': expires_at.strftime('%a, %d %b %Y %H:%M:%S GMT') + }, + expire=expire_length + ) + except (AttributeError, TypeError, NotImplementedError): + # Either we can't find a .url() method, or we can't use it. Raise an exception. + raise CannotCreateReportDownloadLinkError return url, expires_at def get_expiration_date(seconds): + """ + Determine when a given link will expire, based on a given lifetime + """ return datetime.datetime.utcnow() + datetime.timedelta(seconds=seconds) diff --git a/analytics_data_api/v0/urls/courses.py b/analytics_data_api/v0/urls/courses.py index 27753439..56aa3a13 100644 --- a/analytics_data_api/v0/urls/courses.py +++ b/analytics_data_api/v0/urls/courses.py @@ -15,7 +15,7 @@ ('problems', views.ProblemsListView, 'problems'), ('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'), ('videos', views.VideosListView, 'videos'), - ('reports/(?P[a-zA-Z0-9_]+)', views.ReportDownloadView, 'reports') + ('reports/(?P[a-zA-Z0-9_]+)', views.ReportDownloadView, 'reports'), ] urlpatterns = []