diff --git a/boxsdk/client/client.py b/boxsdk/client/client.py
index 3a9dbb6a8..a37f123c8 100644
--- a/boxsdk/client/client.py
+++ b/boxsdk/client/client.py
@@ -101,6 +101,21 @@ def file(self, file_id):
         """
         return self.translator.translate('file')(session=self._session, object_id=file_id)
 
+    def upload_session(self, session_id):
+        """
+        Initialize an :class:`UploadSession` object, whose Box ID is session_id.
+
+        :param session_id:
+            The Box ID of the :class:`UploadSession` object.
+        :type session_id:
+            `unicode`
+        :return:
+            An :class:`UploadSession` object with the given session ID.
+        :rtype:
+            :class:`UploadSession`
+        """
+        return self.translator.translate('upload_session')(session=self._session, object_id=session_id)
+
     def comment(self, comment_id):
         """
         Initialize a :class:`Comment` object, whose Box ID is comment_id.
diff --git a/boxsdk/object/__init__.py b/boxsdk/object/__init__.py
index 53d5d4e17..eca76a625 100644
--- a/boxsdk/object/__init__.py
+++ b/boxsdk/object/__init__.py
@@ -32,6 +32,7 @@
     'task',
     'task_assignment',
     'user',
+    'upload_session',
     'webhook',
     'watermark',
     'web_link',
diff --git a/boxsdk/object/file.py b/boxsdk/object/file.py
index 72bfe5f42..f8e4aca95 100644
--- a/boxsdk/object/file.py
+++ b/boxsdk/object/file.py
@@ -1,9 +1,10 @@
 # coding: utf-8
 
-from __future__ import unicode_literals
+from __future__ import unicode_literals, absolute_import
 
 import json
 
+from boxsdk.config import API
 from .item import Item
 from ..util.api_call_decorator import api_call
 from ..pagination.marker_based_object_collection import MarkerBasedObjectCollection
@@ -36,6 +37,37 @@ def preflight_check(self, size, name=None):
             file_id=self._object_id,
         )
 
+    def create_upload_session(self, file_size, file_name=None):
+        """
+        Create a new chunked upload session for uploading a new version of the file.
+
+        :param file_size:
+            The size of the file in bytes that will be uploaded.
+        :type file_size:
+            `int`
+        :param file_name:
+            The new name of the file version that will be uploaded.
+        :type file_name:
+            `unicode` or None
+        :returns:
+            An :class:`UploadSession` object.
+        :rtype:
+            :class:`UploadSession`
+        """
+        body_params = {
+            'file_id': self.object_id,
+            'file_size': file_size,
+        }
+        if file_name is not None:
+            body_params['file_name'] = file_name
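+        # The chunked-upload endpoints live on the upload host (API.UPLOAD_URL)
+        # rather than the regular API host, so swap the base URL accordingly.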
+        url = self.get_url('upload_sessions').replace(API.BASE_API_URL, API.UPLOAD_URL)
+        response = self._session.post(url, data=json.dumps(body_params)).json()
+        return self.translator.translate(response['type'])(
+            session=self.session,
+            object_id=response['id'],
+            response_object=response,
+        )
+
     def _get_accelerator_upload_url_for_update(self):
         """
         Get Accelerator upload url for updating the file.
diff --git a/boxsdk/object/folder.py b/boxsdk/object/folder.py
index 777a5e57c..2b6ae6a98 100644
--- a/boxsdk/object/folder.py
+++ b/boxsdk/object/folder.py
@@ -5,6 +5,7 @@
 import os
 
 from six import text_type
+from boxsdk.config import API
 from boxsdk.object.group import Group
 from boxsdk.object.item import Item
 from boxsdk.object.user import User
@@ -112,6 +113,36 @@ def preflight_check(self, size, name):
             parent_id=self._object_id,
         )
 
+    def create_upload_session(self, file_size, file_name):
+        """
+        Create a new chunked upload session for uploading a new file.
+
+        :param file_size:
+            The size of the file in bytes that will be uploaded.
+        :type file_size:
+            `int`
+        :param file_name:
+            The name of the file that will be uploaded.
+        :type file_name:
+            `unicode`
+        :returns:
+            An :class:`UploadSession` object.
+        :rtype:
+            :class:`UploadSession`
+        """
+        url = '{0}/files/upload_sessions'.format(API.UPLOAD_URL)
+        body_params = {
+            'folder_id': self.object_id,
+            'file_size': file_size,
+            'file_name': file_name,
+        }
+        response = self._session.post(url, data=json.dumps(body_params)).json()
+        return self.translator.translate(response['type'])(
+            session=self.session,
+            object_id=response['id'],
+            response_object=response,
+        )
+
     def _get_accelerator_upload_url_fow_new_uploads(self):
         """
         Get Accelerator upload url for uploading new files.
diff --git a/boxsdk/object/upload_session.py b/boxsdk/object/upload_session.py
new file mode 100644
index 000000000..65dfc8ead
--- /dev/null
+++ b/boxsdk/object/upload_session.py
@@ -0,0 +1,163 @@
+# coding: utf-8
+from __future__ import unicode_literals, absolute_import
+
+import base64
+import hashlib
+import json
+
+from .base_object import BaseObject
+from ..config import API
+from ..pagination.chunked_upload_part_limit_offset_based_object_collection import ChunkedUploadPartLimitOffsetBasedObjectCollection
+
+
+class UploadSession(BaseObject):
+    _item_type = 'upload_session'
+    _parent_item_type = 'file'
+
+    def get_url(self, *args):
+        """
+        Base class override. Endpoint is a little different - it's /files/upload_sessions.
+
+        :rtype:
+            `unicode`
+        """
+        return self.session.get_url(
+            '{0}s/{1}s'.format(self._parent_item_type, self._item_type),
+            self._object_id,
+            *args
+        ).replace(API.BASE_API_URL, API.UPLOAD_URL)
+
+    def get_parts(self, limit=None, offset=None, fields=None):
+        """
+        Get the parts uploaded so far.
+
+        :param limit:
+            The maximum number of items to return per page. If not specified, the server-side default will be used.
+        :type limit:
+            `int` or None
+        :param offset:
+            The index at which to start returning items.
+        :type offset:
+            `int` or None
+        :param fields:
+            Fields to include on the returned items.
+        :type fields:
+            `Iterable` of `unicode`
+        :returns:
+            An iterator over the `dict` representations of the parts uploaded so far.
+        :rtype:
+            :class:`BoxObjectCollection`
+        """
+        return ChunkedUploadPartLimitOffsetBasedObjectCollection(
+            session=self.session,
+            url=self.get_url('parts'),
+            limit=limit,
+            fields=fields,
+            offset=offset,
+            return_full_pages=False,
+        )
+
+    def upload_part(self, part_bytes, offset, total_size, part_content_sha1=None):
+        """
+        Upload a part of a file.
+
+        :param part_bytes:
+            The content of the part, as bytes.
+        :type part_bytes:
+            `bytes`
+        :param offset:
+            Offset, in number of bytes, of the part compared to the beginning of the file.
+        :type offset:
+            `int`
+        :param total_size:
+            The size of the file that this part belongs to.
+        :type total_size:
+            `int`
+        :param part_content_sha1:
+            The raw SHA-1 digest of the part's content. If not specified, it will be calculated.
+        :type part_content_sha1:
+            `bytes` or None
+        :returns:
+            The uploaded part.
+        :rtype:
+            `dict`
+        """
+        if part_content_sha1 is None:
+            sha1 = hashlib.sha1()
+            sha1.update(part_bytes)
+            part_content_sha1 = sha1.digest()
+
+        range_end = min(offset + self.part_size - 1, total_size - 1)  # pylint:disable=no-member
+
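+        # The Digest header carries the base64-encoded raw SHA-1 digest of this
+        # part's content, and Content-Range tells Box which byte range of the
+        # final file the part occupies (both endpoints inclusive).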
+        return self._session.put(
+            self.get_url(),
+            headers={
+                'Content-Type': 'application/octet-stream',
+                'Digest': 'SHA={0}'.format(base64.b64encode(part_content_sha1).decode('utf-8')),
+                'Content-Range': 'bytes {0}-{1}/{2}'.format(offset, range_end, total_size),
+            },
+            data=part_bytes,
+        ).json()
+
+    def commit(self, content_sha1, parts=None, file_attributes=None, etag=None):
+        """
+        Commit a multiput upload.
+
+        :param content_sha1:
+            The raw SHA-1 digest of the contents of the entire file that was uploaded.
+        :type content_sha1:
+            `bytes`
+        :param parts:
+            List of parts that were uploaded.
+        :type parts:
+            `Iterable` of `dict` or None
+        :param file_attributes:
+            A `dict` of attributes to set on the uploaded file.
+        :type file_attributes:
+            `dict` or None
+        :param etag:
+            If specified, instruct the Box API to perform the commit only if the current version's etag matches.
+        :type etag:
+            `unicode` or None
+        :returns:
+            A :class:`File` object.
+        :rtype:
+            :class:`File`
+        """
+        body = {}
+        if file_attributes is not None:
+            body['attributes'] = file_attributes
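+        # If the caller did not keep track of the parts as they were uploaded,
+        # fetch the full list of parts from Box before committing.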
""" + _page_constructor = Page + def __init__( self, session, @@ -101,7 +103,7 @@ def _items_generator(self): self._update_pointer_to_next_page(response_object) self._has_retrieved_all_items = not self._has_more_pages(response_object) - page = Page(self._session, response_object) + page = self._page_constructor(self._session, response_object) if self._return_full_pages: yield page diff --git a/boxsdk/pagination/chunked_upload_part_limit_offset_based_object_collection.py b/boxsdk/pagination/chunked_upload_part_limit_offset_based_object_collection.py new file mode 100644 index 000000000..d432ff996 --- /dev/null +++ b/boxsdk/pagination/chunked_upload_part_limit_offset_based_object_collection.py @@ -0,0 +1,10 @@ +# coding: utf-8 + +from __future__ import unicode_literals, absolute_import + +from .chunked_upload_part_page import ChunkedUploadPartPage +from .limit_offset_based_object_collection import LimitOffsetBasedObjectCollection + + +class ChunkedUploadPartLimitOffsetBasedObjectCollection(LimitOffsetBasedObjectCollection): + _page_constructor = ChunkedUploadPartPage diff --git a/boxsdk/pagination/chunked_upload_part_page.py b/boxsdk/pagination/chunked_upload_part_page.py new file mode 100644 index 000000000..0b682e767 --- /dev/null +++ b/boxsdk/pagination/chunked_upload_part_page.py @@ -0,0 +1,12 @@ +# coding: utf-8 + +from __future__ import unicode_literals, absolute_import + +from .page import Page + + +class ChunkedUploadPartPage(Page): + def __getitem__(self, key): + item_json = self._response_object[self._item_entries_key_name][key] + + return item_json diff --git a/docs/file.md b/docs/file.md new file mode 100644 index 000000000..68be5aa8c --- /dev/null +++ b/docs/file.md @@ -0,0 +1,85 @@ +Chunked Upload +-------------- + +For large files or in cases where the network connection is less reliable, +you may want to upload the file in parts. This allows a single part to fail +without aborting the entire upload, and failed parts can then be retried. + + + + + +- [Manual Process](#manual-process) + - [Create Upload Session for File Version](#create-upload-session-for-file-version) + - [Create Upload Session for File](#create-upload-session-for-file) + - [Upload Part](#upload-part) + - [Commit Upload Session](#commit-upload-session) + - [Abort Upload Session](#abort-upload-session) + - [List Upload Parts](#list-upload-parts) + + + +### Manual Process + +For more complicated upload scenarios, such as those being coordinated across +multiple processes or when an unrecoverable error occurs with the automatic +uploader, the endpoints for chunked upload operations are also exposed directly. 
+
+The individual endpoint methods are detailed below:
+
+#### Create Upload Session for File Version
+
+To create an upload session for uploading a new version of a large file, use
+`file.create_upload_session(file_size, file_name=None)`.
+
+```python
+file_size = 197520
+upload_session = client.file('11111').create_upload_session(file_size=file_size)
+```
+
+#### Create Upload Session for File
+
+To create an upload session for uploading a new large file, use
+`folder.create_upload_session(file_size, file_name)`.
+
+```python
+file_size = 197520
+file_name = 'test_file.pdf'
+upload_session = client.folder('22222').create_upload_session(file_size=file_size, file_name=file_name)
+```
+
+#### Upload Part
+
+To upload a part of the file to this session, use
+`upload_session.upload_part(part_bytes, offset, total_size, part_content_sha1=None)`.
+
+```python
+chunk = b'abcdefgh'
+offset = 32
+total_size = 197520
+upload_part = client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').upload_part(chunk, offset, total_size)
+```
+
+#### Commit Upload Session
+
+To commit the upload session to Box, use
+`upload_session.commit(content_sha1, parts=None, file_attributes=None, etag=None)`.
+
+```python
+import hashlib
+sha1 = hashlib.sha1()
+# The hash must cover the contents of the entire file, e.g. accumulated
+# with sha1.update(chunk) as each part is uploaded.
+parts = list(client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').get_parts())
+uploaded_file = client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').commit(content_sha1=sha1.digest(), parts=parts)
+```
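+
+Attributes can also be set on the file at commit time; a sketch (the
+description value is illustrative):
+
+```python
+uploaded_file = client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').commit(
+    content_sha1=sha1.digest(),
+    parts=parts,
+    file_attributes={'description': 'A large file uploaded in chunks'},
+)
+```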
+
+#### Abort Upload Session
+
+To abort a chunked upload, cancelling the session and discarding any parts that
+have already been uploaded, use `upload_session.abort()`.
+
+```python
+client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').abort()
+```
+
+#### List Upload Parts
+
+To return the parts uploaded so far, use `upload_session.get_parts(limit=None, offset=None, fields=None)`.
+
+```python
+parts = client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').get_parts()
+```
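+
+`get_parts` returns an iterator of `dict` part records; a minimal usage sketch:
+
+```python
+for part in client.upload_session('11493C07ED3EABB6E59874D3A1EF3581').get_parts():
+    print(part['part_id'])
+```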
\ No newline at end of file
diff --git a/test/unit/client/test_client.py b/test/unit/client/test_client.py
index b8ce61986..4309e5340 100644
--- a/test/unit/client/test_client.py
+++ b/test/unit/client/test_client.py
@@ -25,6 +25,7 @@
 from boxsdk.object.file import File
 from boxsdk.object.group import Group
 from boxsdk.object.user import User
+from boxsdk.object.upload_session import UploadSession
 from boxsdk.object.trash import Trash
 from boxsdk.object.group_membership import GroupMembership
 from boxsdk.object.retention_policy import RetentionPolicy
@@ -340,7 +341,8 @@ def device_pins_response(device_pin_id_1, device_pin_id_2):
     (Group, 'group'),
     (GroupMembership, 'group_membership'),
     (Enterprise, 'enterprise'),
-    (Webhook, 'webhook')
+    (Webhook, 'webhook'),
+    (UploadSession, 'upload_session'),
 ])
 def test_factory_returns_the_correct_object(mock_client, test_class, factory_method_name):
     """ Tests the various id-only factory methods in the Client class """
diff --git a/test/unit/object/test_file.py b/test/unit/object/test_file.py
index 8c93f3f81..1b48b091e 100644
--- a/test/unit/object/test_file.py
+++ b/test/unit/object/test_file.py
@@ -10,6 +10,7 @@
 from boxsdk.object.comment import Comment
 from boxsdk.object.file import File
 from boxsdk.object.task import Task
+from boxsdk.object.upload_session import UploadSession
 
 # pylint:disable=protected-access
 
@@ -42,6 +43,37 @@ def test_delete_file(test_file, mock_box_session, etag, if_match_header):
     )
 
 
+def test_create_upload_session(test_file, mock_box_session):
+    expected_url = '{0}/files/{1}/upload_sessions'.format(API.UPLOAD_URL, test_file.object_id)
+    file_size = 197520
+    part_size = 12345
+    total_parts = 16
+    num_parts_processed = 0
+    upload_session_type = 'upload_session'
+    upload_session_id = 'F971964745A5CD0C001BBE4E58196BFD'
+    file_name = 'test_file.pdf'
+    expected_data = {
+        'file_id': test_file.object_id,
+        'file_size': file_size,
+        'file_name': file_name,
+    }
+    mock_box_session.post.return_value.json.return_value = {
+        'id': upload_session_id,
+        'type': upload_session_type,
+        'num_parts_processed': num_parts_processed,
+        'total_parts': total_parts,
+        'part_size': part_size,
+    }
+    upload_session = test_file.create_upload_session(file_size, file_name)
+    mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
+    assert isinstance(upload_session, UploadSession)
+    assert upload_session.part_size == part_size
+    assert upload_session.total_parts == total_parts
+    assert upload_session.num_parts_processed == num_parts_processed
+    assert upload_session.type == upload_session_type
+    assert upload_session.id == upload_session_id
+
+
 def test_create_task(test_file, test_task, mock_box_session):
     # pylint:disable=redefined-outer-name
     expected_url = "{0}/tasks".format(API.BASE_API_URL)
diff --git a/test/unit/object/test_folder.py b/test/unit/object/test_folder.py
index 2b7249134..19b9a764e 100644
--- a/test/unit/object/test_folder.py
+++ b/test/unit/object/test_folder.py
@@ -14,6 +14,7 @@
 from boxsdk.object.web_link import WebLink
 from boxsdk.object.collaboration import Collaboration, CollaborationRole
 from boxsdk.object.folder import Folder, FolderSyncState
+from boxsdk.object.upload_session import UploadSession
 from boxsdk.session.box_response import BoxResponse
 
 
@@ -188,6 +189,37 @@ def test_upload(
     assert 'entries' not in new_file
 
 
+def test_create_upload_session(test_folder, mock_box_session):
+    expected_url = '{0}/files/upload_sessions'.format(API.UPLOAD_URL)
+    file_size = 197520
+    file_name = 'test_file.pdf'
+    upload_session_id = 'F971964745A5CD0C001BBE4E58196BFD'
+    upload_session_type = 'upload_session'
+    num_parts_processed = 0
+    total_parts = 16
+    part_size = 12345
+    expected_data = {
+        'folder_id': test_folder.object_id,
+        'file_size': file_size,
+        'file_name': file_name,
+    }
+    mock_box_session.post.return_value.json.return_value = {
+        'id': upload_session_id,
+        'type': upload_session_type,
+        'num_parts_processed': num_parts_processed,
+        'total_parts': total_parts,
+        'part_size': part_size,
+    }
+    upload_session = test_folder.create_upload_session(file_size, file_name)
+    mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data))
+    assert isinstance(upload_session, UploadSession)
+    assert upload_session.part_size == part_size
+    assert upload_session.total_parts == total_parts
+    assert upload_session.num_parts_processed == num_parts_processed
+    assert upload_session.type == upload_session_type
+    assert upload_session.id == upload_session_id
+
+
 def test_upload_stream_does_preflight_check_if_specified(
         mock_box_session,
         test_folder,
diff --git a/test/unit/object/test_upload_session.py b/test/unit/object/test_upload_session.py
new file mode 100644
index 000000000..0e612372f
--- /dev/null
+++ b/test/unit/object/test_upload_session.py
@@ -0,0 +1,176 @@
+# coding: utf-8
+
+from __future__ import unicode_literals, absolute_import
+
+import base64
+import hashlib
+from io import BytesIO
+import json
+
+import pytest
+
+from boxsdk.config import API
+from boxsdk.object.file import File
+from boxsdk.object.upload_session import UploadSession
+
+
+@pytest.fixture()
+def test_upload_session(mock_box_session):
+    upload_session_response_object = {
+        'part_size': 8,
+        'total_parts': 10,
+    }
+    return UploadSession(mock_box_session, '11493C07ED3EABB6E59874D3A1EF3581', upload_session_response_object)
+
+
+def test_get_parts(test_upload_session, mock_box_session):
+    expected_url = '{0}/files/upload_sessions/{1}/parts'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    mock_entry = {
+        'part_id': '8F0966B1',
+        'offset': 0,
+        'size': 8,
+        'sha1': None,
+    }
+    mock_box_session.get.return_value.json.return_value = {
+        'entries': [mock_entry],
+        'offset': 0,
+        'total_count': 1,
+        'limit': 1000,
+    }
+    test_parts = test_upload_session.get_parts()
+    part = test_parts.next()
+    mock_box_session.get.assert_called_once_with(expected_url, params={'offset': None})
+    assert isinstance(part, dict)
+    assert part['part_id'] == mock_entry['part_id']
+    assert part['size'] == mock_entry['size']
+    assert part['offset'] == mock_entry['offset']
+
+
+def test_abort(test_upload_session, mock_box_session):
+    expected_url = '{0}/files/upload_sessions/{1}'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    mock_box_session.delete.return_value.ok = True
+    result = test_upload_session.abort()
+    mock_box_session.delete.assert_called_once_with(expected_url)
+    assert result is True
+
+
+def test_upload_part(test_upload_session, mock_box_session):
+    expected_url = '{0}/files/upload_sessions/{1}'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    part_bytes = BytesIO(b'abcdefgh')
+    chunk = part_bytes.read(20)
+    offset = 32
+    total_size = 80
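+    # 'QlrxKgdDUCsyLpOgFbz4aOMk1Wo=' is the base64-encoded raw SHA-1 digest of
+    # b'abcdefgh', matching the Digest header upload_part should compute.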
+    expected_sha1 = 'QlrxKgdDUCsyLpOgFbz4aOMk1Wo='
+    expected_headers = {
+        'Content-Type': 'application/octet-stream',
+        'Digest': 'SHA={}'.format(expected_sha1),
+        'Content-Range': 'bytes 32-39/80',
+    }
+    mock_box_session.put.return_value.json.return_value = {
+        'part': {
+            'part_id': 'ABCDEF123',
+            'offset': offset,
+            'size': 8,
+            'sha1': expected_sha1,
+        },
+    }
+    part = test_upload_session.upload_part(chunk, offset, total_size)
+
+    mock_box_session.put.assert_called_once_with(expected_url, data=chunk, headers=expected_headers)
+    assert part['part']['sha1'] == expected_sha1
+
+
+def test_commit(test_upload_session, mock_box_session):
+    expected_url = '{0}/files/upload_sessions/{1}/commit'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    sha1 = hashlib.sha1()
+    sha1.update(b'fake_file_data')
+    file_id = '12345'
+    file_type = 'file'
+    file_attributes = {'content_modified_at': '2017-11-02T15:04:38-07:00'}
+    parts = [
+        {
+            'part_id': 'ABCDEF123',
+            'offset': 0,
+            'size': 8,
+            'sha1': 'fake_sha1',
+        },
+        {
+            'part_id': 'ABCDEF456',
+            'offset': 8,
+            'size': 8,
+            'sha1': 'fake_sha1',
+        },
+    ]
+    expected_data = {
+        'attributes': file_attributes,
+        'parts': parts,
+    }
+    expected_headers = {
+        'Content-Type': 'application/json',
+        'Digest': 'SHA={}'.format(base64.b64encode(sha1.digest()).decode('utf-8')),
+    }
+
+    mock_box_session.post.return_value.json.return_value = {
+        'entries': [
+            {
+                'type': file_type,
+                'id': file_id,
+                'content_modified_at': '2017-11-02T15:04:38-07:00',
+            },
+        ],
+    }
+    created_file = test_upload_session.commit(content_sha1=sha1.digest(), parts=parts, file_attributes=file_attributes)
+    mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data), headers=expected_headers)
+    assert isinstance(created_file, File)
+    assert created_file.id == file_id
+    assert created_file.type == file_type
+    assert created_file.content_modified_at == '2017-11-02T15:04:38-07:00'
+
+
+def test_commit_with_missing_params(test_upload_session, mock_box_session):
+    expected_get_url = '{0}/files/upload_sessions/{1}/parts'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    expected_url = '{0}/files/upload_sessions/{1}/commit'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    sha1 = hashlib.sha1()
+    sha1.update(b'fake_file_data')
+    file_id = '12345'
+    file_type = 'file'
+    parts = [
+        {
+            'part_id': '8F0966B1',
+            'offset': 0,
+            'size': 8,
+            'sha1': None,
+        },
+    ]
+    expected_data = {
+        'parts': parts,
+    }
+    expected_headers = {
+        'Content-Type': 'application/json',
+        'Digest': 'SHA={}'.format(base64.b64encode(sha1.digest()).decode('utf-8')),
+    }
+    mock_entry = {
+        'part_id': '8F0966B1',
+        'offset': 0,
+        'size': 8,
+        'sha1': None,
+    }
+    mock_box_session.get.return_value.json.return_value = {
+        'entries': [mock_entry],
+        'offset': 0,
+        'total_count': 1,
+        'limit': 1000,
+    }
+    mock_box_session.post.return_value.json.return_value = {
+        'entries': [
+            {
+                'type': file_type,
+                'id': file_id,
+            },
+        ],
+    }
+    created_file = test_upload_session.commit(content_sha1=sha1.digest())
+    mock_box_session.get.assert_called_once_with(expected_get_url, params={'offset': None})
+    mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps(expected_data), headers=expected_headers)
+    assert isinstance(created_file, File)
+    assert created_file.id == file_id
+    assert created_file.type == file_type
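+
+
+def test_commit_with_etag(test_upload_session, mock_box_session):
+    # Sketch of an additional check: a supplied etag should be sent as an
+    # If-Match header on commit. Assumes the same mock_box_session fixture
+    # used by the tests above.
+    expected_url = '{0}/files/upload_sessions/{1}/commit'.format(API.UPLOAD_URL, test_upload_session.object_id)
+    sha1 = hashlib.sha1()
+    sha1.update(b'fake_file_data')
+    parts = [
+        {
+            'part_id': 'ABCDEF123',
+            'offset': 0,
+            'size': 8,
+            'sha1': 'fake_sha1',
+        },
+    ]
+    expected_headers = {
+        'Content-Type': 'application/json',
+        'Digest': 'SHA={}'.format(base64.b64encode(sha1.digest()).decode('utf-8')),
+        'If-Match': 'fake_etag',
+    }
+    mock_box_session.post.return_value.json.return_value = {
+        'entries': [
+            {
+                'type': 'file',
+                'id': '12345',
+            },
+        ],
+    }
+    created_file = test_upload_session.commit(content_sha1=sha1.digest(), parts=parts, etag='fake_etag')
+    mock_box_session.post.assert_called_once_with(expected_url, data=json.dumps({'parts': parts}), headers=expected_headers)
+    assert isinstance(created_file, File)
+    assert created_file.id == '12345'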