1 change: 0 additions & 1 deletion .github/workflows/build.yml
@@ -16,7 +16,6 @@ jobs:
- '3.6'
- '3.7'
- '3.8'
- 'pypy-3.8'
- '3.9'
- '3.10'
- '3.11'
11 changes: 10 additions & 1 deletion boxsdk/object/folder.py
@@ -263,6 +263,7 @@ def upload_stream(
additional_attributes: Optional[dict] = None,
sha1: Optional[str] = None,
etag: Optional[str] = None,
stream_file_content: bool = True,
) -> 'File':
"""
Upload a file to the folder.
@@ -298,6 +299,9 @@ def upload_stream(
A sha1 checksum for the file.
:param etag:
If specified, instruct the Box API to update the item only if the current version's etag matches.
:param stream_file_content:
If True, the upload will be performed as a streaming request. If False, the file will be read into memory
before being uploaded; this may be required for some proxy servers to handle redirects correctly.
:returns:
The newly uploaded file.
"""
@@ -335,7 +339,7 @@ def upload_stream(
if not headers:
headers = None
file_response = self._session.post(
url, data=data, files=files, expect_json_response=False, headers=headers
url, data=data, files=files, expect_json_response=False, headers=headers, stream_file_content=stream_file_content,
).json()
if 'entries' in file_response:
file_response = file_response['entries'][0]
@@ -358,6 +362,7 @@ def upload(
additional_attributes: Optional[dict] = None,
sha1: Optional[str] = None,
etag: Optional[str] = None,
stream_file_content: bool = True,
) -> 'File':
"""
Upload a file to the folder.
@@ -394,6 +399,9 @@ def upload(
A sha1 checksum for the new content.
:param etag:
If specified, instruct the Box API to update the item only if the current version's etag matches.
:param stream_file_content:
If True, the upload will be performed as a streaming request. If False, the file will be read into memory
before being uploaded; this may be required for some proxy servers to handle redirects correctly.
:returns:
The newly uploaded file.
"""
@@ -412,6 +420,7 @@ def upload(
additional_attributes=additional_attributes,
sha1=sha1,
etag=etag,
stream_file_content=stream_file_content,
)

@api_call
14 changes: 8 additions & 6 deletions boxsdk/session/session.py
@@ -468,7 +468,8 @@ def _send_request(self, request: '_BoxRequest', **kwargs: Any) -> 'NetworkRespon
"""
# Reset stream positions to what they were when the request was made so the same data is sent even if this
# is a retried attempt.
files, file_stream_positions = kwargs.get('files'), kwargs.pop('file_stream_positions')
files, file_stream_positions, stream_file_content = (
kwargs.get('files'), kwargs.pop('file_stream_positions'), kwargs.pop('stream_file_content', True))
request_kwargs = self._default_network_request_kwargs.copy()
request_kwargs.update(kwargs)
proxy_dict = self._prepare_proxy()
@@ -477,11 +478,12 @@ def _send_request(self, request: '_BoxRequest', **kwargs: Any) -> 'NetworkRespon
if files and file_stream_positions:
for name, position in file_stream_positions.items():
files[name][1].seek(position)
data = request_kwargs.pop('data', {})
multipart_stream = MultipartStream(data, files)
request_kwargs['data'] = multipart_stream
del request_kwargs['files']
request.headers['Content-Type'] = multipart_stream.content_type
if stream_file_content:
data = request_kwargs.pop('data', {})
multipart_stream = MultipartStream(data, files)
request_kwargs['data'] = multipart_stream
del request_kwargs['files']
request.headers['Content-Type'] = multipart_stream.content_type
request.access_token = request_kwargs.pop('access_token', None)

# send the request
23 changes: 23 additions & 0 deletions docs/usage/files.md
@@ -187,9 +187,32 @@ new_file = client.folder(folder_id).upload_stream(stream, file_name)
print(f'File "{new_file.name}" uploaded to Box with file ID {new_file.id}')
```

----
**NOTE:**

Both methods `folder.upload()` and `folder.upload_stream()` include the `stream_file_content` parameter,
which controls how the file content is uploaded.

If you are uploading a large file, you may want to stream the request to avoid excessive memory usage.
According to the `requests` library [docs][request_docs], `requests` does not support streaming multipart
uploads out of the box, so all the data must be read into memory before being sent to the server.
However, the `requests-toolbelt` package includes a `MultipartEncoder` class, which enables file uploads without
loading the entire file into memory. This approach is the default in the Box Python SDK.
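
For illustration, the sketch below shows roughly how a multipart body can be streamed with `requests-toolbelt`.
It is not the SDK's exact internal code; the file path, attributes, and access token are placeholder values.

```python
import requests
from requests_toolbelt import MultipartEncoder

# Placeholder values for illustration only.
upload_url = 'https://upload.box.com/api/2.0/files/content'
attributes = '{"name": "large_file.bin", "parent": {"id": "0"}}'

with open('/path/to/large_file.bin', 'rb') as stream:
    # The encoder wraps the open file object, so requests reads and sends the
    # body in chunks instead of building the whole multipart payload in memory.
    encoder = MultipartEncoder(fields={
        'attributes': attributes,
        'file': ('large_file.bin', stream, 'application/octet-stream'),
    })
    response = requests.post(
        upload_url,
        data=encoder,
        headers={
            'Content-Type': encoder.content_type,
            'Authorization': 'Bearer <ACCESS_TOKEN>',
        },
    )
```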

That said, handling 307 Temporary Redirects presents a challenge with streamed file uploads.
A 307 redirect requires that both the request method and body remain unchanged, which is problematic when
uploading a file stream because the stream has already been exhausted by the time the redirect occurs.

To address this issue, the `stream_file_content` parameter has been introduced in the upload methods. It allows you to choose between:
- Streaming the file (`stream_file_content=True`): Optimizes memory usage but may cause issues with redirects.

- Using the default `requests` library behavior (`stream_file_content=False`): Ensures the file can be re-read if a
redirect occurs but may consume more memory. This is especially important when working with proxy servers; see the example below.
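
For example, a minimal sketch of disabling streaming for an upload that has to pass through such a proxy (the folder ID and file path are placeholders):

```python
folder_id = '22222'              # placeholder folder ID
file_path = '/path/to/file.pdf'  # placeholder path

# Read the file into memory up front so the request body can be replayed
# if a 307 redirect is returned along the way.
new_file = client.folder(folder_id).upload(file_path, stream_file_content=False)
print(f'File "{new_file.name}" uploaded to Box with file ID {new_file.id}')
```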

[folder_class]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder
[upload]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder.upload
[upload_stream]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder.upload_stream
[request_docs]: https://docs.python-requests.org/en/latest/user/quickstart/#post-a-multipart-encoded-file

Chunked Upload
--------------
19 changes: 2 additions & 17 deletions test/integration_new/object/ai_itest.py
@@ -22,17 +22,10 @@ def test_send_ai_question(parent_folder, small_file_path):
'type': 'file',
'content': 'The sun raises in the east.'
}]
ai_agent = {
'type': 'ai_agent_ask',
'basic_text_multi': {
'model': 'openai__gpt_3_5_turbo'
}
}
answer = CLIENT.send_ai_question(
items=items,
prompt='Which direction does the sun raise?',
mode='single_item_qa',
ai_agent=ai_agent
)
assert 'east' in answer['answer'].lower()
assert answer['completion_reason'] == 'done'
@@ -54,17 +47,10 @@ def test_send_ai_text_gen(parent_folder, small_file_path):
'answer': 'It takes 24 hours for the sun to rise.',
'created_at': '2013-12-12T11:20:43-08:00'
}]
ai_agent = {
'type': 'ai_agent_text_gen',
'basic_gen': {
'model': 'openai__gpt_3_5_turbo_16k'
}
}
answer = CLIENT.send_ai_text_gen(
dialogue_history=dialogue_history,
items=items,
prompt='Which direction does the sun raise?',
ai_agent=ai_agent
)
assert 'east' in answer['answer'].lower()
assert answer['completion_reason'] == 'done'
@@ -73,8 +59,7 @@ def test_send_ai_text_gen(parent_folder, small_file_path):
def test_get_ai_agent_default_config():
config = CLIENT.get_ai_agent_default_config(
mode='text_gen',
language='en',
model='openai__gpt_3_5_turbo'
language='en'
)
assert config['type'] == 'ai_agent_text_gen'
assert config['basic_gen']['model'] == 'openai__gpt_3_5_turbo'
assert config['basic_gen']['model'] != ''
17 changes: 14 additions & 3 deletions test/integration_new/object/folder_itest.py
@@ -103,8 +103,8 @@ def test_auto_chunked_upload_NOT_using_upload_session_urls(parent_folder, large_


def test_get_items(parent_folder, small_file_path):
with BoxTestFolder(parent_folder=parent_folder) as subfolder,\
BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as file,\
with BoxTestFolder(parent_folder=parent_folder) as subfolder, \
BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as file, \
BoxTestWebLink(parent_folder=parent_folder, url='https://box.com') as web_link:

assert set(parent_folder.get_items()) == {subfolder, file, web_link}
@@ -130,6 +130,17 @@ def test_upload_small_file_to_folder(parent_folder, small_file_name, small_file_
util.permanently_delete(uploaded_file)


def test_upload_small_file_to_folder_with_disabled_streaming_file_content(
parent_folder, small_file_name, small_file_path
):
uploaded_file = parent_folder.upload(file_path=small_file_path, file_name=small_file_name, stream_file_content=False)
try:
assert uploaded_file.id
assert uploaded_file.parent == parent_folder
finally:
util.permanently_delete(uploaded_file)


def test_create_subfolder(parent_folder):
created_subfolder = parent_folder.create_subfolder(name=util.random_name())
try:
@@ -199,7 +210,7 @@ def test_delete_folder(parent_folder):


def test_cascade_and_get_metadata_cascade_policies(parent_folder):
with BoxTestMetadataTemplate(display_name="test_template") as metadata_template,\
with BoxTestMetadataTemplate(display_name="test_template") as metadata_template, \
BoxTestFolder(parent_folder=parent_folder) as folder:
folder.cascade_metadata(metadata_template)

14 changes: 7 additions & 7 deletions test/integration_new/object/trash_itest.py
@@ -23,17 +23,17 @@ def test_trash_get_items(parent_folder, small_file_path):
test_file = parent_folder.upload(file_path=small_file_path, file_name=name)
test_file.delete()
try:
trash_items = CLIENT.trash().get_items()
assert test_file.id in [item.id for item in trash_items]
trashed_file = test_file.get()
assert trashed_file.item_status == 'trashed'
finally:
CLIENT.trash().permanently_delete_item(test_file)


def test_trash_restore_item(parent_folder, small_file_path):
with BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as test_file:
test_file.delete()
trash_items = CLIENT.trash().get_items()
assert test_file.id in [item.id for item in trash_items]
folder_items = parent_folder.get_items()
assert test_file.id not in [item.id for item in folder_items]
CLIENT.trash().restore_item(test_file)
folder_items = parent_folder.get_items()
assert test_file.id in [item.id for item in folder_items]
@@ -46,7 +46,7 @@ def test_trash_get_items_with_offset(parent_folder, small_file_path):
try:
trash_items = CLIENT.trash().get_items()
assert isinstance(trash_items, LimitOffsetBasedObjectCollection)
assert test_file.id in [item.id for item in trash_items]
assert trash_items.next() is not None
finally:
CLIENT.trash().permanently_delete_item(test_file)

@@ -56,8 +56,8 @@ def test_trash_get_items_with_marker(parent_folder, small_file_path):
test_file = parent_folder.upload(file_path=small_file_path, file_name=name)
test_file.delete()
try:
trash_items = CLIENT.trash().get_items(limit=100, use_marker=True)
trash_items = CLIENT.trash().get_items(limit=5, use_marker=True)
assert isinstance(trash_items, MarkerBasedObjectCollection)
assert test_file.id in [item.id for item in trash_items]
assert trash_items.next() is not None
finally:
CLIENT.trash().permanently_delete_item(test_file)
32 changes: 30 additions & 2 deletions test/unit/object/test_folder.py
@@ -2,7 +2,7 @@
from datetime import datetime
from io import BytesIO
from os.path import basename
from unittest.mock import mock_open, patch, Mock, MagicMock
from unittest.mock import mock_open, patch, Mock, MagicMock, ANY
import pytest
import pytz

@@ -334,7 +334,14 @@ def test_upload(
# in Python 2 tests
attributes.update(additional_attributes)
data = {'attributes': json.dumps(attributes)}
mock_box_session.post.assert_called_once_with(expected_url, expect_json_response=False, files=mock_files, data=data, headers=if_match_sha1_header)
mock_box_session.post.assert_called_once_with(
expected_url,
expect_json_response=False,
files=mock_files,
data=data,
headers=if_match_sha1_header,
stream_file_content=True
)
assert isinstance(new_file, File)
assert new_file.object_id == mock_object_id
assert 'id' in new_file
@@ -438,6 +445,27 @@ def test_upload_does_preflight_check_if_specified(
assert not test_folder.preflight_check.called


@patch('boxsdk.object.folder.open', mock_open(read_data=b'some bytes'), create=True)
@pytest.mark.parametrize('stream_file_content', (True, False))
def test_upload_if_flag_stream_file_content_is_passed_to_session(
mock_box_session,
test_folder,
stream_file_content,
):
expected_url = f'{API.UPLOAD_URL}/files/content'

test_folder.upload('foo.txt', file_name='foo.txt', stream_file_content=stream_file_content)

mock_files = {'file': ('unused', ANY)}
mock_box_session.post.assert_called_once_with(
expected_url,
data=ANY,
files=mock_files,
expect_json_response=False,
headers=None,
stream_file_content=stream_file_content)


def test_create_subfolder(test_folder, mock_box_session, mock_object_id, mock_folder_response):
expected_url = test_folder.get_type_url()
mock_box_session.post.return_value = mock_folder_response
49 changes: 48 additions & 1 deletion test/unit/session/test_session.py
@@ -1,8 +1,10 @@
from functools import partial
from io import IOBase
from io import IOBase, BytesIO
from numbers import Number
import os
from unittest.mock import MagicMock, Mock, PropertyMock, call, patch, ANY
from requests.exceptions import RequestException, SSLError, ConnectionError as RequestsConnectionError
from requests_toolbelt import MultipartEncoder

import pytest

@@ -449,3 +451,48 @@ def test_proxy_malformed_dict_does_not_attach(box_session, monkeypatch, mock_net

def test_proxy_network_config_property(box_session):
assert isinstance(box_session.proxy_config, Proxy)


def test_multipart_request_with_disabled_streaming_file_content(
box_session, mock_network_layer, generic_successful_response):
test_url = 'https://example.com'
file_bytes = os.urandom(1024)
mock_network_layer.request.side_effect = [generic_successful_response]
box_session.post(
url=test_url,
files={'file': ('unused', BytesIO(file_bytes))},
data={'attributes': '{"name": "test_file"}'},
stream_file_content=False
)
mock_network_layer.request.assert_called_once_with(
'POST',
test_url,
access_token='fake_access_token',
headers=ANY,
log_response_content=True,
files={'file': ('unused', ANY)},
data={'attributes': '{"name": "test_file"}'},
)


def test_multipart_request_with_enabled_streaming_file_content(
box_session, mock_network_layer, generic_successful_response):
test_url = 'https://example.com'
file_bytes = os.urandom(1024)
mock_network_layer.request.side_effect = [generic_successful_response]
box_session.post(
url=test_url,
files={'file': ('unused', BytesIO(file_bytes))},
data={'attributes': '{"name": "test_file"}'},
stream_file_content=True
)
call_args = mock_network_layer.request.call_args[0]
call_kwargs = mock_network_layer.request.call_args[1]
assert call_args[0] == 'POST'
assert call_args[1] == test_url
assert call_kwargs['access_token'] == 'fake_access_token'
assert call_kwargs['log_response_content'] is True
assert isinstance(call_kwargs['data'], MultipartEncoder)
assert call_kwargs['data'].fields['attributes'] == '{"name": "test_file"}'
assert call_kwargs['data'].fields['file'][0] == 'unused'
assert isinstance(call_kwargs['data'].fields['file'][1], BytesIO)