Skip to content
Permalink
Browse files
AIRAVATA-3420 Allow reading just a few bytes from very large files by streaming http download (initial use case is to determine file type)
  • Loading branch information
machristie committed Aug 4, 2021
1 parent 9c5e75e commit 94ec26a463888cebffbe468f90e6f1e893152d48
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
@@ -843,10 +843,10 @@ def _determine_content_type(full_path, content_type=None, backend=None):
# Check if file is Unicode text by trying to read some of it
try:
if backend is not None:
file = backend.open(full_path)
# Try to decode the first kb as UTF8
file.read(1024).decode('utf-8')
result = "text/plain"
with backend.open(full_path) as file:
# Try to decode the first kb as UTF8
file.read(1024).decode('utf-8')
result = "text/plain"
except UnicodeDecodeError:
logger.debug(f"Failed to read as Unicode text: {full_path}")
return result
@@ -205,11 +205,14 @@ def get_download_url(self, resource_path):

def open(self, resource_path):
download_url = self.get_download_url(resource_path)
r = requests.get(download_url)
r = requests.get(download_url, stream=True)
r.raise_for_status()
file = io.BytesIO(r.content)
file.name = os.path.basename(resource_path)
return file
# The raw stream doesn't automatically decode the response body based on the
# Content-Encoding header (e.g. gzip or deflate), but setting decode_content
# to True causes it to do the decoding.
r.raw.decode_content = True
r.raw.name = os.path.basename(resource_path)
return r.raw

def _get_child_path(self, resource_path):
"""Convert resource path into child path appropriate for resource."""

0 comments on commit 94ec26a

Please sign in to comment.