Skip to content

Commit

Permalink
Merge pull request #14990 from mvdbeek/fix_byte_range_support
Browse files Browse the repository at this point in the history
[22.05] Fix ``x-accel-redirect`` handling and implement byte-range handling
  • Loading branch information
mvdbeek committed Nov 16, 2022
2 parents b3ec014 + 693b25b commit 3b068ee
Show file tree
Hide file tree
Showing 8 changed files with 229 additions and 48 deletions.
137 changes: 136 additions & 1 deletion lib/galaxy/webapps/base/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import os
import stat
import typing

import anyio
from fastapi import (
FastAPI,
HTTPException,
Request,
status,
)
Expand All @@ -9,7 +15,10 @@
BaseHTTPMiddleware,
RequestResponseEndpoint,
)
from starlette.responses import Response
from starlette.responses import (
FileResponse,
Response,
)
from starlette_context.middleware import RawContextMiddleware
from starlette_context.plugins import RequestIdPlugin

Expand All @@ -20,6 +29,132 @@
validation_error_to_message_exception,
)

if typing.TYPE_CHECKING:
from starlette.background import BackgroundTask
from starlette.types import (
Receive,
Scope,
Send,
)


# Copied from https://github.com/tiangolo/fastapi/issues/1240#issuecomment-1055396884
def _get_range_header(range_header: str, file_size: int) -> typing.Tuple[int, int]:
def _invalid_range():
return HTTPException(
status.HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE,
detail=f"Invalid request range (Range:{range_header!r})",
)

try:
h = range_header.replace("bytes=", "").rsplit("-", 1)
start = int(h[0]) if h[0] != "" else 0
end = int(h[1]) if h[1] != "" else file_size - 1
except ValueError:
raise _invalid_range()

if start > end or start < 0 or end > file_size - 1:
raise _invalid_range()
return start, end


class GalaxyFileResponse(FileResponse):
    """
    Augments starlette FileResponse with x-accel-redirect/x-sendfile and byte-range handling.

    When ``nginx_x_accel_redirect_base`` or ``apache_xsendfile`` is set on the class, the response
    carries the corresponding header and an empty body. Otherwise the file is streamed by this
    process, honouring a single ``Range`` request header if one is present.
    """

    # Prefix prepended to the absolute file path in the ``x-accel-redirect`` header; falsy disables it.
    nginx_x_accel_redirect_base: typing.Optional[str] = None
    # When truthy (and no nginx base is set), emit an ``x-sendfile`` header with the absolute path.
    apache_xsendfile: typing.Optional[bool] = None
    # Set by the starlette superclass (true for HEAD requests); forced to True here when offloading.
    send_header_only: bool

    def __init__(
        self,
        path: typing.Union[str, "os.PathLike[str]"],
        status_code: int = 200,
        headers: typing.Optional[typing.Mapping[str, str]] = None,
        media_type: typing.Optional[str] = None,
        background: typing.Optional["BackgroundTask"] = None,
        filename: typing.Optional[str] = None,
        stat_result: typing.Optional[os.stat_result] = None,
        method: typing.Optional[str] = None,
        content_disposition_type: str = "attachment",
    ) -> None:
        """
        Build the response; arguments are forwarded unchanged to ``FileResponse.__init__``.

        Always advertises ``accept-ranges: bytes``. If an offloading option is configured on the
        class, the matching header is added, and for non-header-only requests the body is
        suppressed with ``content-length: 0``.
        """
        super().__init__(
            path, status_code, headers, media_type, background, filename, stat_result, method, content_disposition_type
        )
        self.headers["accept-ranges"] = "bytes"
        offload_to_proxy = bool(self.nginx_x_accel_redirect_base or self.apache_xsendfile)
        if self.nginx_x_accel_redirect_base:
            self.headers["x-accel-redirect"] = self.nginx_x_accel_redirect_base + os.path.abspath(path)
        elif self.apache_xsendfile:
            self.headers["x-sendfile"] = os.path.abspath(path)
        if not self.send_header_only and offload_to_proxy:
            # Not a head request, but nginx_x_accel_redirect_base / apache_xsendfile is set: we don't send a body
            self.send_header_only = True
            self.headers["content-length"] = "0"

    async def __call__(self, scope: "Scope", receive: "Receive", send: "Send") -> None:
        """
        ASGI entry point: send the response start message, then the (possibly partial) file body.

        :raises RuntimeError: if the file does not exist or is not a regular file
            (only checked when no ``stat_result`` was supplied to the constructor).
        :raises HTTPException: 416 if the ``Range`` request header is malformed or unsatisfiable.
        """
        if self.stat_result is None:
            try:
                stat_result = await anyio.to_thread.run_sync(os.stat, self.path)
                self.set_stat_headers(stat_result)
            except FileNotFoundError:
                raise RuntimeError(f"File at path {self.path} does not exist.")
            else:
                mode = stat_result.st_mode
                if not stat.S_ISREG(mode):
                    raise RuntimeError(f"File at path {self.path} is not a file.")
        else:
            # A stat result was passed to the constructor; without this branch the local name
            # below would be unbound and the size lookup would raise UnboundLocalError.
            stat_result = self.stat_result

        # This is where we diverge from the superclass, this adds support for byte range requests
        start = 0
        end = stat_result.st_size - 1
        http_range = ""
        if not self.send_header_only:
            for key, value in scope["headers"]:
                if key == b"range":
                    http_range = value.decode("latin-1")
                    start, end = _get_range_header(http_range, stat_result.st_size)
                    self.headers["content-length"] = str(end - start + 1)
                    self.headers["content-range"] = f"bytes {start}-{end}/{stat_result.st_size}"
                    self.status_code = status.HTTP_206_PARTIAL_CONTENT
                    break

        await send(
            {
                "type": "http.response.start",
                "status": self.status_code,
                "headers": self.raw_headers,
            }
        )
        if self.send_header_only:
            await send({"type": "http.response.body", "body": b"", "more_body": False})
        else:
            # This also diverges from the superclass by seeking to start and limiting to end if handling byte range requests
            async with await anyio.open_file(self.path, mode="rb") as file:
                more_body = True
                if start:
                    await file.seek(start)
                while more_body:
                    if http_range:
                        # Never read past the last requested byte; stop once it has been reached.
                        pos = await file.tell()
                        read_size = min(self.chunk_size, end + 1 - pos)
                        if pos + read_size == end + 1:
                            more_body = False
                    else:
                        read_size = self.chunk_size
                    chunk = await file.read(read_size)
                    if more_body:
                        # A short read means end of file was reached.
                        more_body = len(chunk) == self.chunk_size
                    await send(
                        {
                            "type": "http.response.body",
                            "body": chunk,
                            "more_body": more_body,
                        }
                    )
        if self.background is not None:
            await self.background()


# Copied from https://stackoverflow.com/questions/71222144/runtimeerror-no-response-returned-in-fastapi-when-refresh-request/72677699#72677699
class SuppressNoResponseReturnedMiddleware(BaseHTTPMiddleware):
Expand Down
14 changes: 6 additions & 8 deletions lib/galaxy/webapps/galaxy/api/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@
Query,
Request,
)
from starlette.responses import (
FileResponse,
StreamingResponse,
)
from starlette.responses import StreamingResponse

from galaxy.schema import (
FilterQueryParams,
Expand All @@ -39,6 +36,7 @@
UpdateDatasetPermissionsPayload,
)
from galaxy.util.zipstream import ZipstreamWrapper
from galaxy.webapps.base.api import GalaxyFileResponse
from galaxy.webapps.galaxy.api.common import (
get_filter_query_params,
get_query_parameters_from_request_excluding,
Expand Down Expand Up @@ -265,7 +263,7 @@ def display(
if isinstance(display_data, IOBase):
file_name = getattr(display_data, "name", None)
if file_name:
return FileResponse(file_name, headers=headers)
return GalaxyFileResponse(file_name, headers=headers, method=request.method)
elif isinstance(display_data, ZipstreamWrapper):
return StreamingResponse(display_data.response(), headers=headers)
elif isinstance(display_data, bytes):
Expand All @@ -276,12 +274,12 @@ def display(
"/api/histories/{history_id}/contents/{history_content_id}/metadata_file",
summary="Returns the metadata file associated with this history item.",
tags=["histories"],
response_class=FileResponse,
response_class=GalaxyFileResponse,
)
@router.get(
"/api/datasets/{history_content_id}/metadata_file",
summary="Returns the metadata file associated with this history item.",
response_class=FileResponse,
response_class=GalaxyFileResponse,
)
def get_metadata_file(
self,
Expand All @@ -297,7 +295,7 @@ def get_metadata_file(
),
):
metadata_file_path, headers = self.service.get_metadata_file(trans, history_content_id, metadata_file)
return FileResponse(path=cast(str, metadata_file_path), headers=headers)
return GalaxyFileResponse(path=cast(str, metadata_file_path), headers=headers)

@router.get(
"/api/datasets/{dataset_id}",
Expand Down
6 changes: 3 additions & 3 deletions lib/galaxy/webapps/galaxy/api/histories.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
)
from pydantic.fields import Field
from pydantic.main import BaseModel
from starlette.responses import FileResponse

from galaxy.managers.context import (
ProvidesHistoryContext,
Expand Down Expand Up @@ -51,6 +50,7 @@
WriteStoreToPayload,
)
from galaxy.schema.types import LatestLiteral
from galaxy.webapps.base.api import GalaxyFileResponse
from galaxy.webapps.galaxy.api.common import (
get_filter_query_params,
query_serialization_params,
Expand Down Expand Up @@ -373,7 +373,7 @@ def archive_export(
"/api/histories/{id}/exports/{jeha_id}",
name="history_archive_download",
summary=("If ready and available, return raw contents of exported history as a downloadable archive."),
response_class=FileResponse,
response_class=GalaxyFileResponse,
responses={
200: {
"description": "The archive file containing the History.",
Expand All @@ -394,7 +394,7 @@ def archive_download(
jeha = self.service.get_ready_history_export(trans, id, jeha_id)
media_type = self.service.get_archive_media_type(jeha)
file_path = self.service.get_archive_download_path(trans, jeha)
return FileResponse(
return GalaxyFileResponse(
path=file_path,
media_type=media_type,
filename=jeha.export_name,
Expand Down
7 changes: 3 additions & 4 deletions lib/galaxy/webapps/galaxy/api/short_term_storage.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""
API operations around galaxy.web.short_term_storage infrastructure.
"""
from starlette.responses import FileResponse

from galaxy.web.short_term_storage import (
ShortTermStorageMonitor,
ShortTermStorageServeCancelledInformation,
ShortTermStorageServeCompletedInformation,
)
from galaxy.webapps.base.api import GalaxyFileResponse
from . import (
depends,
Router,
Expand All @@ -34,7 +33,7 @@ def is_ready(self, storage_request_id: str) -> bool:
"/api/short_term_storage/{storage_request_id}",
summary="Serve the staged download specified by request ID.",
response_description="Raw contents of the file.",
response_class=FileResponse,
response_class=GalaxyFileResponse,
responses={
200: {
"description": "The archive file containing the History.",
Expand All @@ -48,7 +47,7 @@ def serve(self, storage_request_id: str):
storage_target = self.short_term_storage_monitor.recover_target(storage_request_id)
serve_info = self.short_term_storage_monitor.get_serve_info(storage_target)
if isinstance(serve_info, ShortTermStorageServeCompletedInformation):
return FileResponse(
return GalaxyFileResponse(
path=serve_info.target.path,
media_type=serve_info.mime_type,
filename=serve_info.filename,
Expand Down
6 changes: 3 additions & 3 deletions lib/galaxy/webapps/galaxy/api/tool_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from fastapi import Path
from fastapi.responses import FileResponse

from galaxy.managers.tool_data import ToolDataManager
from galaxy.tools.data._schema import (
Expand All @@ -8,6 +7,7 @@
ToolDataField,
ToolDataItem,
)
from galaxy.webapps.base.api import GalaxyFileResponse
from . import (
depends,
Router,
Expand Down Expand Up @@ -81,7 +81,7 @@ async def show_field(
"/api/tool_data/{table_name}/fields/{field_name}/files/{file_name}",
summary="Get information about a particular field in a tool data table",
response_description="Information about a data table field",
response_class=FileResponse,
response_class=GalaxyFileResponse,
require_admin=True,
)
async def download_field_file(
Expand All @@ -96,7 +96,7 @@ async def download_field_file(
):
"""Download a file associated with the data table field."""
path = self.tool_data_manager.get_field_file_path(table_name, field_name, file_name)
return FileResponse(str(path))
return GalaxyFileResponse(str(path))

@router.delete(
"/api/tool_data/{table_name}",
Expand Down
29 changes: 4 additions & 25 deletions lib/galaxy/webapps/galaxy/fast_app.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
from pathlib import Path
from typing import cast

from a2wsgi import WSGIMiddleware
from fastapi import (
FastAPI,
Request,
)
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import (
FileResponse,
Response,
)
from starlette.responses import Response

from galaxy.version import VERSION
from galaxy.webapps.base.api import (
add_empty_response_middleware,
add_exception_handler,
add_request_id_middleware,
GalaxyFileResponse,
include_all_package_routers,
)
from galaxy.webapps.base.webapp import config_allows_origin
Expand Down Expand Up @@ -105,30 +100,14 @@ async def add_x_frame_options(request: Request, call_next):
response.headers["X-Frame-Options"] = x_frame_options
return response

nginx_x_accel_redirect_base = gx_app.config.nginx_x_accel_redirect_base
apache_xsendfile = gx_app.config.apache_xsendfile
GalaxyFileResponse.nginx_x_accel_redirect_base = gx_app.config.nginx_x_accel_redirect_base
GalaxyFileResponse.apache_xsendfile = gx_app.config.apache_xsendfile

if gx_app.config.sentry_dsn:
from sentry_sdk.integrations.asgi import SentryAsgiMiddleware

app.add_middleware(SentryAsgiMiddleware)

if nginx_x_accel_redirect_base or apache_xsendfile:

@app.middleware("http")
async def add_send_file_header(request: Request, call_next) -> Response:
response = await call_next(request)
if not isinstance(response, FileResponse):
return response
response = cast(FileResponse, response)
if not response.send_header_only:
if nginx_x_accel_redirect_base:
full_path = Path(nginx_x_accel_redirect_base) / response.path
response.headers["X-Accel-Redirect"] = str(full_path)
if apache_xsendfile:
response.headers["X-Sendfile"] = str(response.path)
return response

if gx_app.config.get("allowed_origin_hostnames", None):
app.add_middleware(
GalaxyCORSMiddleware,
Expand Down
Loading

0 comments on commit 3b068ee

Please sign in to comment.