Skip to content

Commit

Permalink
Restrict file downloads to Project (#49680)
Browse files Browse the repository at this point in the history
The `debug_files` and `artifact_lookup` endpoint are now restricting
file downloads to the specific project according to the route
parameters.
  • Loading branch information
Swatinem committed May 24, 2023
1 parent 8e308bb commit e932b15
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 53 deletions.
65 changes: 47 additions & 18 deletions src/sentry/api/endpoints/artifact_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
ArtifactBundle,
DebugIdArtifactBundle,
Distribution,
File,
Project,
ProjectArtifactBundle,
Release,
Expand All @@ -45,23 +44,43 @@
class ProjectArtifactLookupEndpoint(ProjectEndpoint):
permission_classes = (ProjectReleasePermission,)

def download_file(self, file_id, project: Project):
def download_file(self, download_id, project: Project):
ty, ty_id = download_id.split("/")

rate_limited = ratelimits.is_limited(
project=project,
key=f"rl:ArtifactLookupEndpoint:download:{file_id}:{project.id}",
key=f"rl:ArtifactLookupEndpoint:download:{download_id}:{project.id}",
limit=10,
)
if rate_limited:
logger.info(
"notification.rate_limited",
extra={"project_id": project.id, "file_id": file_id},
extra={"project_id": project.id, "file_id": download_id},
)
return HttpResponse({"Too many download requests"}, status=429)

file = File.objects.filter(id=file_id).first()
file = None
if ty == "artifact_bundle":
file = (
ArtifactBundle.objects.filter(
id=ty_id,
projectartifactbundle__project_id=project.id,
)
.select_related("file")
.first()
)
elif ty == "release_file":
# NOTE: `ReleaseFile` does have a `project_id`, but that seems to
# be always empty, so using the `organization_id` instead.
file = (
ReleaseFile.objects.filter(id=ty_id, organization_id=project.organization.id)
.select_related("file")
.first()
)

if file is None:
raise Http404
file = file.file

try:
fp = file.getfile()
Expand Down Expand Up @@ -93,9 +112,9 @@ def get(self, request: Request, project: Project) -> Response:
:auth: required
"""
if request.GET.get("download") is not None:
if (download_id := request.GET.get("download")) is not None:
if has_download_permission(request, project):
return self.download_file(request.GET.get("download"), project)
return self.download_file(download_id, project)
else:
return Response(status=403)

Expand All @@ -114,7 +133,7 @@ def get(self, request: Request, project: Project) -> Response:
def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
for (bundle_id, date_added, file_id) in inner_bundles:
used_artifact_bundles[bundle_id] = date_added
bundle_file_ids.add(file_id)
bundle_file_ids.add(("artifact_bundle", bundle_id, file_id))

if debug_id:
bundles = get_artifact_bundles_containing_debug_id(debug_id, project)
Expand All @@ -132,7 +151,8 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):

release, dist = try_resolve_release_dist(project, release_name, dist_name)
if release:
bundle_file_ids |= get_legacy_release_bundles(release, dist)
for (releasefile_id, file_id) in get_legacy_release_bundles(release, dist):
bundle_file_ids.add(("release_file", releasefile_id, file_id))
individual_files = get_legacy_releasefile_by_file_url(release, dist, url)

if options.get("sourcemaps.artifact-bundles.enable-renewal") == 1.0:
Expand All @@ -144,12 +164,16 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
url_constructor = UrlConstructor(request, project)

found_artifacts = []
for file_id in bundle_file_ids:
# NOTE: the reason we use the `file_id` as the `id` we return is because
# downstream symbolicator relies on that for its internal caching.
# We do not want to hard-refresh those caches quite yet, and the `id`
# should also be as unique as possible, which the `file_id` is.
for (ty, ty_id, file_id) in bundle_file_ids:
found_artifacts.append(
{
"id": str(file_id),
"type": "bundle",
"url": url_constructor.url_for_file_id(file_id),
"url": url_constructor.url_for_file_id(ty, ty_id),
}
)

Expand All @@ -158,7 +182,7 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
{
"id": str(release_file.file.id),
"type": "file",
"url": url_constructor.url_for_file_id(release_file.file.id),
"url": url_constructor.url_for_file_id("release_file", release_file.id),
# The `name` is the url/abs_path of the file,
# as in: `"~/path/to/file.min.js"`.
"abs_path": release_file.name,
Expand All @@ -167,6 +191,9 @@ def update_bundles(inner_bundles: Set[Tuple[int, datetime, int]]):
}
)

# make sure we have a stable sort order for tests
found_artifacts.sort(key=lambda x: int(x["id"]))

# NOTE: We do not paginate this response, as we have very tight limits on all the individual queries.
return Response(serialize(found_artifacts, request.user))

Expand Down Expand Up @@ -259,10 +286,11 @@ def try_resolve_release_dist(
return release, dist


def get_legacy_release_bundles(release: Release, dist: Optional[Distribution]):
def get_legacy_release_bundles(
release: Release, dist: Optional[Distribution]
) -> Set[Tuple[int, int]]:
return set(
ReleaseFile.objects.select_related("file")
.filter(
ReleaseFile.objects.filter(
release_id=release.id,
dist_id=dist.id if dist else None,
# a `ReleaseFile` with `0` artifacts represents a release archive,
Expand All @@ -271,7 +299,8 @@ def get_legacy_release_bundles(release: Release, dist: Optional[Distribution]):
# similarly the special `type` is also used for release archives.
file__type=RELEASE_BUNDLE_TYPE,
)
.values_list("file_id", flat=True)
.select_related("file")
.values_list("id", "file_id")
# TODO: this `order_by` might be incredibly slow
# we want to have a hard limit on the returned bundles here. and we would
# want to pick the most recently uploaded ones. that should mostly be
Expand Down Expand Up @@ -304,11 +333,11 @@ def __init__(self, request: Request, project: Project):
else:
self.base_url = request.build_absolute_uri(request.path)

def url_for_file_id(self, file_id: int) -> str:
def url_for_file_id(self, ty: str, file_id: int) -> str:
# NOTE: Returning a self-route that requires authentication (via Bearer token)
# is not really forward compatible with a pre-signed URL that does not
# require any authentication or headers whatsoever.
# This also requires a workaround in Symbolicator, as its generic http
# downloader blocks "internal" IPs, whereas the internal Sentry downloader
# is explicitly exempt.
return f"{self.base_url}?download={file_id}"
return f"{self.base_url}?download={ty}/{file_id}"
6 changes: 4 additions & 2 deletions src/sentry/api/endpoints/debug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,11 @@ def download(self, debug_file_id, project):
"notification.rate_limited",
extra={"project_id": project.id, "project_debug_file_id": debug_file_id},
)
return HttpResponse({"Too many download requests"}, status=403)
return HttpResponse({"Too many download requests"}, status=429)

debug_file = ProjectDebugFile.objects.filter(id=debug_file_id).first()
debug_file = ProjectDebugFile.objects.filter(
id=debug_file_id, project_id=project.id
).first()

if debug_file is None:
raise Http404
Expand Down
Loading

0 comments on commit e932b15

Please sign in to comment.