Skip to content

Commit

Permalink
Fix image proxy get error handling (#3455)
Browse files Browse the repository at this point in the history
* Add `z` option to bind volumes to solve SELinux permissions

* Fix health check view test pook usage

* Use aiohttp client exceptions instead of requests
  • Loading branch information
sarayourfriend committed Dec 5, 2023
1 parent cdefe8d commit 9b90382
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 69 deletions.
15 changes: 7 additions & 8 deletions api/api/utils/image_proxy/__init__.py
Expand Up @@ -9,8 +9,8 @@

import aiohttp
import django_redis
import requests
import sentry_sdk
from aiohttp.client_exceptions import ClientResponseError
from asgiref.sync import sync_to_async
from sentry_sdk import push_scope, set_context

Expand Down Expand Up @@ -185,16 +185,15 @@ async def get(
"occurrences", settings.THUMBNAIL_ERROR_REPEATED_ALERT_FREQUENCY
)
sentry_sdk.capture_exception(exc)
if isinstance(exc, requests.exceptions.HTTPError):
code = exc.response.status_code
if isinstance(exc, ClientResponseError):
status = exc.status
do_not_wait_for(
tallies_incr(
f"thumbnail_http_error:{domain}:{month}:{code}:{exc.response.text}"
)
tallies_incr(f"thumbnail_http_error:{domain}:{month}:{status}")
)
logger.warning(
f"Failed to render thumbnail "
f"{upstream_url=} {code=} "
f"{media_info.media_provider=}"
f"{upstream_url=} {status=} "
f"{media_info.media_provider=} "
f"{exc.message=}"
)
raise UpstreamThumbnailException(f"Failed to render thumbnail. {exc}")
2 changes: 1 addition & 1 deletion api/test/unit/conftest.py
Expand Up @@ -59,7 +59,7 @@ def api_client():


@pytest.fixture(autouse=True)
def capture_exception(monkeypatch):
def sentry_capture_exception(monkeypatch):
mock = MagicMock()
monkeypatch.setattr("sentry_sdk.capture_exception", mock)

Expand Down
82 changes: 46 additions & 36 deletions api/test/unit/utils/test_image_proxy.py
@@ -1,7 +1,6 @@
import asyncio
from dataclasses import replace
from test.factory.models.image import ImageFactory
from unittest.mock import MagicMock
from urllib.parse import urlencode

from django.conf import settings
Expand All @@ -10,7 +9,8 @@
import aiohttp
import pook
import pytest
import requests
from aiohttp import client_exceptions
from aiohttp.client_reqrep import ConnectionKey

from api.utils.image_proxy import (
HEADERS,
Expand All @@ -23,7 +23,10 @@
from api.utils.tallies import get_monthly_timestamp


PHOTON_URL_FOR_TEST_IMAGE = f"{settings.PHOTON_ENDPOINT}subdomain.example.com/path_part1/part2/image_dot_jpg.jpg"
TEST_IMAGE_DOMAIN = "subdomain.example.com"
PHOTON_URL_FOR_TEST_IMAGE = (
f"{settings.PHOTON_ENDPOINT}{TEST_IMAGE_DOMAIN}/path_part1/part2/image_dot_jpg.jpg"
)
TEST_IMAGE_URL = PHOTON_URL_FOR_TEST_IMAGE.replace(settings.PHOTON_ENDPOINT, "http://")
TEST_MEDIA_IDENTIFIER = "123"
TEST_MEDIA_PROVIDER = "foo"
Expand All @@ -36,10 +39,6 @@

UA_HEADER = HEADERS["User-Agent"]

# cannot use actual image response because I kept running into some issue with
# requests not being able to decode the content
# I will come back to try to sort it out later but for now
# this will get the tests working.
MOCK_BODY = "mock response body"

SVG_BODY = """<?xml version="1.0" encoding="UTF-8"?>
Expand Down Expand Up @@ -306,9 +305,7 @@ def test_get_successful_records_response_code(photon_get, mock_image_data, redis
month = get_monthly_timestamp()
assert redis.get(f"thumbnail_response_code:{month}:200") == b"1"
assert (
redis.get(
f"thumbnail_response_code_by_domain:subdomain.example.com:{month}:200"
)
redis.get(f"thumbnail_response_code_by_domain:{TEST_IMAGE_DOMAIN}:{month}:200")
== b"1"
)

Expand All @@ -328,17 +325,37 @@ def test_get_successful_records_response_code(photon_get, mock_image_data, redis
],
)

MOCK_CONNECTION_KEY = ConnectionKey(
host="https://localhost",
port=None,
is_ssl=False,
ssl=None,
proxy=None,
proxy_auth=None,
proxy_headers_hash=None,
)


@pytest.mark.parametrize(
"exc, exc_name",
[
(ValueError("whoops"), "builtins.ValueError"),
(requests.ConnectionError("whoops"), "requests.exceptions.ConnectionError"),
(requests.ConnectTimeout("whoops"), "requests.exceptions.ConnectTimeout"),
(requests.ReadTimeout("whoops"), "requests.exceptions.ReadTimeout"),
(requests.Timeout("whoops"), "requests.exceptions.Timeout"),
(requests.exceptions.SSLError("whoops"), "requests.exceptions.SSLError"),
(OSError("whoops"), "builtins.OSError"),
(
client_exceptions.ClientConnectionError("whoops"),
"aiohttp.client_exceptions.ClientConnectionError",
),
(
client_exceptions.ServerTimeoutError("whoops"),
"aiohttp.client_exceptions.ServerTimeoutError",
),
(
client_exceptions.ClientSSLError(MOCK_CONNECTION_KEY, OSError()),
"aiohttp.client_exceptions.ClientSSLError",
),
(
client_exceptions.ClientOSError("whoops"),
"aiohttp.client_exceptions.ClientOSError",
),
],
)
@alert_count_params
Expand All @@ -348,22 +365,22 @@ def test_get_exception_handles_error(
exc_name,
count_start,
should_alert,
capture_exception,
sentry_capture_exception,
setup_request_exception,
redis,
):
setup_request_exception(exc)
month = get_monthly_timestamp()
key = f"thumbnail_error:{exc_name}:subdomain.example.com:{month}"
key = f"thumbnail_error:{exc_name}:{TEST_IMAGE_DOMAIN}:{month}"
redis.set(key, count_start)

with pytest.raises(UpstreamThumbnailException):
photon_get(TEST_MEDIA_INFO)

assert_func = (
capture_exception.assert_called_once
sentry_capture_exception.assert_called_once
if should_alert
else capture_exception.assert_not_called
else sentry_capture_exception.assert_not_called
)
assert_func()
assert redis.get(key) == str(count_start + 1).encode()
Expand All @@ -385,36 +402,29 @@ def test_get_http_exception_handles_error(
text,
count_start,
should_alert,
capture_exception,
setup_request_exception,
sentry_capture_exception,
redis,
):
mock_response = MagicMock(spec=requests.Response)
mock_response.status_code = status_code
mock_response.text = text
exc = requests.HTTPError(response=mock_response)
setup_request_exception(exc)

month = get_monthly_timestamp()
key = f"thumbnail_error:requests.exceptions.HTTPError:subdomain.example.com:{month}"
key = f"thumbnail_error:aiohttp.client_exceptions.ClientResponseError:{TEST_IMAGE_DOMAIN}:{month}"
redis.set(key, count_start)

with pytest.raises(UpstreamThumbnailException):
photon_get(TEST_MEDIA_INFO)
with pook.use():
pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(status_code, text).mock
photon_get(TEST_MEDIA_INFO)

assert_func = (
capture_exception.assert_called_once
sentry_capture_exception.assert_called_once
if should_alert
else capture_exception.assert_not_called
else sentry_capture_exception.assert_not_called
)
assert_func()
assert redis.get(key) == str(count_start + 1).encode()

# Assertions about the HTTP error specific message
assert (
redis.get(
f"thumbnail_http_error:subdomain.example.com:{month}:{status_code}:{text}"
)
redis.get(f"thumbnail_http_error:{TEST_IMAGE_DOMAIN}:{month}:{status_code}")
== b"1"
)

Expand Down Expand Up @@ -467,7 +477,7 @@ def test_get_unsuccessful_request_raises_custom_exception(photon_get):
("example.com/image.jpg", "jpg"),
("www.example.com/image.JPG", "jpg"),
("http://example.com/image.jpeg", "jpeg"),
("https://subdomain.example.com/image.svg", "svg"),
("https://example.com/image.svg", "svg"),
("https://example.com/image.png?foo=1&bar=2#fragment", "png"),
("https://example.com/possibleimagewithoutext", ""),
(
Expand Down
21 changes: 9 additions & 12 deletions api/test/unit/views/test_health_views.py
Expand Up @@ -32,31 +32,28 @@ def test_health_check_calls__check_db(api_client):


def test_health_check_es_timed_out(api_client):
mock_health_response(timed_out=True)
pook.on()
res = api_client.get("/healthcheck/", data={"check_es": True})
pook.off()
with pook.use():
mock_health_response(timed_out=True)
res = api_client.get("/healthcheck/", data={"check_es": True})

assert res.status_code == 503
assert res.json()["detail"] == "es_timed_out"


@pytest.mark.parametrize("status", ("yellow", "red"))
def test_health_check_es_status_bad(status, api_client):
mock_health_response(status=status)
pook.on()
res = api_client.get("/healthcheck/", data={"check_es": True})
pook.off()
with pook.use():
mock_health_response(status=status)
res = api_client.get("/healthcheck/", data={"check_es": True})

assert res.status_code == 503
assert res.json()["detail"] == f"es_status_{status}"


@pytest.mark.django_db
def test_health_check_es_all_good(api_client):
mock_health_response(status="green")
pook.on()
res = api_client.get("/healthcheck/", data={"check_es": True})
pook.off()
with pook.use():
mock_health_response(status="green")
res = api_client.get("/healthcheck/", data={"check_es": True})

assert res.status_code == 200
24 changes: 12 additions & 12 deletions docker-compose.yml
Expand Up @@ -21,7 +21,7 @@ x-airflow-common: &airflow-common
- CATALOG_PY_VERSION
- CATALOG_AIRFLOW_VERSION
volumes:
- ./catalog:/opt/airflow/catalog
- ./catalog:/opt/airflow/catalog:z
- catalog-cache:/home/airflow/.cache

services:
Expand Down Expand Up @@ -55,7 +55,7 @@ services:
- "50255:5432"
volumes:
- catalog-postgres:/var/lib/postgresql/data
- ./sample_data:/sample_data
- ./sample_data:/sample_data:z
env_file:
- docker/upstream_db/env.docker
healthcheck:
Expand All @@ -76,7 +76,7 @@ services:
command: minio server /data --address :5000 --console-address :5001
volumes:
- minio:/data
- ./docker/minio/s3_entrypoint.sh:/opt/minio/s3_entrypoint.sh:ro
- ./docker/minio/s3_entrypoint.sh:/opt/minio/s3_entrypoint.sh:ro,z
entrypoint: /opt/minio/s3_entrypoint.sh
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:5010/minio/health/live"]
Expand All @@ -97,8 +97,8 @@ services:
volumes:
# Buckets for testing provider data imported from s3 are subdirectories under
# /tests/s3-data/
- ./catalog/tests/s3-data:/data:rw
- ./docker/minio/load_to_s3_entrypoint.sh:/opt/minio/load_to_s3_entrypoint.sh:ro
- ./catalog/tests/s3-data:/data:rw,z
- ./docker/minio/load_to_s3_entrypoint.sh:/opt/minio/load_to_s3_entrypoint.sh:ro,z
entrypoint: /opt/minio/load_to_s3_entrypoint.sh

# Dev changes for the scheduler
Expand Down Expand Up @@ -161,8 +161,8 @@ services:
image: docker.io/clickhouse/clickhouse-server:23.4-alpine
volumes:
- plausible-clickhouse:/var/lib/clickhouse
- ./docker/clickhouse/clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro
- ./docker/clickhouse/clickhouse-user-config.xml:/etc/clickhouse-server/users.d/logging.xml:ro
- ./docker/clickhouse/clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro,z
- ./docker/clickhouse/clickhouse-user-config.xml:/etc/clickhouse-server/users.d/logging.xml:ro,z
ulimits:
nofile:
soft: 262144
Expand Down Expand Up @@ -224,7 +224,7 @@ services:
- API_PY_VERSION
image: openverse-api
volumes:
- ./api:/api
- ./api:/api:z
ports:
- "50280:8000" # Django
depends_on:
Expand Down Expand Up @@ -255,7 +255,7 @@ services:
- es
- indexer_worker
volumes:
- ./ingestion_server:/ingestion_server
- ./ingestion_server:/ingestion_server:z
env_file:
- ingestion_server/env.docker
- ingestion_server/.env
Expand All @@ -280,7 +280,7 @@ services:
- upstream_db
- es
volumes:
- ./ingestion_server:/ingestion_server
- ./ingestion_server:/ingestion_server:z
env_file:
- ingestion_server/env.docker
stdin_open: true
Expand Down Expand Up @@ -314,8 +314,8 @@ services:
depends_on:
- web
volumes:
- ./docker/nginx/templates:/etc/nginx/templates
- ./docker/nginx/certs:/etc/nginx/certs
- ./docker/nginx/templates:/etc/nginx/templates:z
- ./docker/nginx/certs:/etc/nginx/certs:z

frontend_nginx:
profiles:
Expand Down

0 comments on commit 9b90382

Please sign in to comment.