From 9b903827d42b951fe728f94f2f3197a588a9b9f0 Mon Sep 17 00:00:00 2001 From: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> Date: Wed, 6 Dec 2023 07:41:50 +1100 Subject: [PATCH] Fix image proxy get error handling (#3455) * Add `z` option to bind volumes to solve SELinux permissions * Fix health check view test pook usage * Use aiohttp client exceptions instead of requests --- api/api/utils/image_proxy/__init__.py | 15 ++--- api/test/unit/conftest.py | 2 +- api/test/unit/utils/test_image_proxy.py | 82 +++++++++++++----------- api/test/unit/views/test_health_views.py | 21 +++--- docker-compose.yml | 24 +++---- 5 files changed, 75 insertions(+), 69 deletions(-) diff --git a/api/api/utils/image_proxy/__init__.py b/api/api/utils/image_proxy/__init__.py index e37226fcac..77edba8bde 100644 --- a/api/api/utils/image_proxy/__init__.py +++ b/api/api/utils/image_proxy/__init__.py @@ -9,8 +9,8 @@ import aiohttp import django_redis -import requests import sentry_sdk +from aiohttp.client_exceptions import ClientResponseError from asgiref.sync import sync_to_async from sentry_sdk import push_scope, set_context @@ -185,16 +185,15 @@ async def get( "occurrences", settings.THUMBNAIL_ERROR_REPEATED_ALERT_FREQUENCY ) sentry_sdk.capture_exception(exc) - if isinstance(exc, requests.exceptions.HTTPError): - code = exc.response.status_code + if isinstance(exc, ClientResponseError): + status = exc.status do_not_wait_for( - tallies_incr( - f"thumbnail_http_error:{domain}:{month}:{code}:{exc.response.text}" - ) + tallies_incr(f"thumbnail_http_error:{domain}:{month}:{status}") ) logger.warning( f"Failed to render thumbnail " - f"{upstream_url=} {code=} " - f"{media_info.media_provider=}" + f"{upstream_url=} {status=} " + f"{media_info.media_provider=} " + f"{exc.message=}" ) raise UpstreamThumbnailException(f"Failed to render thumbnail. {exc}") diff --git a/api/test/unit/conftest.py b/api/test/unit/conftest.py index 5dd314db2e..155daedf17 100644 --- a/api/test/unit/conftest.py +++ b/api/test/unit/conftest.py @@ -59,7 +59,7 @@ def api_client(): @pytest.fixture(autouse=True) -def capture_exception(monkeypatch): +def sentry_capture_exception(monkeypatch): mock = MagicMock() monkeypatch.setattr("sentry_sdk.capture_exception", mock) diff --git a/api/test/unit/utils/test_image_proxy.py b/api/test/unit/utils/test_image_proxy.py index 657dbe211b..d78255404f 100644 --- a/api/test/unit/utils/test_image_proxy.py +++ b/api/test/unit/utils/test_image_proxy.py @@ -1,7 +1,6 @@ import asyncio from dataclasses import replace from test.factory.models.image import ImageFactory -from unittest.mock import MagicMock from urllib.parse import urlencode from django.conf import settings @@ -10,7 +9,8 @@ import aiohttp import pook import pytest -import requests +from aiohttp import client_exceptions +from aiohttp.client_reqrep import ConnectionKey from api.utils.image_proxy import ( HEADERS, @@ -23,7 +23,10 @@ from api.utils.tallies import get_monthly_timestamp -PHOTON_URL_FOR_TEST_IMAGE = f"{settings.PHOTON_ENDPOINT}subdomain.example.com/path_part1/part2/image_dot_jpg.jpg" +TEST_IMAGE_DOMAIN = "subdomain.example.com" +PHOTON_URL_FOR_TEST_IMAGE = ( + f"{settings.PHOTON_ENDPOINT}{TEST_IMAGE_DOMAIN}/path_part1/part2/image_dot_jpg.jpg" +) TEST_IMAGE_URL = PHOTON_URL_FOR_TEST_IMAGE.replace(settings.PHOTON_ENDPOINT, "http://") TEST_MEDIA_IDENTIFIER = "123" TEST_MEDIA_PROVIDER = "foo" @@ -36,10 +39,6 @@ UA_HEADER = HEADERS["User-Agent"] -# cannot use actual image response because I kept running into some issue with -# requests not being able to decode the content -# I will come back to try to sort it out later but for now -# this will get the tests working. MOCK_BODY = "mock response body" SVG_BODY = """ @@ -306,9 +305,7 @@ def test_get_successful_records_response_code(photon_get, mock_image_data, redis month = get_monthly_timestamp() assert redis.get(f"thumbnail_response_code:{month}:200") == b"1" assert ( - redis.get( - f"thumbnail_response_code_by_domain:subdomain.example.com:{month}:200" - ) + redis.get(f"thumbnail_response_code_by_domain:{TEST_IMAGE_DOMAIN}:{month}:200") == b"1" ) @@ -328,17 +325,37 @@ def test_get_successful_records_response_code(photon_get, mock_image_data, redis ], ) +MOCK_CONNECTION_KEY = ConnectionKey( + host="https://localhost", + port=None, + is_ssl=False, + ssl=None, + proxy=None, + proxy_auth=None, + proxy_headers_hash=None, +) + @pytest.mark.parametrize( "exc, exc_name", [ (ValueError("whoops"), "builtins.ValueError"), - (requests.ConnectionError("whoops"), "requests.exceptions.ConnectionError"), - (requests.ConnectTimeout("whoops"), "requests.exceptions.ConnectTimeout"), - (requests.ReadTimeout("whoops"), "requests.exceptions.ReadTimeout"), - (requests.Timeout("whoops"), "requests.exceptions.Timeout"), - (requests.exceptions.SSLError("whoops"), "requests.exceptions.SSLError"), - (OSError("whoops"), "builtins.OSError"), + ( + client_exceptions.ClientConnectionError("whoops"), + "aiohttp.client_exceptions.ClientConnectionError", + ), + ( + client_exceptions.ServerTimeoutError("whoops"), + "aiohttp.client_exceptions.ServerTimeoutError", + ), + ( + client_exceptions.ClientSSLError(MOCK_CONNECTION_KEY, OSError()), + "aiohttp.client_exceptions.ClientSSLError", + ), + ( + client_exceptions.ClientOSError("whoops"), + "aiohttp.client_exceptions.ClientOSError", + ), ], ) @alert_count_params @@ -348,22 +365,22 @@ def test_get_exception_handles_error( exc_name, count_start, should_alert, - capture_exception, + sentry_capture_exception, setup_request_exception, redis, ): setup_request_exception(exc) month = get_monthly_timestamp() - key = f"thumbnail_error:{exc_name}:subdomain.example.com:{month}" + key = f"thumbnail_error:{exc_name}:{TEST_IMAGE_DOMAIN}:{month}" redis.set(key, count_start) with pytest.raises(UpstreamThumbnailException): photon_get(TEST_MEDIA_INFO) assert_func = ( - capture_exception.assert_called_once + sentry_capture_exception.assert_called_once if should_alert - else capture_exception.assert_not_called + else sentry_capture_exception.assert_not_called ) assert_func() assert redis.get(key) == str(count_start + 1).encode() @@ -385,36 +402,29 @@ def test_get_http_exception_handles_error( text, count_start, should_alert, - capture_exception, - setup_request_exception, + sentry_capture_exception, redis, ): - mock_response = MagicMock(spec=requests.Response) - mock_response.status_code = status_code - mock_response.text = text - exc = requests.HTTPError(response=mock_response) - setup_request_exception(exc) - month = get_monthly_timestamp() - key = f"thumbnail_error:requests.exceptions.HTTPError:subdomain.example.com:{month}" + key = f"thumbnail_error:aiohttp.client_exceptions.ClientResponseError:{TEST_IMAGE_DOMAIN}:{month}" redis.set(key, count_start) with pytest.raises(UpstreamThumbnailException): - photon_get(TEST_MEDIA_INFO) + with pook.use(): + pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(status_code, text).mock + photon_get(TEST_MEDIA_INFO) assert_func = ( - capture_exception.assert_called_once + sentry_capture_exception.assert_called_once if should_alert - else capture_exception.assert_not_called + else sentry_capture_exception.assert_not_called ) assert_func() assert redis.get(key) == str(count_start + 1).encode() # Assertions about the HTTP error specific message assert ( - redis.get( - f"thumbnail_http_error:subdomain.example.com:{month}:{status_code}:{text}" - ) + redis.get(f"thumbnail_http_error:{TEST_IMAGE_DOMAIN}:{month}:{status_code}") == b"1" ) @@ -467,7 +477,7 @@ def test_get_unsuccessful_request_raises_custom_exception(photon_get): ("example.com/image.jpg", "jpg"), ("www.example.com/image.JPG", "jpg"), ("http://example.com/image.jpeg", "jpeg"), - ("https://subdomain.example.com/image.svg", "svg"), + ("https://example.com/image.svg", "svg"), ("https://example.com/image.png?foo=1&bar=2#fragment", "png"), ("https://example.com/possibleimagewithoutext", ""), ( diff --git a/api/test/unit/views/test_health_views.py b/api/test/unit/views/test_health_views.py index 720c18398d..d967e098fd 100644 --- a/api/test/unit/views/test_health_views.py +++ b/api/test/unit/views/test_health_views.py @@ -32,10 +32,9 @@ def test_health_check_calls__check_db(api_client): def test_health_check_es_timed_out(api_client): - mock_health_response(timed_out=True) - pook.on() - res = api_client.get("/healthcheck/", data={"check_es": True}) - pook.off() + with pook.use(): + mock_health_response(timed_out=True) + res = api_client.get("/healthcheck/", data={"check_es": True}) assert res.status_code == 503 assert res.json()["detail"] == "es_timed_out" @@ -43,10 +42,9 @@ def test_health_check_es_timed_out(api_client): @pytest.mark.parametrize("status", ("yellow", "red")) def test_health_check_es_status_bad(status, api_client): - mock_health_response(status=status) - pook.on() - res = api_client.get("/healthcheck/", data={"check_es": True}) - pook.off() + with pook.use(): + mock_health_response(status=status) + res = api_client.get("/healthcheck/", data={"check_es": True}) assert res.status_code == 503 assert res.json()["detail"] == f"es_status_{status}" @@ -54,9 +52,8 @@ def test_health_check_es_status_bad(status, api_client): @pytest.mark.django_db def test_health_check_es_all_good(api_client): - mock_health_response(status="green") - pook.on() - res = api_client.get("/healthcheck/", data={"check_es": True}) - pook.off() + with pook.use(): + mock_health_response(status="green") + res = api_client.get("/healthcheck/", data={"check_es": True}) assert res.status_code == 200 diff --git a/docker-compose.yml b/docker-compose.yml index 010fb13572..0053062fe6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,7 +21,7 @@ x-airflow-common: &airflow-common - CATALOG_PY_VERSION - CATALOG_AIRFLOW_VERSION volumes: - - ./catalog:/opt/airflow/catalog + - ./catalog:/opt/airflow/catalog:z - catalog-cache:/home/airflow/.cache services: @@ -55,7 +55,7 @@ services: - "50255:5432" volumes: - catalog-postgres:/var/lib/postgresql/data - - ./sample_data:/sample_data + - ./sample_data:/sample_data:z env_file: - docker/upstream_db/env.docker healthcheck: @@ -76,7 +76,7 @@ services: command: minio server /data --address :5000 --console-address :5001 volumes: - minio:/data - - ./docker/minio/s3_entrypoint.sh:/opt/minio/s3_entrypoint.sh:ro + - ./docker/minio/s3_entrypoint.sh:/opt/minio/s3_entrypoint.sh:ro,z entrypoint: /opt/minio/s3_entrypoint.sh healthcheck: test: ["CMD", "curl", "-f", "http://localhost:5010/minio/health/live"] @@ -97,8 +97,8 @@ services: volumes: # Buckets for testing provider data imported from s3 are subdirectories under # /tests/s3-data/ - - ./catalog/tests/s3-data:/data:rw - - ./docker/minio/load_to_s3_entrypoint.sh:/opt/minio/load_to_s3_entrypoint.sh:ro + - ./catalog/tests/s3-data:/data:rw,z + - ./docker/minio/load_to_s3_entrypoint.sh:/opt/minio/load_to_s3_entrypoint.sh:ro,z entrypoint: /opt/minio/load_to_s3_entrypoint.sh # Dev changes for the scheduler @@ -161,8 +161,8 @@ services: image: docker.io/clickhouse/clickhouse-server:23.4-alpine volumes: - plausible-clickhouse:/var/lib/clickhouse - - ./docker/clickhouse/clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro - - ./docker/clickhouse/clickhouse-user-config.xml:/etc/clickhouse-server/users.d/logging.xml:ro + - ./docker/clickhouse/clickhouse-config.xml:/etc/clickhouse-server/config.d/logging.xml:ro,z + - ./docker/clickhouse/clickhouse-user-config.xml:/etc/clickhouse-server/users.d/logging.xml:ro,z ulimits: nofile: soft: 262144 @@ -224,7 +224,7 @@ services: - API_PY_VERSION image: openverse-api volumes: - - ./api:/api + - ./api:/api:z ports: - "50280:8000" # Django depends_on: @@ -255,7 +255,7 @@ services: - es - indexer_worker volumes: - - ./ingestion_server:/ingestion_server + - ./ingestion_server:/ingestion_server:z env_file: - ingestion_server/env.docker - ingestion_server/.env @@ -280,7 +280,7 @@ services: - upstream_db - es volumes: - - ./ingestion_server:/ingestion_server + - ./ingestion_server:/ingestion_server:z env_file: - ingestion_server/env.docker stdin_open: true @@ -314,8 +314,8 @@ services: depends_on: - web volumes: - - ./docker/nginx/templates:/etc/nginx/templates - - ./docker/nginx/certs:/etc/nginx/certs + - ./docker/nginx/templates:/etc/nginx/templates:z + - ./docker/nginx/certs:/etc/nginx/certs:z frontend_nginx: profiles: