From b08bff585364e0df49eb92e3d4101d86e28f5167 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Wed, 6 Apr 2022 12:25:10 +0400 Subject: [PATCH 01/16] Update thumbnail generation code --- api/catalog/api/views/audio_views.py | 6 +-- api/catalog/api/views/image_views.py | 6 +-- api/catalog/api/views/media_views.py | 67 ++++++++++++++++++++++------ api/catalog/settings.py | 4 +- api/env.docker | 2 +- api/env.template | 6 ++- docker-compose.yml | 9 ++-- 7 files changed, 71 insertions(+), 29 deletions(-) diff --git a/api/catalog/api/views/audio_views.py b/api/catalog/api/views/audio_views.py index 9bf2b7021..6ff4dd850 100644 --- a/api/catalog/api/views/audio_views.py +++ b/api/catalog/api/views/audio_views.py @@ -64,11 +64,7 @@ def thumbnail(self, request, *_, **__): if not image_url: raise get_api_exception("Could not find artwork.", 404) - is_full_size = request.query_params.get("full_size", False) - if is_full_size: - return self._get_proxied_image(image_url, None) - else: - return self._get_proxied_image(image_url) + return super().thumbnail(image_url, request) @action( detail=True, diff --git a/api/catalog/api/views/image_views.py b/api/catalog/api/views/image_views.py index c506f5762..f5c038f5b 100644 --- a/api/catalog/api/views/image_views.py +++ b/api/catalog/api/views/image_views.py @@ -100,11 +100,7 @@ def thumbnail(self, request, *_, **__): if not image_url: raise get_api_exception("Could not find image.", 404) - is_full_size = request.query_params.get("full_size", False) - if is_full_size: - return self._get_proxied_image(image_url, None) - else: - return self._get_proxied_image(image_url) + return super().thumbnail(image_url, request) @action(detail=True, url_path="watermark", url_name="watermark") def watermark(self, request, *_, **__): diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index 63c33fdbd..2ee8c424c 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -1,4 +1,8 @@ +import json +import logging as log +from distutils.util import strtobool from urllib.error import HTTPError +from urllib.parse import urlencode from urllib.request import urlopen from catalog.api.controllers import search_controller @@ -124,6 +128,17 @@ def report(self, request, *_, **__): serializer = self.get_serializer(report) return Response(data=serializer.data, status=status.HTTP_201_CREATED) + def thumbnail(self, image_url, request, *_, **__): + full_size_param = request.query_params.get("full_size", "false").lower() + is_full_size = strtobool(full_size_param) + compressed_param = request.query_params.get( + "compressed", + "false" if is_full_size else "true", + ).lower() + is_compressed = strtobool(compressed_param) + + return self._get_proxied_image(image_url, is_full_size, is_compressed) + # Helper functions @staticmethod @@ -143,24 +158,50 @@ def _get_user_ip(request): return ip @staticmethod - def _get_proxied_image(image_url, width=settings.THUMBNAIL_WIDTH_PX): - if width is None: # full size - proxy_upstream = f"{settings.THUMBNAIL_PROXY_URL}/{image_url}" - else: - proxy_upstream = ( - f"{settings.THUMBNAIL_PROXY_URL}/" - f"{settings.THUMBNAIL_WIDTH_PX},fit/" - f"{image_url}" - ) + def _thumbnail_proxy_comm(path: str, params: dict): + proxy_url = settings.THUMBNAIL_PROXY_URL + query_string = urlencode(params) + upstream_url = f"{proxy_url}/{path}?{query_string}" + log.info(f"Upstream URL: {upstream_url}") + try: - upstream_response = urlopen(proxy_upstream) - status = upstream_response.status + upstream_response = urlopen(upstream_url) + + res_status = upstream_response.status + log.info(f"Response status: {res_status}") + content_type = upstream_response.headers.get("Content-Type") + log.info(f"Response Content-Type: {content_type}") + + return upstream_response, res_status, content_type except HTTPError: raise get_api_exception("Failed to render thumbnail.") + @staticmethod + def _get_proxied_image( + image_url: str, + is_full_size: bool = False, + is_compressed: bool = True, + ): + info_res, *_ = MediaViewSet._thumbnail_proxy_comm("info", {"url": image_url}) + info = json.loads(info_res.read()) + + path = "resize" + params = { + "url": image_url, + "width": info["width"] if is_full_size else settings.THUMBNAIL_WIDTH_PX, + } + if is_compressed: + params |= { + "quality": settings.THUMBNAIL_JPG_QUALITY, + "compression": settings.THUMBNAIL_PNG_COMPRESSION, + } + else: + params |= {"quality": 100, "compression": 0} + img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm( + path, params + ) response = HttpResponse( - upstream_response.read(), status=status, content_type=content_type + img_res.read(), status=res_status, content_type=content_type ) - return response diff --git a/api/catalog/settings.py b/api/catalog/settings.py index 234cd6581..76bb4ba88 100644 --- a/api/catalog/settings.py +++ b/api/catalog/settings.py @@ -183,7 +183,9 @@ # Produce CC-hosted thumbnails dynamically through a proxy. THUMBNAIL_PROXY_URL = config("THUMBNAIL_PROXY_URL", default="http://localhost:8222") -THUMBNAIL_WIDTH_PX = 600 +THUMBNAIL_WIDTH_PX = config("THUMBNAIL_WIDTH_PX", cast=int, default=600) +THUMBNAIL_JPG_QUALITY = config("THUMBNAIL_JPG_QUALITY", cast=int, default=80) +THUMBNAIL_PNG_COMPRESSION = config("THUMBNAIL_PNG_COMPRESSION", cast=int, default=6) AUTHENTICATION_BACKENDS = ( "oauth2_provider.backends.OAuth2Backend", diff --git a/api/env.docker b/api/env.docker index e76c27c5a..d2a78d94d 100644 --- a/api/env.docker +++ b/api/env.docker @@ -6,7 +6,7 @@ DJANGO_DEBUG_ENABLED="True" REDIS_HOST="cache" -THUMBNAIL_PROXY_URL="http://thumbs:8222" +THUMBNAIL_PROXY_URL="http://thumbnails:8222" DJANGO_DATABASE_HOST="db" diff --git a/api/env.template b/api/env.template index a59c4c706..556985cc4 100644 --- a/api/env.template +++ b/api/env.template @@ -15,7 +15,11 @@ DJANGO_DEBUG_ENABLED="True" #REDIS_PORT="6379" #REDIS_PASSWORD="" -#THUMBNAIL_PROXY_URL="http://thumbs:8222" +#THUMBNAIL_PROXY_URL="http://thumbnails:8222" + +#THUMBNAIL_WIDTH_PX="600" +#THUMBNAIL_JPG_QUALITY="80" +#THUMBNAIL_PNG_COMPRESSION="6" #DJANGO_DATABASE_HOST="db" #DJANGO_DATABASE_PORT="5432" diff --git a/docker-compose.yml b/docker-compose.yml index 8d2649ff7..f28c68c36 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,11 +11,14 @@ services: volumes: - api-postgres:/var/lib/postgresql/data - thumbs: - image: willnorris/imageproxy + thumbnails: + image: h2non/imaginary:latest ports: - "8222:8222" - command: ["-addr", "0.0.0.0:8222"] + environment: + PORT: 8222 + MALLOC_ARENA_MAX: 2 + command: ["-enable-url-source"] upstream_db: image: postgres:13.2-alpine From 7b7a178658d30dcbacbe3b26957288beb6d00b82 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Wed, 6 Apr 2022 13:33:25 +0400 Subject: [PATCH 02/16] Update documentation --- api/docs/guides/quickstart.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/docs/guides/quickstart.md b/api/docs/guides/quickstart.md index 36e97110c..d277d4cbc 100644 --- a/api/docs/guides/quickstart.md +++ b/api/docs/guides/quickstart.md @@ -79,7 +79,7 @@ The command `just up` spawns the following services: - API application database - [Elasticsearch](https://www.elastic.co/elasticsearch/) - [Redis](https://redis.io/) -- [imageproxy](https://github.com/willnorris/imageproxy) +- [imaginary](https://github.com/h2non/imaginary) - [NGINX](http://nginx.org) - **web** (`api/`) - **ingestion_server** and **indexer_worker** (`ingestion_server/`) From d4315a90eaeb7be1926beb7cc6cd285469c5d5ca Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Wed, 6 Apr 2022 13:46:33 +0400 Subject: [PATCH 03/16] Add integration tests for compression functionality --- api/test/audio_integration_test.py | 5 +++++ api/test/image_integration_test.py | 5 +++++ api/test/media_integration.py | 11 +++++++++++ 3 files changed, 21 insertions(+) diff --git a/api/test/audio_integration_test.py b/api/test/audio_integration_test.py index f545cdb62..894b576a8 100644 --- a/api/test/audio_integration_test.py +++ b/api/test/audio_integration_test.py @@ -17,6 +17,7 @@ search_special_chars, stats, thumb, + thumb_compression, ) import pytest @@ -73,5 +74,9 @@ def test_audio_thumb(audio_fixture): thumb(audio_fixture) +def test_audio_thumb_compression(audio_fixture): + thumb_compression(audio_fixture) + + def test_audio_report(audio_fixture): report("audio", audio_fixture) diff --git a/api/test/image_integration_test.py b/api/test/image_integration_test.py index e892a42c4..0c054bccf 100644 --- a/api/test/image_integration_test.py +++ b/api/test/image_integration_test.py @@ -17,6 +17,7 @@ search_special_chars, stats, thumb, + thumb_compression, ) from urllib.parse import urlencode @@ -72,6 +73,10 @@ def test_image_thumb(image_fixture): thumb(image_fixture) +def test_image_thumb_compression(image_fixture): + thumb_compression(image_fixture) + + def test_audio_report(image_fixture): report("images", image_fixture) diff --git a/api/test/media_integration.py b/api/test/media_integration.py index 9bc1c3409..6b84b0e17 100644 --- a/api/test/media_integration.py +++ b/api/test/media_integration.py @@ -105,6 +105,17 @@ def thumb(fixture): assert thumbnail_response.headers["Content-Type"].startswith("image/") +def thumb_compression(fixture): + thumbnail_url = fixture["results"][0]["thumbnail"] + + thumbnail_response = requests.get(thumbnail_url) + compressed_size = len(thumbnail_response.content) + thumbnail_response = requests.get(f"{thumbnail_url}?compressed=no") + actual_size = len(thumbnail_response.content) + + assert compressed_size < actual_size + + def report(media_type, fixture): test_id = fixture["results"][0]["id"] response = requests.post( From 5c4da6616d27c01d3adcf6755c56b34a22127371 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Wed, 6 Apr 2022 18:07:40 +0400 Subject: [PATCH 04/16] Use serializer to document the API endpoint --- api/catalog/api/docs/audio_docs.py | 16 ++++++++++ api/catalog/api/docs/image_docs.py | 15 ++++++++++ .../api/serializers/media_serializers.py | 30 +++++++++++++++++++ api/catalog/api/views/audio_views.py | 5 +++- api/catalog/api/views/image_views.py | 5 +++- api/catalog/api/views/media_views.py | 14 +++------ 6 files changed, 73 insertions(+), 12 deletions(-) diff --git a/api/catalog/api/docs/audio_docs.py b/api/catalog/api/docs/audio_docs.py index 6d89d07a1..a2dd81685 100644 --- a/api/catalog/api/docs/audio_docs.py +++ b/api/catalog/api/docs/audio_docs.py @@ -5,6 +5,7 @@ MediaSearch, MediaStats, fields_to_md, + refer_sample, ) from catalog.api.examples import ( audio_complain_201_example, @@ -31,6 +32,7 @@ InputErrorSerializer, NotFoundErrorSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.serializers.provider_serializers import ProviderSerializer from drf_yasg import openapi @@ -206,3 +208,17 @@ class AudioComplain(MediaComplain): "responses": responses, "code_examples": code_examples, } + + +class AudioThumbnail: + desc = f""" +thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail +of the artwork of an audio track or its audio set. + +{refer_sample}""" # noqa + + swagger_setup = { + "operation_id": "audio_thumbnail", + "operation_description": desc, + "query_serializer": MediaThumbnailRequestSerializer, + } diff --git a/api/catalog/api/docs/image_docs.py b/api/catalog/api/docs/image_docs.py index 6bccc2d41..03fdb0b58 100644 --- a/api/catalog/api/docs/image_docs.py +++ b/api/catalog/api/docs/image_docs.py @@ -37,6 +37,7 @@ OembedRequestSerializer, OembedSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.serializers.provider_serializers import ProviderSerializer from drf_yasg import openapi @@ -240,3 +241,17 @@ class ImageOembed: "responses": responses, "code_examples": code_examples, } + + +class ImageThumbnail: + desc = f""" +thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail +of an image. + +{refer_sample}""" # noqa + + swagger_setup = { + "operation_id": "image_thumbnail", + "operation_description": desc, + "query_serializer": MediaThumbnailRequestSerializer, + } diff --git a/api/catalog/api/serializers/media_serializers.py b/api/catalog/api/serializers/media_serializers.py index 6bb046a49..570bc8008 100644 --- a/api/catalog/api/serializers/media_serializers.py +++ b/api/catalog/api/serializers/media_serializers.py @@ -1,3 +1,4 @@ +import logging as log from collections import namedtuple from urllib.parse import urlparse @@ -420,3 +421,32 @@ class MediaSearchSerializer(serializers.Serializer): page = serializers.IntegerField( help_text="The current page number returned in the response." ) + + +class MediaThumbnailRequestSerializer(serializers.Serializer): + """ + This serializer parses and validates thumbnail query string parameters. + """ + + full_size = serializers.BooleanField( + source="is_full_size", + allow_null=True, + required=False, + default=False, + help_text="whether to render the actual image and not a thumbnail version", + ) + compressed = serializers.BooleanField( + source="is_compressed", + allow_null=True, + default=None, + required=False, + help_text="whether to compress the output image to reduce file size," + "defaults to opposite of `full_size`", + ) + + def validate(self, data): + log.info(f"MediaThumbnailRequestSerializer data: {data}") + if data.get("is_compressed") is None: + data["is_compressed"] = not data["is_full_size"] + log.info(f"MediaThumbnailRequestSerializer validated data: {data}") + return data diff --git a/api/catalog/api/views/audio_views.py b/api/catalog/api/views/audio_views.py index 6ff4dd850..7791a0c6d 100644 --- a/api/catalog/api/views/audio_views.py +++ b/api/catalog/api/views/audio_views.py @@ -6,6 +6,7 @@ AudioRelated, AudioSearch, AudioStats, + AudioThumbnail, ) from catalog.api.models import Audio from catalog.api.serializers.audio_serializers import ( @@ -14,6 +15,7 @@ AudioSerializer, AudioWaveformSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.utils.exceptions import get_api_exception from catalog.api.utils.throttle import OneThousandPerMinute from catalog.api.views.media_views import MediaViewSet @@ -31,7 +33,7 @@ @method_decorator(swagger_auto_schema(**AudioDetail.swagger_setup), "retrieve") @method_decorator(swagger_auto_schema(**AudioRelated.swagger_setup), "related") @method_decorator(swagger_auto_schema(**AudioComplain.swagger_setup), "report") -@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail") +@method_decorator(swagger_auto_schema(**AudioThumbnail.swagger_setup), "thumbnail") @method_decorator(swagger_auto_schema(auto_schema=None), "waveform") class AudioViewSet(MediaViewSet): """ @@ -51,6 +53,7 @@ class AudioViewSet(MediaViewSet): detail=True, url_path="thumb", url_name="thumb", + serializer_class=MediaThumbnailRequestSerializer, throttle_classes=[OneThousandPerMinute], ) def thumbnail(self, request, *_, **__): diff --git a/api/catalog/api/views/image_views.py b/api/catalog/api/views/image_views.py index f5c038f5b..d2ab23e97 100644 --- a/api/catalog/api/views/image_views.py +++ b/api/catalog/api/views/image_views.py @@ -10,6 +10,7 @@ ImageRelated, ImageSearch, ImageStats, + ImageThumbnail, ) from catalog.api.models import Image from catalog.api.serializers.image_serializers import ( @@ -20,6 +21,7 @@ OembedSerializer, WatermarkRequestSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.utils.exceptions import get_api_exception from catalog.api.utils.throttle import OneThousandPerMinute from catalog.api.utils.watermark import watermark @@ -42,7 +44,7 @@ @method_decorator(swagger_auto_schema(**ImageRelated.swagger_setup), "related") @method_decorator(swagger_auto_schema(**ImageComplain.swagger_setup), "report") @method_decorator(swagger_auto_schema(**ImageOembed.swagger_setup), "oembed") -@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail") +@method_decorator(swagger_auto_schema(**ImageThumbnail.swagger_setup), "thumbnail") @method_decorator(swagger_auto_schema(auto_schema=None), "watermark") class ImageViewSet(MediaViewSet): """ @@ -91,6 +93,7 @@ def oembed(self, request, *_, **__): detail=True, url_path="thumb", url_name="thumb", + serializer_class=MediaThumbnailRequestSerializer, throttle_classes=[OneThousandPerMinute], ) def thumbnail(self, request, *_, **__): diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index 2ee8c424c..62ce01add 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -1,6 +1,5 @@ import json import logging as log -from distutils.util import strtobool from urllib.error import HTTPError from urllib.parse import urlencode from urllib.request import urlopen @@ -129,15 +128,10 @@ def report(self, request, *_, **__): return Response(data=serializer.data, status=status.HTTP_201_CREATED) def thumbnail(self, image_url, request, *_, **__): - full_size_param = request.query_params.get("full_size", "false").lower() - is_full_size = strtobool(full_size_param) - compressed_param = request.query_params.get( - "compressed", - "false" if is_full_size else "true", - ).lower() - is_compressed = strtobool(compressed_param) - - return self._get_proxied_image(image_url, is_full_size, is_compressed) + serializer = self.get_serializer(data=request.query_params) + if not serializer.is_valid(): + raise get_api_exception("Invalid input.", 400) + return self._get_proxied_image(image_url, **serializer.validated_data) # Helper functions From cc7c69b7501c251401ac4067a153619726f61547 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 8 Apr 2022 09:05:25 +0400 Subject: [PATCH 05/16] Remove `#noqa` comments --- api/catalog/api/docs/audio_docs.py | 2 +- api/catalog/api/docs/image_docs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/catalog/api/docs/audio_docs.py b/api/catalog/api/docs/audio_docs.py index a2dd81685..ee33d7fe1 100644 --- a/api/catalog/api/docs/audio_docs.py +++ b/api/catalog/api/docs/audio_docs.py @@ -215,7 +215,7 @@ class AudioThumbnail: thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail of the artwork of an audio track or its audio set. -{refer_sample}""" # noqa +{refer_sample}""" swagger_setup = { "operation_id": "audio_thumbnail", diff --git a/api/catalog/api/docs/image_docs.py b/api/catalog/api/docs/image_docs.py index 03fdb0b58..4f2ea54a6 100644 --- a/api/catalog/api/docs/image_docs.py +++ b/api/catalog/api/docs/image_docs.py @@ -248,7 +248,7 @@ class ImageThumbnail: thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail of an image. -{refer_sample}""" # noqa +{refer_sample}""" swagger_setup = { "operation_id": "image_thumbnail", From 83342f76c3577b7527154f846287cad4b2d815a9 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 8 Apr 2022 09:06:09 +0400 Subject: [PATCH 06/16] Remove redundant logs --- api/catalog/api/serializers/media_serializers.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/api/catalog/api/serializers/media_serializers.py b/api/catalog/api/serializers/media_serializers.py index 570bc8008..24689b9f5 100644 --- a/api/catalog/api/serializers/media_serializers.py +++ b/api/catalog/api/serializers/media_serializers.py @@ -1,4 +1,3 @@ -import logging as log from collections import namedtuple from urllib.parse import urlparse @@ -445,8 +444,6 @@ class MediaThumbnailRequestSerializer(serializers.Serializer): ) def validate(self, data): - log.info(f"MediaThumbnailRequestSerializer data: {data}") if data.get("is_compressed") is None: data["is_compressed"] = not data["is_full_size"] - log.info(f"MediaThumbnailRequestSerializer validated data: {data}") return data From 930b03942c08cbf29c6b0620b95249536175525f Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 8 Apr 2022 09:08:42 +0400 Subject: [PATCH 07/16] Reduce logging and downgrade severity to debug --- api/catalog/api/views/media_views.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index 62ce01add..f9d3483e8 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -156,16 +156,17 @@ def _thumbnail_proxy_comm(path: str, params: dict): proxy_url = settings.THUMBNAIL_PROXY_URL query_string = urlencode(params) upstream_url = f"{proxy_url}/{path}?{query_string}" - log.info(f"Upstream URL: {upstream_url}") + log.debug(f"Image proxy upstream URL: {upstream_url}") try: upstream_response = urlopen(upstream_url) res_status = upstream_response.status - log.info(f"Response status: {res_status}") - content_type = upstream_response.headers.get("Content-Type") - log.info(f"Response Content-Type: {content_type}") + log.debug( + "Image proxy response " + f"status: {res_status}, content-type: {content_type}" + ) return upstream_response, res_status, content_type except HTTPError: From 0894ed3b4999db7b9e926bd92c6e1108ea77334f Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 8 Apr 2022 10:48:52 +0400 Subject: [PATCH 08/16] Use client-side accept header to automatically use WEBP output --- api/catalog/api/views/media_views.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index f9d3483e8..40377d9aa 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -1,8 +1,9 @@ import json import logging as log +from typing import List from urllib.error import HTTPError from urllib.parse import urlencode -from urllib.request import urlopen +from urllib.request import Request, urlopen from catalog.api.controllers import search_controller from catalog.api.models import ContentProvider @@ -131,7 +132,11 @@ def thumbnail(self, image_url, request, *_, **__): serializer = self.get_serializer(data=request.query_params) if not serializer.is_valid(): raise get_api_exception("Invalid input.", 400) - return self._get_proxied_image(image_url, **serializer.validated_data) + return self._get_proxied_image( + image_url, + accept_header=request.headers.get("Accept", "image/*"), + **serializer.validated_data, + ) # Helper functions @@ -152,14 +157,24 @@ def _get_user_ip(request): return ip @staticmethod - def _thumbnail_proxy_comm(path: str, params: dict): + def _thumbnail_proxy_comm( + path: str, + params: dict, + headers: List[ + tuple[str, str] + ] = None, # ``List`` because there is a ``list`` function + ): proxy_url = settings.THUMBNAIL_PROXY_URL query_string = urlencode(params) upstream_url = f"{proxy_url}/{path}?{query_string}" log.debug(f"Image proxy upstream URL: {upstream_url}") try: - upstream_response = urlopen(upstream_url) + req = Request(upstream_url) + if headers: + for key, val in headers: + req.add_header(key, val) + upstream_response = urlopen(req) res_status = upstream_response.status content_type = upstream_response.headers.get("Content-Type") @@ -175,6 +190,7 @@ def _thumbnail_proxy_comm(path: str, params: dict): @staticmethod def _get_proxied_image( image_url: str, + accept_header: str = "image/*", is_full_size: bool = False, is_compressed: bool = True, ): @@ -190,11 +206,12 @@ def _get_proxied_image( params |= { "quality": settings.THUMBNAIL_JPG_QUALITY, "compression": settings.THUMBNAIL_PNG_COMPRESSION, + "type": "auto", # uses ``Accept`` header to determine output type } else: params |= {"quality": 100, "compression": 0} img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm( - path, params + path, params, [("Accept", accept_header)] ) response = HttpResponse( img_res.read(), status=res_status, content_type=content_type From 9ee8d4ff270173186dc9180f417751c69b594777 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 8 Apr 2022 11:07:18 +0400 Subject: [PATCH 09/16] Prevent jpg/png conversion --- api/catalog/api/views/media_views.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index 40377d9aa..55ecc3de0 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -197,21 +197,27 @@ def _get_proxied_image( info_res, *_ = MediaViewSet._thumbnail_proxy_comm("info", {"url": image_url}) info = json.loads(info_res.read()) - path = "resize" params = { "url": image_url, "width": info["width"] if is_full_size else settings.THUMBNAIL_WIDTH_PX, } + if is_compressed: params |= { "quality": settings.THUMBNAIL_JPG_QUALITY, "compression": settings.THUMBNAIL_PNG_COMPRESSION, - "type": "auto", # uses ``Accept`` header to determine output type } else: - params |= {"quality": 100, "compression": 0} + params |= { + "quality": 100, + "compression": 0, + } + + if "webp" in accept_header: + params["type"] = "auto" # Use ``Accept`` header to determine output type. + img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm( - path, params, [("Accept", accept_header)] + "resize", params, [("Accept", accept_header)] ) response = HttpResponse( img_res.read(), status=res_status, content_type=content_type From 32868e66558ca41ee2b68844325b4cdb6dca1c9f Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Fri, 8 Apr 2022 11:19:33 +0400 Subject: [PATCH 10/16] Add tests for WEBP output --- api/test/audio_integration_test.py | 5 +++++ api/test/image_integration_test.py | 5 +++++ api/test/media_integration.py | 11 +++++++++++ 3 files changed, 21 insertions(+) diff --git a/api/test/audio_integration_test.py b/api/test/audio_integration_test.py index 894b576a8..4e8bfbe0c 100644 --- a/api/test/audio_integration_test.py +++ b/api/test/audio_integration_test.py @@ -18,6 +18,7 @@ stats, thumb, thumb_compression, + thumb_webp, ) import pytest @@ -78,5 +79,9 @@ def test_audio_thumb_compression(audio_fixture): thumb_compression(audio_fixture) +def test_audio_thumb_webp(audio_fixture): + thumb_webp(audio_fixture) + + def test_audio_report(audio_fixture): report("audio", audio_fixture) diff --git a/api/test/image_integration_test.py b/api/test/image_integration_test.py index 0c054bccf..d984e370d 100644 --- a/api/test/image_integration_test.py +++ b/api/test/image_integration_test.py @@ -18,6 +18,7 @@ stats, thumb, thumb_compression, + thumb_webp, ) from urllib.parse import urlencode @@ -77,6 +78,10 @@ def test_image_thumb_compression(image_fixture): thumb_compression(image_fixture) +def test_image_thumb_webp(image_fixture): + thumb_webp(image_fixture) + + def test_audio_report(image_fixture): report("images", image_fixture) diff --git a/api/test/media_integration.py b/api/test/media_integration.py index 6b84b0e17..67f769013 100644 --- a/api/test/media_integration.py +++ b/api/test/media_integration.py @@ -116,6 +116,17 @@ def thumb_compression(fixture): assert compressed_size < actual_size +def thumb_webp(fixture): + thumbnail_url = fixture["results"][0]["thumbnail"] + + thumbnail_response = requests.get(thumbnail_url, headers={"Accept": "image/*,*/*"}) + assert thumbnail_response.headers["Content-Type"] != "image/webp" + thumbnail_response = requests.get( + thumbnail_url, headers={"Accept": "image/webp,image/*,*/*"} + ) + assert thumbnail_response.headers["Content-Type"] == "image/webp" + + def report(media_type, fixture): test_id = fixture["results"][0]["id"] response = requests.post( From 88c02b6ce7d3eed1d9e6aabfa63f00bc341e3443 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 11 Apr 2022 13:51:18 +0400 Subject: [PATCH 11/16] Perform info request only when needed Co-authored-by: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> --- api/catalog/api/views/media_views.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index 55ecc3de0..c564fa79d 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -194,12 +194,15 @@ def _get_proxied_image( is_full_size: bool = False, is_compressed: bool = True, ): - info_res, *_ = MediaViewSet._thumbnail_proxy_comm("info", {"url": image_url}) - info = json.loads(info_res.read()) + width = settings.THUMBNAIL_WIDTH_PX + if is_full_size: + info_res, *_ = MediaViewSet._thumbnail_proxy_comm("info", {"url": image_url}) + info = json.loads(info_res.read()) + width = info["width"] params = { "url": image_url, - "width": info["width"] if is_full_size else settings.THUMBNAIL_WIDTH_PX, + "width": width, } if is_compressed: From 5449e8d733dd2ef994772c183ee024ecdba98977 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 11 Apr 2022 13:52:11 +0400 Subject: [PATCH 12/16] Raise exception from serialiser instead of custom Co-authored-by: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> --- api/catalog/api/views/media_views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index c564fa79d..b48dd3575 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -130,8 +130,7 @@ def report(self, request, *_, **__): def thumbnail(self, image_url, request, *_, **__): serializer = self.get_serializer(data=request.query_params) - if not serializer.is_valid(): - raise get_api_exception("Invalid input.", 400) + serializer.is_valid(raise_exception=True) return self._get_proxied_image( image_url, accept_header=request.headers.get("Accept", "image/*"), From ff1c7df2f96c1e76122590f4d9e35ae982403fcb Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 11 Apr 2022 15:53:46 +0400 Subject: [PATCH 13/16] Use tuples for headers --- api/catalog/api/views/media_views.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index b48dd3575..b1ba6b961 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -1,6 +1,5 @@ import json import logging as log -from typing import List from urllib.error import HTTPError from urllib.parse import urlencode from urllib.request import Request, urlopen @@ -159,9 +158,7 @@ def _get_user_ip(request): def _thumbnail_proxy_comm( path: str, params: dict, - headers: List[ - tuple[str, str] - ] = None, # ``List`` because there is a ``list`` function + headers: tuple[tuple[str, str]] = (), ): proxy_url = settings.THUMBNAIL_PROXY_URL query_string = urlencode(params) @@ -170,10 +167,9 @@ def _thumbnail_proxy_comm( try: req = Request(upstream_url) - if headers: - for key, val in headers: - req.add_header(key, val) - upstream_response = urlopen(req) + for key, val in headers: + req.add_header(key, val) + upstream_response = urlopen(req, timeout=5) res_status = upstream_response.status content_type = upstream_response.headers.get("Content-Type") @@ -195,7 +191,9 @@ def _get_proxied_image( ): width = settings.THUMBNAIL_WIDTH_PX if is_full_size: - info_res, *_ = MediaViewSet._thumbnail_proxy_comm("info", {"url": image_url}) + info_res, *_ = MediaViewSet._thumbnail_proxy_comm( + "info", {"url": image_url} + ) info = json.loads(info_res.read()) width = info["width"] @@ -219,7 +217,7 @@ def _get_proxied_image( params["type"] = "auto" # Use ``Accept`` header to determine output type. img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm( - "resize", params, [("Accept", accept_header)] + "resize", params, (("Accept", accept_header),) ) response = HttpResponse( img_res.read(), status=res_status, content_type=content_type From bb3d89c8bc1b9309e6cb655427d7856337db75be Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 11 Apr 2022 15:54:34 +0400 Subject: [PATCH 14/16] Forward exception message in API --- api/catalog/api/views/media_views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index b1ba6b961..b72b209ac 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -179,8 +179,8 @@ def _thumbnail_proxy_comm( ) return upstream_response, res_status, content_type - except HTTPError: - raise get_api_exception("Failed to render thumbnail.") + except HTTPError as exc: + raise get_api_exception(f"Failed to render thumbnail: {exc}") @staticmethod def _get_proxied_image( From 3c2312110a9f8603e215b0e78e8e64b3e3bc2ac4 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Mon, 11 Apr 2022 16:31:04 +0400 Subject: [PATCH 15/16] Add tests for getting full-sized proxied image --- api/test/audio_integration_test.py | 5 +++++ api/test/image_integration_test.py | 5 +++++ api/test/media_integration.py | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/api/test/audio_integration_test.py b/api/test/audio_integration_test.py index 4e8bfbe0c..1c6d6f78b 100644 --- a/api/test/audio_integration_test.py +++ b/api/test/audio_integration_test.py @@ -18,6 +18,7 @@ stats, thumb, thumb_compression, + thumb_full_size, thumb_webp, ) @@ -83,5 +84,9 @@ def test_audio_thumb_webp(audio_fixture): thumb_webp(audio_fixture) +def test_audio_thumb_full_size(audio_fixture): + thumb_full_size(audio_fixture) + + def test_audio_report(audio_fixture): report("audio", audio_fixture) diff --git a/api/test/image_integration_test.py b/api/test/image_integration_test.py index d984e370d..4e0fc7369 100644 --- a/api/test/image_integration_test.py +++ b/api/test/image_integration_test.py @@ -18,6 +18,7 @@ stats, thumb, thumb_compression, + thumb_full_size, thumb_webp, ) from urllib.parse import urlencode @@ -82,6 +83,10 @@ def test_image_thumb_webp(image_fixture): thumb_webp(image_fixture) +def test_image_thumb_full_size(image_fixture): + thumb_full_size(image_fixture) + + def test_audio_report(image_fixture): report("images", image_fixture) diff --git a/api/test/media_integration.py b/api/test/media_integration.py index 67f769013..b1cf182b5 100644 --- a/api/test/media_integration.py +++ b/api/test/media_integration.py @@ -4,9 +4,11 @@ """ import json +from io import BytesIO from test.constants import API_URL import requests +from PIL import Image def search(fixture): @@ -127,6 +129,22 @@ def thumb_webp(fixture): assert thumbnail_response.headers["Content-Type"] == "image/webp" +def thumb_full_size(fixture): + def _get_image_dimen(url: str) -> tuple[int, int]: + response = requests.get(url) + image = Image.open(BytesIO(response.content)) + return image.size + + thumbnail_url = fixture["results"][0]["thumbnail"] + full_w, full_h = _get_image_dimen(f"{thumbnail_url}?full_size=yes") + scaled_w, scaled_h = _get_image_dimen(thumbnail_url) + if full_w > 600: + assert scaled_w == 600 + assert full_w > scaled_w + else: + assert scaled_w == full_w # h2non/imaginary will not scale up + + def report(media_type, fixture): test_id = fixture["results"][0]["id"] response = requests.post( From a4d15c5395f422c1912ec9790ebc5fd0569a8959 Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Wed, 13 Apr 2022 00:52:38 +0400 Subject: [PATCH 16/16] Add a workflow to build our custom image for imaginary (#641) * Define a workflow to build Docker image for imaginary * Use custom imaginary image in Docker Compose --- .github/workflows/build_imaginary.yml | 57 +++++++++++++++++++++++++++ docker-compose.yml | 2 +- 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build_imaginary.yml diff --git a/.github/workflows/build_imaginary.yml b/.github/workflows/build_imaginary.yml new file mode 100644 index 000000000..1724b7647 --- /dev/null +++ b/.github/workflows/build_imaginary.yml @@ -0,0 +1,57 @@ +name: Build imaginary +on: + workflow_dispatch: + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - name: Checkout h2non/imaginary repository + uses: actions/checkout@v3 + with: + repository: h2non/imaginary + path: imaginary + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + with: + install: true + + - name: Log in to GitHub Docker Registry + uses: docker/login-action@v1 + with: + registry: https://ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Go + uses: actions/setup-go@v3 + with: + go-version: "1.17" + + - name: Update modules # https://github.com/h2non/imaginary/issues/387 + working-directory: imaginary + run: | + sed -i 's/bimg v1.1.4/bimg v1.1.7/' go.mod + go mod tidy + + - name: Skip tests # https://github.com/golang/go/issues/29948 + working-directory: imaginary + run: | + sed -i 's/RUN go test/# RUN go test/' Dockerfile + sed -i 's/RUN golangci/# RUN golangci/' Dockerfile + + - name: Build image 'imaginary' + uses: docker/build-push-action@v2 + with: + context: imaginary + platforms: linux/arm64,linux/amd64 + cache-from: type=gha,scope=imaginary + cache-to: type=gha,scope=imaginary + push: true + tags: | + ghcr.io/wordpress/openverse-imaginary:latest diff --git a/docker-compose.yml b/docker-compose.yml index f28c68c36..beed92cc8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,7 +12,7 @@ services: - api-postgres:/var/lib/postgresql/data thumbnails: - image: h2non/imaginary:latest + image: ghcr.io/wordpress/openverse-imaginary:latest ports: - "8222:8222" environment: