From 2d5779b81827f161dc65159fd264d286f550619b Mon Sep 17 00:00:00 2001 From: Dhruv Bhanushali Date: Wed, 13 Apr 2022 00:58:36 +0400 Subject: [PATCH] Improve the thumbnail service to support compression and WEBP (#630) Co-authored-by: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com> --- .github/workflows/build_imaginary.yml | 57 ++++++++++++ api/catalog/api/docs/audio_docs.py | 16 ++++ api/catalog/api/docs/image_docs.py | 15 +++ .../api/serializers/media_serializers.py | 27 ++++++ api/catalog/api/views/audio_views.py | 11 +-- api/catalog/api/views/image_views.py | 11 +-- api/catalog/api/views/media_views.py | 91 +++++++++++++++---- api/catalog/settings.py | 4 +- api/docs/guides/quickstart.md | 2 +- api/env.docker | 2 +- api/env.template | 6 +- api/test/audio_integration_test.py | 15 +++ api/test/image_integration_test.py | 15 +++ api/test/media_integration.py | 40 ++++++++ docker-compose.yml | 9 +- 15 files changed, 286 insertions(+), 35 deletions(-) create mode 100644 .github/workflows/build_imaginary.yml diff --git a/.github/workflows/build_imaginary.yml b/.github/workflows/build_imaginary.yml new file mode 100644 index 000000000..1724b7647 --- /dev/null +++ b/.github/workflows/build_imaginary.yml @@ -0,0 +1,57 @@ +name: Build imaginary +on: + workflow_dispatch: + +jobs: + build: + name: Build + runs-on: ubuntu-latest + steps: + - name: Checkout h2non/imaginary repository + uses: actions/checkout@v3 + with: + repository: h2non/imaginary + path: imaginary + + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + with: + install: true + + - name: Log in to GitHub Docker Registry + uses: docker/login-action@v1 + with: + registry: https://ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup Go + uses: actions/setup-go@v3 + with: + go-version: "1.17" + + - name: Update modules # https://github.com/h2non/imaginary/issues/387 + working-directory: imaginary + run: | + sed -i 's/bimg v1.1.4/bimg v1.1.7/' go.mod + go mod tidy + + - name: Skip tests # https://github.com/golang/go/issues/29948 + working-directory: imaginary + run: | + sed -i 's/RUN go test/# RUN go test/' Dockerfile + sed -i 's/RUN golangci/# RUN golangci/' Dockerfile + + - name: Build image 'imaginary' + uses: docker/build-push-action@v2 + with: + context: imaginary + platforms: linux/arm64,linux/amd64 + cache-from: type=gha,scope=imaginary + cache-to: type=gha,scope=imaginary + push: true + tags: | + ghcr.io/wordpress/openverse-imaginary:latest diff --git a/api/catalog/api/docs/audio_docs.py b/api/catalog/api/docs/audio_docs.py index 79c00f68c..b7964136a 100644 --- a/api/catalog/api/docs/audio_docs.py +++ b/api/catalog/api/docs/audio_docs.py @@ -5,6 +5,7 @@ MediaSearch, MediaStats, fields_to_md, + refer_sample, ) from catalog.api.examples import ( audio_complain_201_example, @@ -31,6 +32,7 @@ InputErrorSerializer, NotFoundErrorSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.serializers.provider_serializers import ProviderSerializer from drf_yasg import openapi @@ -206,3 +208,17 @@ class AudioComplain(MediaComplain): "responses": responses, "code_examples": code_examples, } + + +class AudioThumbnail: + desc = f""" +thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail +of the artwork of an audio track or its audio set. + +{refer_sample}""" + + swagger_setup = { + "operation_id": "audio_thumbnail", + "operation_description": desc, + "query_serializer": MediaThumbnailRequestSerializer, + } diff --git a/api/catalog/api/docs/image_docs.py b/api/catalog/api/docs/image_docs.py index f5c5fac9f..b53bac983 100644 --- a/api/catalog/api/docs/image_docs.py +++ b/api/catalog/api/docs/image_docs.py @@ -37,6 +37,7 @@ OembedRequestSerializer, OembedSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.serializers.provider_serializers import ProviderSerializer from drf_yasg import openapi @@ -238,3 +239,17 @@ class ImageOembed: "responses": responses, "code_examples": code_examples, } + + +class ImageThumbnail: + desc = f""" +thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail +of an image. + +{refer_sample}""" + + swagger_setup = { + "operation_id": "image_thumbnail", + "operation_description": desc, + "query_serializer": MediaThumbnailRequestSerializer, + } diff --git a/api/catalog/api/serializers/media_serializers.py b/api/catalog/api/serializers/media_serializers.py index 6bb046a49..24689b9f5 100644 --- a/api/catalog/api/serializers/media_serializers.py +++ b/api/catalog/api/serializers/media_serializers.py @@ -420,3 +420,30 @@ class MediaSearchSerializer(serializers.Serializer): page = serializers.IntegerField( help_text="The current page number returned in the response." ) + + +class MediaThumbnailRequestSerializer(serializers.Serializer): + """ + This serializer parses and validates thumbnail query string parameters. + """ + + full_size = serializers.BooleanField( + source="is_full_size", + allow_null=True, + required=False, + default=False, + help_text="whether to render the actual image and not a thumbnail version", + ) + compressed = serializers.BooleanField( + source="is_compressed", + allow_null=True, + default=None, + required=False, + help_text="whether to compress the output image to reduce file size," + "defaults to opposite of `full_size`", + ) + + def validate(self, data): + if data.get("is_compressed") is None: + data["is_compressed"] = not data["is_full_size"] + return data diff --git a/api/catalog/api/views/audio_views.py b/api/catalog/api/views/audio_views.py index 9bf2b7021..7791a0c6d 100644 --- a/api/catalog/api/views/audio_views.py +++ b/api/catalog/api/views/audio_views.py @@ -6,6 +6,7 @@ AudioRelated, AudioSearch, AudioStats, + AudioThumbnail, ) from catalog.api.models import Audio from catalog.api.serializers.audio_serializers import ( @@ -14,6 +15,7 @@ AudioSerializer, AudioWaveformSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.utils.exceptions import get_api_exception from catalog.api.utils.throttle import OneThousandPerMinute from catalog.api.views.media_views import MediaViewSet @@ -31,7 +33,7 @@ @method_decorator(swagger_auto_schema(**AudioDetail.swagger_setup), "retrieve") @method_decorator(swagger_auto_schema(**AudioRelated.swagger_setup), "related") @method_decorator(swagger_auto_schema(**AudioComplain.swagger_setup), "report") -@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail") +@method_decorator(swagger_auto_schema(**AudioThumbnail.swagger_setup), "thumbnail") @method_decorator(swagger_auto_schema(auto_schema=None), "waveform") class AudioViewSet(MediaViewSet): """ @@ -51,6 +53,7 @@ class AudioViewSet(MediaViewSet): detail=True, url_path="thumb", url_name="thumb", + serializer_class=MediaThumbnailRequestSerializer, throttle_classes=[OneThousandPerMinute], ) def thumbnail(self, request, *_, **__): @@ -64,11 +67,7 @@ def thumbnail(self, request, *_, **__): if not image_url: raise get_api_exception("Could not find artwork.", 404) - is_full_size = request.query_params.get("full_size", False) - if is_full_size: - return self._get_proxied_image(image_url, None) - else: - return self._get_proxied_image(image_url) + return super().thumbnail(image_url, request) @action( detail=True, diff --git a/api/catalog/api/views/image_views.py b/api/catalog/api/views/image_views.py index c506f5762..d2ab23e97 100644 --- a/api/catalog/api/views/image_views.py +++ b/api/catalog/api/views/image_views.py @@ -10,6 +10,7 @@ ImageRelated, ImageSearch, ImageStats, + ImageThumbnail, ) from catalog.api.models import Image from catalog.api.serializers.image_serializers import ( @@ -20,6 +21,7 @@ OembedSerializer, WatermarkRequestSerializer, ) +from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer from catalog.api.utils.exceptions import get_api_exception from catalog.api.utils.throttle import OneThousandPerMinute from catalog.api.utils.watermark import watermark @@ -42,7 +44,7 @@ @method_decorator(swagger_auto_schema(**ImageRelated.swagger_setup), "related") @method_decorator(swagger_auto_schema(**ImageComplain.swagger_setup), "report") @method_decorator(swagger_auto_schema(**ImageOembed.swagger_setup), "oembed") -@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail") +@method_decorator(swagger_auto_schema(**ImageThumbnail.swagger_setup), "thumbnail") @method_decorator(swagger_auto_schema(auto_schema=None), "watermark") class ImageViewSet(MediaViewSet): """ @@ -91,6 +93,7 @@ def oembed(self, request, *_, **__): detail=True, url_path="thumb", url_name="thumb", + serializer_class=MediaThumbnailRequestSerializer, throttle_classes=[OneThousandPerMinute], ) def thumbnail(self, request, *_, **__): @@ -100,11 +103,7 @@ def thumbnail(self, request, *_, **__): if not image_url: raise get_api_exception("Could not find image.", 404) - is_full_size = request.query_params.get("full_size", False) - if is_full_size: - return self._get_proxied_image(image_url, None) - else: - return self._get_proxied_image(image_url) + return super().thumbnail(image_url, request) @action(detail=True, url_path="watermark", url_name="watermark") def watermark(self, request, *_, **__): diff --git a/api/catalog/api/views/media_views.py b/api/catalog/api/views/media_views.py index 63c33fdbd..b72b209ac 100644 --- a/api/catalog/api/views/media_views.py +++ b/api/catalog/api/views/media_views.py @@ -1,5 +1,8 @@ +import json +import logging as log from urllib.error import HTTPError -from urllib.request import urlopen +from urllib.parse import urlencode +from urllib.request import Request, urlopen from catalog.api.controllers import search_controller from catalog.api.models import ContentProvider @@ -124,6 +127,15 @@ def report(self, request, *_, **__): serializer = self.get_serializer(report) return Response(data=serializer.data, status=status.HTTP_201_CREATED) + def thumbnail(self, image_url, request, *_, **__): + serializer = self.get_serializer(data=request.query_params) + serializer.is_valid(raise_exception=True) + return self._get_proxied_image( + image_url, + accept_header=request.headers.get("Accept", "image/*"), + **serializer.validated_data, + ) + # Helper functions @staticmethod @@ -143,24 +155,71 @@ def _get_user_ip(request): return ip @staticmethod - def _get_proxied_image(image_url, width=settings.THUMBNAIL_WIDTH_PX): - if width is None: # full size - proxy_upstream = f"{settings.THUMBNAIL_PROXY_URL}/{image_url}" - else: - proxy_upstream = ( - f"{settings.THUMBNAIL_PROXY_URL}/" - f"{settings.THUMBNAIL_WIDTH_PX},fit/" - f"{image_url}" - ) + def _thumbnail_proxy_comm( + path: str, + params: dict, + headers: tuple[tuple[str, str]] = (), + ): + proxy_url = settings.THUMBNAIL_PROXY_URL + query_string = urlencode(params) + upstream_url = f"{proxy_url}/{path}?{query_string}" + log.debug(f"Image proxy upstream URL: {upstream_url}") + try: - upstream_response = urlopen(proxy_upstream) - status = upstream_response.status + req = Request(upstream_url) + for key, val in headers: + req.add_header(key, val) + upstream_response = urlopen(req, timeout=5) + + res_status = upstream_response.status content_type = upstream_response.headers.get("Content-Type") - except HTTPError: - raise get_api_exception("Failed to render thumbnail.") + log.debug( + "Image proxy response " + f"status: {res_status}, content-type: {content_type}" + ) + + return upstream_response, res_status, content_type + except HTTPError as exc: + raise get_api_exception(f"Failed to render thumbnail: {exc}") + + @staticmethod + def _get_proxied_image( + image_url: str, + accept_header: str = "image/*", + is_full_size: bool = False, + is_compressed: bool = True, + ): + width = settings.THUMBNAIL_WIDTH_PX + if is_full_size: + info_res, *_ = MediaViewSet._thumbnail_proxy_comm( + "info", {"url": image_url} + ) + info = json.loads(info_res.read()) + width = info["width"] + params = { + "url": image_url, + "width": width, + } + + if is_compressed: + params |= { + "quality": settings.THUMBNAIL_JPG_QUALITY, + "compression": settings.THUMBNAIL_PNG_COMPRESSION, + } + else: + params |= { + "quality": 100, + "compression": 0, + } + + if "webp" in accept_header: + params["type"] = "auto" # Use ``Accept`` header to determine output type. + + img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm( + "resize", params, (("Accept", accept_header),) + ) response = HttpResponse( - upstream_response.read(), status=status, content_type=content_type + img_res.read(), status=res_status, content_type=content_type ) - return response diff --git a/api/catalog/settings.py b/api/catalog/settings.py index b2338789e..4778420c7 100644 --- a/api/catalog/settings.py +++ b/api/catalog/settings.py @@ -184,7 +184,9 @@ # Produce CC-hosted thumbnails dynamically through a proxy. THUMBNAIL_PROXY_URL = config("THUMBNAIL_PROXY_URL", default="http://localhost:8222") -THUMBNAIL_WIDTH_PX = 600 +THUMBNAIL_WIDTH_PX = config("THUMBNAIL_WIDTH_PX", cast=int, default=600) +THUMBNAIL_JPG_QUALITY = config("THUMBNAIL_JPG_QUALITY", cast=int, default=80) +THUMBNAIL_PNG_COMPRESSION = config("THUMBNAIL_PNG_COMPRESSION", cast=int, default=6) AUTHENTICATION_BACKENDS = ( "oauth2_provider.backends.OAuth2Backend", diff --git a/api/docs/guides/quickstart.md b/api/docs/guides/quickstart.md index 36e97110c..d277d4cbc 100644 --- a/api/docs/guides/quickstart.md +++ b/api/docs/guides/quickstart.md @@ -79,7 +79,7 @@ The command `just up` spawns the following services: - API application database - [Elasticsearch](https://www.elastic.co/elasticsearch/) - [Redis](https://redis.io/) -- [imageproxy](https://github.com/willnorris/imageproxy) +- [imaginary](https://github.com/h2non/imaginary) - [NGINX](http://nginx.org) - **web** (`api/`) - **ingestion_server** and **indexer_worker** (`ingestion_server/`) diff --git a/api/env.docker b/api/env.docker index e76c27c5a..d2a78d94d 100644 --- a/api/env.docker +++ b/api/env.docker @@ -6,7 +6,7 @@ DJANGO_DEBUG_ENABLED="True" REDIS_HOST="cache" -THUMBNAIL_PROXY_URL="http://thumbs:8222" +THUMBNAIL_PROXY_URL="http://thumbnails:8222" DJANGO_DATABASE_HOST="db" diff --git a/api/env.template b/api/env.template index a59c4c706..556985cc4 100644 --- a/api/env.template +++ b/api/env.template @@ -15,7 +15,11 @@ DJANGO_DEBUG_ENABLED="True" #REDIS_PORT="6379" #REDIS_PASSWORD="" -#THUMBNAIL_PROXY_URL="http://thumbs:8222" +#THUMBNAIL_PROXY_URL="http://thumbnails:8222" + +#THUMBNAIL_WIDTH_PX="600" +#THUMBNAIL_JPG_QUALITY="80" +#THUMBNAIL_PNG_COMPRESSION="6" #DJANGO_DATABASE_HOST="db" #DJANGO_DATABASE_PORT="5432" diff --git a/api/test/audio_integration_test.py b/api/test/audio_integration_test.py index f545cdb62..1c6d6f78b 100644 --- a/api/test/audio_integration_test.py +++ b/api/test/audio_integration_test.py @@ -17,6 +17,9 @@ search_special_chars, stats, thumb, + thumb_compression, + thumb_full_size, + thumb_webp, ) import pytest @@ -73,5 +76,17 @@ def test_audio_thumb(audio_fixture): thumb(audio_fixture) +def test_audio_thumb_compression(audio_fixture): + thumb_compression(audio_fixture) + + +def test_audio_thumb_webp(audio_fixture): + thumb_webp(audio_fixture) + + +def test_audio_thumb_full_size(audio_fixture): + thumb_full_size(audio_fixture) + + def test_audio_report(audio_fixture): report("audio", audio_fixture) diff --git a/api/test/image_integration_test.py b/api/test/image_integration_test.py index e892a42c4..4e0fc7369 100644 --- a/api/test/image_integration_test.py +++ b/api/test/image_integration_test.py @@ -17,6 +17,9 @@ search_special_chars, stats, thumb, + thumb_compression, + thumb_full_size, + thumb_webp, ) from urllib.parse import urlencode @@ -72,6 +75,18 @@ def test_image_thumb(image_fixture): thumb(image_fixture) +def test_image_thumb_compression(image_fixture): + thumb_compression(image_fixture) + + +def test_image_thumb_webp(image_fixture): + thumb_webp(image_fixture) + + +def test_image_thumb_full_size(image_fixture): + thumb_full_size(image_fixture) + + def test_audio_report(image_fixture): report("images", image_fixture) diff --git a/api/test/media_integration.py b/api/test/media_integration.py index 9bc1c3409..b1cf182b5 100644 --- a/api/test/media_integration.py +++ b/api/test/media_integration.py @@ -4,9 +4,11 @@ """ import json +from io import BytesIO from test.constants import API_URL import requests +from PIL import Image def search(fixture): @@ -105,6 +107,44 @@ def thumb(fixture): assert thumbnail_response.headers["Content-Type"].startswith("image/") +def thumb_compression(fixture): + thumbnail_url = fixture["results"][0]["thumbnail"] + + thumbnail_response = requests.get(thumbnail_url) + compressed_size = len(thumbnail_response.content) + thumbnail_response = requests.get(f"{thumbnail_url}?compressed=no") + actual_size = len(thumbnail_response.content) + + assert compressed_size < actual_size + + +def thumb_webp(fixture): + thumbnail_url = fixture["results"][0]["thumbnail"] + + thumbnail_response = requests.get(thumbnail_url, headers={"Accept": "image/*,*/*"}) + assert thumbnail_response.headers["Content-Type"] != "image/webp" + thumbnail_response = requests.get( + thumbnail_url, headers={"Accept": "image/webp,image/*,*/*"} + ) + assert thumbnail_response.headers["Content-Type"] == "image/webp" + + +def thumb_full_size(fixture): + def _get_image_dimen(url: str) -> tuple[int, int]: + response = requests.get(url) + image = Image.open(BytesIO(response.content)) + return image.size + + thumbnail_url = fixture["results"][0]["thumbnail"] + full_w, full_h = _get_image_dimen(f"{thumbnail_url}?full_size=yes") + scaled_w, scaled_h = _get_image_dimen(thumbnail_url) + if full_w > 600: + assert scaled_w == 600 + assert full_w > scaled_w + else: + assert scaled_w == full_w # h2non/imaginary will not scale up + + def report(media_type, fixture): test_id = fixture["results"][0]["id"] response = requests.post( diff --git a/docker-compose.yml b/docker-compose.yml index 8d2649ff7..beed92cc8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,11 +11,14 @@ services: volumes: - api-postgres:/var/lib/postgresql/data - thumbs: - image: willnorris/imageproxy + thumbnails: + image: ghcr.io/wordpress/openverse-imaginary:latest ports: - "8222:8222" - command: ["-addr", "0.0.0.0:8222"] + environment: + PORT: 8222 + MALLOC_ARENA_MAX: 2 + command: ["-enable-url-source"] upstream_db: image: postgres:13.2-alpine