Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Improve the thumbnail service to support compression and WEBP (#630)
Browse files Browse the repository at this point in the history
Co-authored-by: sarayourfriend <24264157+sarayourfriend@users.noreply.github.com>
  • Loading branch information
dhruvkb and sarayourfriend committed Apr 12, 2022
1 parent 0df2105 commit 2d5779b
Show file tree
Hide file tree
Showing 15 changed files with 286 additions and 35 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/build_imaginary.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Manually triggered workflow that builds a multi-architecture Docker image of
# h2non/imaginary and pushes it to the GitHub Container Registry.
name: Build imaginary
on:
  workflow_dispatch:

# Least privilege for GITHUB_TOKEN: the job only needs to push a package to
# ghcr.io. Declaring this explicitly keeps the push working even when the
# repository default for the token is read-only.
permissions:
  contents: read
  packages: write

jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      # The image is built from upstream sources, not from this repository.
      - name: Checkout h2non/imaginary repository
        uses: actions/checkout@v3
        with:
          repository: h2non/imaginary
          path: imaginary

      # QEMU + Buildx are set up for the multi-platform build below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
        with:
          install: true

      - name: Log in to GitHub Docker Registry
        uses: docker/login-action@v1
        with:
          registry: https://ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Go
        uses: actions/setup-go@v3
        with:
          go-version: "1.17"

      # Bump the pinned bimg dependency before building.
      - name: Update modules # https://github.com/h2non/imaginary/issues/387
        working-directory: imaginary
        run: |
          sed -i 's/bimg v1.1.4/bimg v1.1.7/' go.mod
          go mod tidy

      # Comment out the test and lint steps of the upstream Dockerfile.
      - name: Skip tests # https://github.com/golang/go/issues/29948
        working-directory: imaginary
        run: |
          sed -i 's/RUN go test/# RUN go test/' Dockerfile
          sed -i 's/RUN golangci/# RUN golangci/' Dockerfile

      - name: Build image 'imaginary'
        uses: docker/build-push-action@v2
        with:
          context: imaginary
          platforms: linux/arm64,linux/amd64
          cache-from: type=gha,scope=imaginary
          cache-to: type=gha,scope=imaginary
          push: true
          tags: |
            ghcr.io/wordpress/openverse-imaginary:latest
16 changes: 16 additions & 0 deletions api/catalog/api/docs/audio_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
MediaSearch,
MediaStats,
fields_to_md,
refer_sample,
)
from catalog.api.examples import (
audio_complain_201_example,
Expand All @@ -31,6 +32,7 @@
InputErrorSerializer,
NotFoundErrorSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.serializers.provider_serializers import ProviderSerializer
from drf_yasg import openapi

Expand Down Expand Up @@ -206,3 +208,17 @@ class AudioComplain(MediaComplain):
"responses": responses,
"code_examples": code_examples,
}


class AudioThumbnail:
    """
    Swagger settings for the audio ``thumbnail`` endpoint.

    ``swagger_setup`` holds the keyword arguments for ``swagger_auto_schema``:
    the operation ID, its description and the serializer that documents the
    accepted query string parameters.
    """

    desc = f"""
thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail
of the artwork of an audio track or its audio set.
{refer_sample}"""

    swagger_setup = dict(
        operation_id="audio_thumbnail",
        operation_description=desc,
        query_serializer=MediaThumbnailRequestSerializer,
    )
15 changes: 15 additions & 0 deletions api/catalog/api/docs/image_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
OembedRequestSerializer,
OembedSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.serializers.provider_serializers import ProviderSerializer
from drf_yasg import openapi

Expand Down Expand Up @@ -238,3 +239,17 @@ class ImageOembed:
"responses": responses,
"code_examples": code_examples,
}


class ImageThumbnail:
    """
    Swagger settings for the image ``thumbnail`` endpoint.

    ``swagger_setup`` holds the keyword arguments for ``swagger_auto_schema``:
    the operation ID, its description and the serializer that documents the
    accepted query string parameters.
    """

    desc = f"""
thumbnail is an API endpoint to retrieve the scaled down and compressed thumbnail
of an image.
{refer_sample}"""

    swagger_setup = dict(
        operation_id="image_thumbnail",
        operation_description=desc,
        query_serializer=MediaThumbnailRequestSerializer,
    )
27 changes: 27 additions & 0 deletions api/catalog/api/serializers/media_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,3 +420,30 @@ class MediaSearchSerializer(serializers.Serializer):
page = serializers.IntegerField(
help_text="The current page number returned in the response."
)


class MediaThumbnailRequestSerializer(serializers.Serializer):
    """
    This serializer parses and validates thumbnail query string parameters.

    The validated data is keyed by each field's ``source``, i.e.
    ``is_full_size`` and ``is_compressed``. When ``compressed`` is omitted or
    null, it is resolved to the opposite of ``full_size``.
    """

    full_size = serializers.BooleanField(
        source="is_full_size",
        allow_null=True,
        required=False,
        default=False,
        help_text="whether to render the actual image and not a thumbnail version",
    )
    compressed = serializers.BooleanField(
        source="is_compressed",
        allow_null=True,
        default=None,
        required=False,
        # The trailing space matters: adjacent literals are concatenated as-is.
        help_text="whether to compress the output image to reduce file size, "
        "defaults to opposite of `full_size`",
    )

    def validate(self, data):
        """
        Resolve both flags to plain booleans.

        :param data: the field values keyed by ``source`` name
        :return: ``data`` with ``is_full_size`` and ``is_compressed`` set
        """

        # A missing or explicitly null ``full_size`` means "not full size".
        is_full_size = bool(data.get("is_full_size"))
        data["is_full_size"] = is_full_size
        if data.get("is_compressed") is None:
            data["is_compressed"] = not is_full_size
        return data
11 changes: 5 additions & 6 deletions api/catalog/api/views/audio_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
AudioRelated,
AudioSearch,
AudioStats,
AudioThumbnail,
)
from catalog.api.models import Audio
from catalog.api.serializers.audio_serializers import (
Expand All @@ -14,6 +15,7 @@
AudioSerializer,
AudioWaveformSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.utils.exceptions import get_api_exception
from catalog.api.utils.throttle import OneThousandPerMinute
from catalog.api.views.media_views import MediaViewSet
Expand All @@ -31,7 +33,7 @@
@method_decorator(swagger_auto_schema(**AudioDetail.swagger_setup), "retrieve")
@method_decorator(swagger_auto_schema(**AudioRelated.swagger_setup), "related")
@method_decorator(swagger_auto_schema(**AudioComplain.swagger_setup), "report")
@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail")
@method_decorator(swagger_auto_schema(**AudioThumbnail.swagger_setup), "thumbnail")
@method_decorator(swagger_auto_schema(auto_schema=None), "waveform")
class AudioViewSet(MediaViewSet):
"""
Expand All @@ -51,6 +53,7 @@ class AudioViewSet(MediaViewSet):
detail=True,
url_path="thumb",
url_name="thumb",
serializer_class=MediaThumbnailRequestSerializer,
throttle_classes=[OneThousandPerMinute],
)
def thumbnail(self, request, *_, **__):
Expand All @@ -64,11 +67,7 @@ def thumbnail(self, request, *_, **__):
if not image_url:
raise get_api_exception("Could not find artwork.", 404)

is_full_size = request.query_params.get("full_size", False)
if is_full_size:
return self._get_proxied_image(image_url, None)
else:
return self._get_proxied_image(image_url)
return super().thumbnail(image_url, request)

@action(
detail=True,
Expand Down
11 changes: 5 additions & 6 deletions api/catalog/api/views/image_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
ImageRelated,
ImageSearch,
ImageStats,
ImageThumbnail,
)
from catalog.api.models import Image
from catalog.api.serializers.image_serializers import (
Expand All @@ -20,6 +21,7 @@
OembedSerializer,
WatermarkRequestSerializer,
)
from catalog.api.serializers.media_serializers import MediaThumbnailRequestSerializer
from catalog.api.utils.exceptions import get_api_exception
from catalog.api.utils.throttle import OneThousandPerMinute
from catalog.api.utils.watermark import watermark
Expand All @@ -42,7 +44,7 @@
@method_decorator(swagger_auto_schema(**ImageRelated.swagger_setup), "related")
@method_decorator(swagger_auto_schema(**ImageComplain.swagger_setup), "report")
@method_decorator(swagger_auto_schema(**ImageOembed.swagger_setup), "oembed")
@method_decorator(swagger_auto_schema(auto_schema=None), "thumbnail")
@method_decorator(swagger_auto_schema(**ImageThumbnail.swagger_setup), "thumbnail")
@method_decorator(swagger_auto_schema(auto_schema=None), "watermark")
class ImageViewSet(MediaViewSet):
"""
Expand Down Expand Up @@ -91,6 +93,7 @@ def oembed(self, request, *_, **__):
detail=True,
url_path="thumb",
url_name="thumb",
serializer_class=MediaThumbnailRequestSerializer,
throttle_classes=[OneThousandPerMinute],
)
def thumbnail(self, request, *_, **__):
Expand All @@ -100,11 +103,7 @@ def thumbnail(self, request, *_, **__):
if not image_url:
raise get_api_exception("Could not find image.", 404)

is_full_size = request.query_params.get("full_size", False)
if is_full_size:
return self._get_proxied_image(image_url, None)
else:
return self._get_proxied_image(image_url)
return super().thumbnail(image_url, request)

@action(detail=True, url_path="watermark", url_name="watermark")
def watermark(self, request, *_, **__):
Expand Down
91 changes: 75 additions & 16 deletions api/catalog/api/views/media_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import json
import logging as log
from urllib.error import HTTPError
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.request import Request, urlopen

from catalog.api.controllers import search_controller
from catalog.api.models import ContentProvider
Expand Down Expand Up @@ -124,6 +127,15 @@ def report(self, request, *_, **__):
serializer = self.get_serializer(report)
return Response(data=serializer.data, status=status.HTTP_201_CREATED)

def thumbnail(self, image_url, request, *_, **__):
    """
    Validate the thumbnail query parameters and return the proxied image.

    :param image_url: the URL of the image to pass to the thumbnail proxy
    :param request: the incoming request; its query parameters are validated
        by the view's serializer and its ``Accept`` header is forwarded
    :return: the result of ``_get_proxied_image`` for the validated options
    """

    query_serializer = self.get_serializer(data=request.query_params)
    query_serializer.is_valid(raise_exception=True)

    # Clients that send no ``Accept`` header are treated as accepting any image.
    accept = request.headers.get("Accept", "image/*")
    options = query_serializer.validated_data
    return self._get_proxied_image(image_url, accept_header=accept, **options)

# Helper functions

@staticmethod
Expand All @@ -143,24 +155,71 @@ def _get_user_ip(request):
return ip

@staticmethod
def _thumbnail_proxy_comm(
    path: str,
    params: dict,
    headers: tuple[tuple[str, str], ...] = (),
):
    """
    Send a GET request to the thumbnail proxy and return its response.

    :param path: the proxy endpoint, appended to ``THUMBNAIL_PROXY_URL``
    :param params: the query string parameters, URL-encoded before sending
    :param headers: ``(name, value)`` pairs added to the upstream request
    :return: a ``(response, status, content_type)`` tuple; the response body
        is left unread, so the caller is responsible for reading and closing it
    :raise: the exception built by ``get_api_exception`` when the upstream
        request fails
    """

    proxy_url = settings.THUMBNAIL_PROXY_URL
    query_string = urlencode(params)
    upstream_url = f"{proxy_url}/{path}?{query_string}"
    log.debug("Image proxy upstream URL: %s", upstream_url)

    req = Request(upstream_url)
    for key, val in headers:
        req.add_header(key, val)

    try:
        upstream_response = urlopen(req, timeout=5)
    except OSError as exc:
        # ``HTTPError`` and ``URLError`` both derive from ``OSError``, as do
        # socket timeouts, so unreachable or slow proxies are reported the
        # same way as HTTP error statuses.
        raise get_api_exception(f"Failed to render thumbnail: {exc}") from exc

    res_status = upstream_response.status
    content_type = upstream_response.headers.get("Content-Type")
    log.debug(
        "Image proxy response status: %s, content-type: %s",
        res_status,
        content_type,
    )

    return upstream_response, res_status, content_type

@staticmethod
def _get_proxied_image(
    image_url: str,
    accept_header: str = "image/*",
    is_full_size: bool = False,
    is_compressed: bool = True,
):
    """
    Fetch a rendition of ``image_url`` from the thumbnail proxy.

    :param image_url: the URL of the source image
    :param accept_header: the client's ``Accept`` header, forwarded upstream
    :param is_full_size: whether to keep the source image's own width instead
        of ``THUMBNAIL_WIDTH_PX``
    :param is_compressed: whether to send the configured quality and
        compression settings instead of the lossless values
    :return: an ``HttpResponse`` carrying the proxy's body, status and
        content type
    """

    width = settings.THUMBNAIL_WIDTH_PX
    if is_full_size:
        # Full size means "resize" to the original width, which is looked up
        # from the proxy's ``info`` endpoint.
        info_res, *_ = MediaViewSet._thumbnail_proxy_comm(
            "info", {"url": image_url}
        )
        with info_res:  # release the upstream connection once read
            info = json.loads(info_res.read())
        width = info["width"]

    if is_compressed:
        quality = settings.THUMBNAIL_JPG_QUALITY
        compression = settings.THUMBNAIL_PNG_COMPRESSION
    else:
        quality, compression = 100, 0

    params = {
        "url": image_url,
        "width": width,
        "quality": quality,
        "compression": compression,
    }

    if "webp" in accept_header:
        params["type"] = "auto"  # Use ``Accept`` header to determine output type.

    img_res, res_status, content_type = MediaViewSet._thumbnail_proxy_comm(
        "resize", params, (("Accept", accept_header),)
    )
    with img_res:  # release the upstream connection once read
        body = img_res.read()

    return HttpResponse(body, status=res_status, content_type=content_type)
4 changes: 3 additions & 1 deletion api/catalog/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,9 @@
# Produce CC-hosted thumbnails dynamically through a proxy.
THUMBNAIL_PROXY_URL = config("THUMBNAIL_PROXY_URL", default="http://localhost:8222")

THUMBNAIL_WIDTH_PX = 600
THUMBNAIL_WIDTH_PX = config("THUMBNAIL_WIDTH_PX", cast=int, default=600)
THUMBNAIL_JPG_QUALITY = config("THUMBNAIL_JPG_QUALITY", cast=int, default=80)
THUMBNAIL_PNG_COMPRESSION = config("THUMBNAIL_PNG_COMPRESSION", cast=int, default=6)

AUTHENTICATION_BACKENDS = (
"oauth2_provider.backends.OAuth2Backend",
Expand Down
2 changes: 1 addition & 1 deletion api/docs/guides/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ The command `just up` spawns the following services:
- API application database
- [Elasticsearch](https://www.elastic.co/elasticsearch/)
- [Redis](https://redis.io/)
- [imageproxy](https://github.com/willnorris/imageproxy)
- [imaginary](https://github.com/h2non/imaginary)
- [NGINX](http://nginx.org)
- **web** (`api/`)
- **ingestion_server** and **indexer_worker** (`ingestion_server/`)
Expand Down
2 changes: 1 addition & 1 deletion api/env.docker
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ DJANGO_DEBUG_ENABLED="True"

REDIS_HOST="cache"

THUMBNAIL_PROXY_URL="http://thumbs:8222"
THUMBNAIL_PROXY_URL="http://thumbnails:8222"

DJANGO_DATABASE_HOST="db"

Expand Down
6 changes: 5 additions & 1 deletion api/env.template
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ DJANGO_DEBUG_ENABLED="True"
#REDIS_PORT="6379"
#REDIS_PASSWORD=""

#THUMBNAIL_PROXY_URL="http://thumbs:8222"
#THUMBNAIL_PROXY_URL="http://thumbnails:8222"

#THUMBNAIL_WIDTH_PX="600"
#THUMBNAIL_JPG_QUALITY="80"
#THUMBNAIL_PNG_COMPRESSION="6"

#DJANGO_DATABASE_HOST="db"
#DJANGO_DATABASE_PORT="5432"
Expand Down
Loading

0 comments on commit 2d5779b

Please sign in to comment.