updated tests

front-matter · Mar 26, 2024 · 04de9a2 · 04de9a2
1 parent eb2cd21
commit 04de9a2
Show file tree

Hide file tree

Showing 23 changed files with 1,166 additions and 266,771 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,4 +1,3 @@
 fly.toml
-.venv
 /tests
 /build
diff --git a/api/__init__.py b/api/__init__.py
@@ -405,7 +405,7 @@ async def post_posts():
 @app.route("/posts/<slug>")
 @app.route("/posts/<slug>/<suffix>")
 @app.route("/posts/<slug>/<suffix>/<relation>")
-async def post(slug: str, suffix: Optional[str] = None, relation:Optional[str] = None ):
+async def post(slug: str, suffix: Optional[str] = None, relation: Optional[str] = None):
     """Get post by slug."""
     prefixes = [
         "10.13003",
@@ -470,7 +470,7 @@ async def post(slug: str, suffix: Optional[str] = None, relation:Optional[str] =
         references = response.data.get("reference", [])
         count = len(references)
         return jsonify({"total-results": count, "items": references})
-    elif slug in prefixes and suffix:        
+    elif slug in prefixes and suffix:
         path = suffix.split(".")
         if len(path) > 1 and path[-1] in [
             "md",
@@ -528,7 +528,7 @@ async def post(slug: str, suffix: Optional[str] = None, relation:Optional[str] =
                 "tags": "keywords",
                 "updated_at": "date_updated",
                 "blog.issn": "issn",
-                "blog.license": "license"
+                "blog.license": "license",
             },
         )
         metadata = py_.omit(

diff --git a/api/posts.py b/api/posts.py
@@ -13,7 +13,7 @@
 import time
 import traceback
 from urllib.parse import unquote
-from commonmeta import validate_doi, normalize_doi, normalize_id, validate_url
+from commonmeta import validate_doi, normalize_id, validate_url
 from Levenshtein import ratio
 
 from api.utils import (
@@ -326,9 +326,7 @@ def format_author(author, published_at):
             py_.get(post, "_embedded.wp:featuredmedia[0].source_url", None)
             or py_.get(post, "yoast_head_json.og_image[0].url", None)
             or post.get("jetpack_featured_media_url", None)
-        )
-        if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
+        ) or get_image(images)
 
         # optionally remove category that is used to filter posts
         if blog.get("filter", None) and blog.get("filter", "").startswith("category"):
@@ -419,9 +417,7 @@ def format_author(author, published_at):
             blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
         )
         images = get_images(content_html, url, blog.get("home_page_url", None))
-        image = None
-        if len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
+        image = get_image(images)
         tags = [
             normalize_tag(i)
             for i in post.get("categories", None).keys()
@@ -484,9 +480,7 @@ def format_author(author, published_at):
             blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
         )
         images = get_images(content_html, url, blog.get("home_page_url", None))
-        image = post.get("feature_image", None)
-        if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
+        image = post.get("feature_image", None) or get_image(images)
         tags = [
             normalize_tag(i.get("name", None))
             for i in post.get("tags", None)
@@ -548,9 +542,7 @@ def format_author(author, published_at):
             blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
         )
         images = get_images(content_html, url, blog.get("home_page_url", None))
-        image = post.get("cover_image", None)
-        if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
+        image = post.get("cover_image", None) or get_image(images)
         tags = [
             normalize_tag(i.get("name"))
             for i in wrap(post.get("postTags", None))
@@ -616,9 +608,7 @@ def format_author(author, published_at):
             blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
         )
         images = get_images(content_html, url, blog.get("home_page_url", None))
-        image = post.get("assetUrl", None)
-        if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
+        image = post.get("assetUrl", None) or get_image(images)
         tags = [
             normalize_tag(i)
             for i in wrap(post.get("categories", None))
@@ -684,9 +674,7 @@ def format_author(author, published_at):
         if blog.get("relative_url", None) == "blog":
             base_url = blog.get("home_page_url", None)
         images = get_images(content_html, base_url, blog.get("home_page_url", None))
-        image = py_.get(post, "media:thumbnail.@url", None)
-        if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
+        image = py_.get(post, "media:thumbnail.@url", None) or get_image(images)
         tags = [
             normalize_tag(i)
             for i in wrap(post.get("tags", None))
@@ -776,16 +764,14 @@ def get_url(links):
         if blog.get("relative_url", None) == "blog":
             base_url = blog.get("home_page_url", None)
         images = get_images(content_html, base_url, blog.get("home_page_url", None))
-        image = py_.get(post, "media:thumbnail.@url", None)
+        image = py_.get(post, "media:thumbnail.@url", None) or get_image(images)
         # workaround for eve blog
         if image is not None:
             f = furl(image)
             if f.host == "eve.gd":
                 image = unquote(image)
                 if f.path.segments[0] != "images":
                     image = f.set(path="/images/" + f.path.segments[0]).url
-        if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
-            image = images[0].get("src", None)
         tags = [
             normalize_tag(i.get("@term", None))
             for i in wrap(post.get("category", None))
@@ -1065,6 +1051,23 @@ def get_title(content_html: str):
     return sanitized
 
 
+def get_image(images: list, width: int = 200):
+    """Get src of the first image at least `width` pixels wide, else None.
+
+    An image without a width is treated as 200 pixels wide. Images whose
+    width is not an integer are skipped instead of aborting the search.
+    """
+    for image in images or []:
+        try:
+            # int() accepts both numbers and numeric strings such as "640"
+            if int(image.get("width", 200)) >= width:
+                return image.get("src", None)
+        except (TypeError, ValueError):
+            # e.g. width=None or width="auto": try the next image
+            continue
+    return None
+
+
 def get_summary(content_html: str = None, maxlen: int = 450):
     """Get summary from excerpt or content_html."""
     if not content_html:

diff --git a/tests/cassettes/test-blogs/test_update_single_blog.yaml b/tests/cassettes/test-blogs/test_update_single_blog.yaml
@@ -5,7 +5,7 @@ interactions:
       accept:
       - '*/*'
       accept-encoding:
-      - gzip, deflate
+      - gzip, deflate, br
       accept-profile:
       - public
       connection:
@@ -21,32 +21,30 @@ interactions:
       user-agent:
       - python-httpx/0.25.2
       x-client-info:
-      - supabase-py/2.3.5
+      - supabase-py/2.4.0
     method: GET
     uri: https://db.rogue-scholar.org/rest/v1/posts?select=updated_at&blog_slug=eq.epub_fis&order=updated_at.desc&limit=1
   response:
-    content: '[{"updated_at":1706534803}]'
+    content: '[{"updated_at":1710745399}]'
     headers:
       Access-Control-Allow-Origin:
       - '*'
       CF-Cache-Status:
       - DYNAMIC
       CF-Ray:
-      - 85a0971eba5f1635-DUS
+      - 86aa65c8098acb2b-DUS
       Connection:
       - keep-alive
       Content-Encoding:
-      - gzip
+      - br
       Content-Location:
       - /posts?blog_slug=eq.epub_fis&limit=1&order=updated_at.desc&select=updated_at
-      Content-Profile:
-      - public
       Content-Range:
       - 0-0/*
       Content-Type:
       - application/json; charset=utf-8
       Date:
-      - Fri, 23 Feb 2024 15:44:28 GMT
+      - Tue, 26 Mar 2024 21:57:39 GMT
       Server:
       - cloudflare
       Strict-Transport-Security:
@@ -57,21 +55,23 @@ interactions:
       - Accept-Encoding
       Via:
       - kong/2.8.1
-      X-Kong-Proxy-Latency:
-      - '1'
-      X-Kong-Upstream-Latency:
-      - '13'
       alt-svc:
       - h3=":443"; ma=86400
+      content-profile:
+      - public
       sb-gateway-version:
       - '1'
+      x-kong-proxy-latency:
+      - '1'
+      x-kong-upstream-latency:
+      - '20'
     http_version: HTTP/1.1
     status_code: 200
 - request:
     body: '{"title": "FIS & EPub", "description": "Gemeinsamer Blog der DINI AG Forschungsinformationssystem
       und Elektronisches Publizieren", "feed_url": "https://blog.dini.de/EPub_FIS/feed/atom/",
       "current_feed_url": null, "home_page_url": "https://blog.dini.de/EPub_FIS",
-      "feed_format": "application/atom+xml", "updated_at": 1706534803, "language":
+      "feed_format": "application/atom+xml", "updated_at": 1710745399, "language":
       "de", "category": "socialSciences", "favicon": "https://blog.dini.de/EPub_FIS/wp-content/uploads/2018/03/cropped-DINI-AG-FIS-3-1-150x150.png",
       "license": "https://creativecommons.org/licenses/by/4.0/legalcode", "generator":
       "WordPress 6.3.2", "generator_raw": null, "status": "active", "user_id": "a9e3541e-1e00-4bf3-8a4d-fc9b1c505651",
@@ -80,7 +80,7 @@ interactions:
       accept:
       - '*/*'
       accept-encoding:
-      - gzip, deflate
+      - gzip, deflate, br
       accept-profile:
       - public
       connection:
@@ -98,32 +98,30 @@ interactions:
       user-agent:
       - python-httpx/0.25.2
       x-client-info:
-      - supabase-py/2.3.5
+      - supabase-py/2.4.0
     method: PATCH
     uri: https://db.rogue-scholar.org/rest/v1/blogs?slug=eq.epub_fis
   response:
     content: '[{"title":"FIS & EPub","description":"Gemeinsamer Blog der DINI AG Forschungsinformationssystem
       und Elektronisches Publizieren","language":"de","favicon":"https://blog.dini.de/EPub_FIS/wp-content/uploads/2018/03/cropped-DINI-AG-FIS-3-1-150x150.png","feed_url":"https://blog.dini.de/EPub_FIS/feed/atom/","home_page_url":"https://blog.dini.de/EPub_FIS","user_id":"a9e3541e-1e00-4bf3-8a4d-fc9b1c505651","feed_format":"application/atom+xml","license":"https://creativecommons.org/licenses/by/4.0/legalcode","generator":"WordPress
-      6.3.2","category":"socialSciences","prefix":null,"current_feed_url":null,"status":"active","issn":"2940-1798","backlog":0,"authors":null,"plan":"Team","slug":"epub_fis","use_mastodon":false,"api":true,"funding":null,"archive_prefix":"https://wayback.archive-it.org/22121/20231105105752/","use_api":true,"relative_url":null,"canonical_url":null,"filter":null,"updated_at":1706534803,"secure":null,"created_at":1689897600,"indexed":null,"generator_raw":null,"id":"2e00349e-defb-4459-9653-f284f516ac20","ror":null,"mastodon":null}]'
+      6.3.2","category":"socialSciences","prefix":null,"current_feed_url":null,"status":"active","issn":"2940-1798","authors":null,"plan":"Team","slug":"epub_fis","use_mastodon":false,"funding":null,"archive_prefix":"https://wayback.archive-it.org/22121/20231105105752/","use_api":true,"relative_url":null,"canonical_url":null,"filter":null,"updated_at":1710745399,"secure":null,"created_at":1689897600,"indexed":null,"generator_raw":null,"id":"2e00349e-defb-4459-9653-f284f516ac20","ror":null,"mastodon":null}]'
     headers:
       Access-Control-Allow-Origin:
       - '*'
       CF-Cache-Status:
       - DYNAMIC
       CF-Ray:
-      - 85a0971f3a52cb09-DUS
+      - 86aa65c9a895162b-DUS
       Connection:
       - keep-alive
       Content-Encoding:
-      - gzip
-      Content-Profile:
-      - public
+      - br
       Content-Range:
       - 0-0/*
       Content-Type:
       - application/json; charset=utf-8
       Date:
-      - Fri, 23 Feb 2024 15:44:28 GMT
+      - Tue, 26 Mar 2024 21:57:39 GMT
       Server:
       - cloudflare
       Strict-Transport-Security:
@@ -134,14 +132,16 @@ interactions:
       - Accept-Encoding
       Via:
       - kong/2.8.1
-      X-Kong-Proxy-Latency:
-      - '0'
-      X-Kong-Upstream-Latency:
-      - '3'
       alt-svc:
       - h3=":443"; ma=86400
+      content-profile:
+      - public
       sb-gateway-version:
       - '1'
+      x-kong-proxy-latency:
+      - '0'
+      x-kong-upstream-latency:
+      - '5'
     http_version: HTTP/1.1
     status_code: 200
 version: 1