Skip to content

Commit

Permalink
updated tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Mar 26, 2024
1 parent eb2cd21 commit 04de9a2
Show file tree
Hide file tree
Showing 23 changed files with 1,166 additions and 266,771 deletions.
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
fly.toml
.venv
/tests
/build
6 changes: 3 additions & 3 deletions api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ async def post_posts():
@app.route("/posts/<slug>")
@app.route("/posts/<slug>/<suffix>")
@app.route("/posts/<slug>/<suffix>/<relation>")
async def post(slug: str, suffix: Optional[str] = None, relation:Optional[str] = None ):
async def post(slug: str, suffix: Optional[str] = None, relation: Optional[str] = None):
"""Get post by slug."""
prefixes = [
"10.13003",
Expand Down Expand Up @@ -470,7 +470,7 @@ async def post(slug: str, suffix: Optional[str] = None, relation:Optional[str] =
references = response.data.get("reference", [])
count = len(references)
return jsonify({"total-results": count, "items": references})
elif slug in prefixes and suffix:
elif slug in prefixes and suffix:
path = suffix.split(".")
if len(path) > 1 and path[-1] in [
"md",
Expand Down Expand Up @@ -528,7 +528,7 @@ async def post(slug: str, suffix: Optional[str] = None, relation:Optional[str] =
"tags": "keywords",
"updated_at": "date_updated",
"blog.issn": "issn",
"blog.license": "license"
"blog.license": "license",
},
)
metadata = py_.omit(
Expand Down
47 changes: 25 additions & 22 deletions api/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import time
import traceback
from urllib.parse import unquote
from commonmeta import validate_doi, normalize_doi, normalize_id, validate_url
from commonmeta import validate_doi, normalize_id, validate_url
from Levenshtein import ratio

from api.utils import (
Expand Down Expand Up @@ -326,9 +326,7 @@ def format_author(author, published_at):
py_.get(post, "_embedded.wp:featuredmedia[0].source_url", None)
or py_.get(post, "yoast_head_json.og_image[0].url", None)
or post.get("jetpack_featured_media_url", None)
)
if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
) or get_image(images)

# optionally remove category that is used to filter posts
if blog.get("filter", None) and blog.get("filter", "").startswith("category"):
Expand Down Expand Up @@ -419,9 +417,7 @@ def format_author(author, published_at):
blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
)
images = get_images(content_html, url, blog.get("home_page_url", None))
image = None
if len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
image = get_image(images)
tags = [
normalize_tag(i)
for i in post.get("categories", None).keys()
Expand Down Expand Up @@ -484,9 +480,7 @@ def format_author(author, published_at):
blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
)
images = get_images(content_html, url, blog.get("home_page_url", None))
image = post.get("feature_image", None)
if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
image = post.get("feature_image", None) or get_image(images)
tags = [
normalize_tag(i.get("name", None))
for i in post.get("tags", None)
Expand Down Expand Up @@ -548,9 +542,7 @@ def format_author(author, published_at):
blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
)
images = get_images(content_html, url, blog.get("home_page_url", None))
image = post.get("cover_image", None)
if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
image = post.get("cover_image", None) or get_image(images)
tags = [
normalize_tag(i.get("name"))
for i in wrap(post.get("postTags", None))
Expand Down Expand Up @@ -616,9 +608,7 @@ def format_author(author, published_at):
blog["archive_prefix"] + url if blog.get("archive_prefix", None) else None
)
images = get_images(content_html, url, blog.get("home_page_url", None))
image = post.get("assetUrl", None)
if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
image = post.get("assetUrl", None) or get_image(images)
tags = [
normalize_tag(i)
for i in wrap(post.get("categories", None))
Expand Down Expand Up @@ -684,9 +674,7 @@ def format_author(author, published_at):
if blog.get("relative_url", None) == "blog":
base_url = blog.get("home_page_url", None)
images = get_images(content_html, base_url, blog.get("home_page_url", None))
image = py_.get(post, "media:thumbnail.@url", None)
if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
image = py_.get(post, "media:thumbnail.@url", None) or get_image(images)
tags = [
normalize_tag(i)
for i in wrap(post.get("tags", None))
Expand Down Expand Up @@ -776,16 +764,14 @@ def get_url(links):
if blog.get("relative_url", None) == "blog":
base_url = blog.get("home_page_url", None)
images = get_images(content_html, base_url, blog.get("home_page_url", None))
image = py_.get(post, "media:thumbnail.@url", None)
image = py_.get(post, "media:thumbnail.@url", None) or get_image(images)
# workaround for eve blog
if image is not None:
f = furl(image)
if f.host == "eve.gd":
image = unquote(image)
if f.path.segments[0] != "images":
image = f.set(path="/images/" + f.path.segments[0]).url
if not image and len(images) > 0 and int(images[0].get("width", 200)) >= 200:
image = images[0].get("src", None)
tags = [
normalize_tag(i.get("@term", None))
for i in wrap(post.get("category", None))
Expand Down Expand Up @@ -1065,6 +1051,23 @@ def get_title(content_html: str):
return sanitized


def get_image(images: list, width: int = 200):
    """Return the ``src`` of the first image that is at least ``width`` wide.

    Args:
        images: List of dicts describing images; each may carry ``src`` and
            ``width`` keys. ``None`` or an empty list yields ``None``.
        width: Minimum acceptable image width. Defaults to 200.

    Returns:
        The ``src`` value of the first qualifying image (``None`` if that
        image has no ``src`` key), or ``None`` when no image qualifies.

    An image without a ``width`` key is treated as being 200 wide. An image
    whose width cannot be converted to an integer (for example ``None`` or
    ``"100%"``) is skipped rather than aborting the whole search, so a later
    image with a usable width can still be selected.
    """
    if not images:
        return None
    for image in images:
        try:
            image_width = int(image.get("width", 200))
        except (TypeError, ValueError):
            # int(None) raises TypeError, int("100%") raises ValueError;
            # neither tells us the real width, so ignore this image only.
            continue
        if image_width >= width:
            return image.get("src", None)
    return None


def get_summary(content_html: str = None, maxlen: int = 450):
"""Get summary from excerpt or content_html."""
if not content_html:
Expand Down
50 changes: 25 additions & 25 deletions tests/cassettes/test-blogs/test_update_single_blog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ interactions:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
- gzip, deflate, br
accept-profile:
- public
connection:
Expand All @@ -21,32 +21,30 @@ interactions:
user-agent:
- python-httpx/0.25.2
x-client-info:
- supabase-py/2.3.5
- supabase-py/2.4.0
method: GET
uri: https://db.rogue-scholar.org/rest/v1/posts?select=updated_at&blog_slug=eq.epub_fis&order=updated_at.desc&limit=1
response:
content: '[{"updated_at":1706534803}]'
content: '[{"updated_at":1710745399}]'
headers:
Access-Control-Allow-Origin:
- '*'
CF-Cache-Status:
- DYNAMIC
CF-Ray:
- 85a0971eba5f1635-DUS
- 86aa65c8098acb2b-DUS
Connection:
- keep-alive
Content-Encoding:
- gzip
- br
Content-Location:
- /posts?blog_slug=eq.epub_fis&limit=1&order=updated_at.desc&select=updated_at
Content-Profile:
- public
Content-Range:
- 0-0/*
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 23 Feb 2024 15:44:28 GMT
- Tue, 26 Mar 2024 21:57:39 GMT
Server:
- cloudflare
Strict-Transport-Security:
Expand All @@ -57,21 +55,23 @@ interactions:
- Accept-Encoding
Via:
- kong/2.8.1
X-Kong-Proxy-Latency:
- '1'
X-Kong-Upstream-Latency:
- '13'
alt-svc:
- h3=":443"; ma=86400
content-profile:
- public
sb-gateway-version:
- '1'
x-kong-proxy-latency:
- '1'
x-kong-upstream-latency:
- '20'
http_version: HTTP/1.1
status_code: 200
- request:
body: '{"title": "FIS & EPub", "description": "Gemeinsamer Blog der DINI AG Forschungsinformationssystem
und Elektronisches Publizieren", "feed_url": "https://blog.dini.de/EPub_FIS/feed/atom/",
"current_feed_url": null, "home_page_url": "https://blog.dini.de/EPub_FIS",
"feed_format": "application/atom+xml", "updated_at": 1706534803, "language":
"feed_format": "application/atom+xml", "updated_at": 1710745399, "language":
"de", "category": "socialSciences", "favicon": "https://blog.dini.de/EPub_FIS/wp-content/uploads/2018/03/cropped-DINI-AG-FIS-3-1-150x150.png",
"license": "https://creativecommons.org/licenses/by/4.0/legalcode", "generator":
"WordPress 6.3.2", "generator_raw": null, "status": "active", "user_id": "a9e3541e-1e00-4bf3-8a4d-fc9b1c505651",
Expand All @@ -80,7 +80,7 @@ interactions:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
- gzip, deflate, br
accept-profile:
- public
connection:
Expand All @@ -98,32 +98,30 @@ interactions:
user-agent:
- python-httpx/0.25.2
x-client-info:
- supabase-py/2.3.5
- supabase-py/2.4.0
method: PATCH
uri: https://db.rogue-scholar.org/rest/v1/blogs?slug=eq.epub_fis
response:
content: '[{"title":"FIS & EPub","description":"Gemeinsamer Blog der DINI AG Forschungsinformationssystem
und Elektronisches Publizieren","language":"de","favicon":"https://blog.dini.de/EPub_FIS/wp-content/uploads/2018/03/cropped-DINI-AG-FIS-3-1-150x150.png","feed_url":"https://blog.dini.de/EPub_FIS/feed/atom/","home_page_url":"https://blog.dini.de/EPub_FIS","user_id":"a9e3541e-1e00-4bf3-8a4d-fc9b1c505651","feed_format":"application/atom+xml","license":"https://creativecommons.org/licenses/by/4.0/legalcode","generator":"WordPress
6.3.2","category":"socialSciences","prefix":null,"current_feed_url":null,"status":"active","issn":"2940-1798","backlog":0,"authors":null,"plan":"Team","slug":"epub_fis","use_mastodon":false,"api":true,"funding":null,"archive_prefix":"https://wayback.archive-it.org/22121/20231105105752/","use_api":true,"relative_url":null,"canonical_url":null,"filter":null,"updated_at":1706534803,"secure":null,"created_at":1689897600,"indexed":null,"generator_raw":null,"id":"2e00349e-defb-4459-9653-f284f516ac20","ror":null,"mastodon":null}]'
6.3.2","category":"socialSciences","prefix":null,"current_feed_url":null,"status":"active","issn":"2940-1798","authors":null,"plan":"Team","slug":"epub_fis","use_mastodon":false,"funding":null,"archive_prefix":"https://wayback.archive-it.org/22121/20231105105752/","use_api":true,"relative_url":null,"canonical_url":null,"filter":null,"updated_at":1710745399,"secure":null,"created_at":1689897600,"indexed":null,"generator_raw":null,"id":"2e00349e-defb-4459-9653-f284f516ac20","ror":null,"mastodon":null}]'
headers:
Access-Control-Allow-Origin:
- '*'
CF-Cache-Status:
- DYNAMIC
CF-Ray:
- 85a0971f3a52cb09-DUS
- 86aa65c9a895162b-DUS
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Profile:
- public
- br
Content-Range:
- 0-0/*
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 23 Feb 2024 15:44:28 GMT
- Tue, 26 Mar 2024 21:57:39 GMT
Server:
- cloudflare
Strict-Transport-Security:
Expand All @@ -134,14 +132,16 @@ interactions:
- Accept-Encoding
Via:
- kong/2.8.1
X-Kong-Proxy-Latency:
- '0'
X-Kong-Upstream-Latency:
- '3'
alt-svc:
- h3=":443"; ma=86400
content-profile:
- public
sb-gateway-version:
- '1'
x-kong-proxy-latency:
- '0'
x-kong-upstream-latency:
- '5'
http_version: HTTP/1.1
status_code: 200
version: 1

0 comments on commit 04de9a2

Please sign in to comment.