Skip to content

Commit

Permalink
Improve JSON parsing when the response contains an error message
Browse files Browse the repository at this point in the history
  • Loading branch information
mfenner committed Jun 15, 2024
1 parent 01609a8 commit 4738d9b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 5 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# syntax=docker/dockerfile:1.5
ARG BUILDPLATFORM=linux/amd64
FROM --platform=$BUILDPLATFORM python:3.12-bookworm as builder
FROM --platform=$BUILDPLATFORM python:3.12-bookworm AS builder

# Dockerfile that builds the Rogue Scholar API Docker image. Based on the following:
# - https://medium.com/@albertazzir/blazing-fast-python-docker-builds-with-poetry-a78a66f5aed0
Expand All @@ -11,7 +11,7 @@ FROM --platform=$BUILDPLATFORM python:3.12-bookworm as builder
# Install OS package dependency: pandoc
# install poetry to manage Python dependencies
ENV PANDOC_VERSION=3.1.12.3 \
POETRY_VERSION=1.8.2
POETRY_VERSION=1.8.3

ADD https://github.com/jgm/pandoc/releases/download/${PANDOC_VERSION}/pandoc-${PANDOC_VERSION}-1-amd64.deb /tmp/pandoc-${PANDOC_VERSION}-1-amd64.deb

Expand All @@ -35,7 +35,7 @@ RUN touch README.md
RUN --mount=type=cache,target=$POETRY_CACHE_DIR poetry install --without dev --no-root --no-interaction --no-ansi


FROM --platform=$BUILDPLATFORM python:3.12-slim-bookworm as runtime
FROM --platform=$BUILDPLATFORM python:3.12-slim-bookworm AS runtime

# Install OS package dependency (for weasyprint): pango
RUN --mount=type=cache,target=/var/cache/apt apt-get update -y && \
Expand Down
8 changes: 6 additions & 2 deletions api/posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional
from furl import furl
import httpx
import json as JSON
import asyncio
import re
import pydash as py_
Expand Down Expand Up @@ -195,7 +196,7 @@ async def extract_all_posts_by_blog(

feed_url = url.set(params).url
blog_with_posts = {}
# print(feed_url)
print(f"Extracting posts from {blog['slug']} at {feed_url}.")

# use pagination of results only for non-API blogs
if params:
Expand All @@ -214,7 +215,10 @@ async def extract_all_posts_by_blog(
elif generator == "WordPress" and blog["use_api"]:
async with httpx.AsyncClient() as client:
response = await client.get(feed_url, timeout=30, follow_redirects=True)
posts = response.json()
# filter out error messages that are not valid json
json_start = response.text.find('[{')
response = response.text[json_start:]
posts = JSON.loads(response)
if not update_all:
posts = filter_updated_posts(posts, blog, key="modified_gmt")
extract_posts = [extract_wordpress_post(x, blog) for x in posts]
Expand Down
5 changes: 5 additions & 0 deletions api/works.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
from os import environ
import httpx

# from api.supabase import (
# supabase_client as supabase,
# supabase_admin_client as supabase_admin,
# )

# supported accept headers for content negotiation
SUPPORTED_ACCEPT_HEADERS = [
"application/vnd.commonmeta+json",
Expand Down

0 comments on commit 4738d9b

Please sign in to comment.