Skip to content

Commit

Permalink
Add yaml header check (#184)
Browse files Browse the repository at this point in the history
Add yaml header check

---------

Signed-off-by: Guilhem Allaman <40383801+gounux@users.noreply.github.com>
Co-authored-by: gounux <contact@guilhemallaman.net>
Co-authored-by: Julien <dev@ingeoveritas.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
4 people committed May 28, 2024
1 parent 9b016c3 commit c540843
Show file tree
Hide file tree
Showing 9 changed files with 428 additions and 1 deletion.
11 changes: 11 additions & 0 deletions geotribu_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
parser_comments_broadcast,
parser_comments_latest,
parser_comments_read,
parser_header_check,
parser_images_optimizer,
parser_latest_content,
parser_mastodon_export,
Expand Down Expand Up @@ -231,6 +232,16 @@ def main(args: list[str] = None):
add_common_arguments(subcmd_upgrade)
parser_upgrade(subcmd_upgrade)

subcmd_header_check = subparsers.add_parser(
"header-check",
aliases=["header", "check", "header-check", "metadata"],
help="Vérifier entête markdown",
formatter_class=main_parser.formatter_class,
prog="header-check",
)
add_common_arguments(subcmd_header_check)
parser_header_check(subcmd_header_check)

# -- NESTED SUBPARSER : CREATE ---------------------------------------------------
subcmd_content_manager = subparsers.add_parser(
"creer",
Expand Down
239 changes: 239 additions & 0 deletions geotribu_cli/content/header_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
import argparse
import logging
import os
from pathlib import Path

import frontmatter

from geotribu_cli.constants import GeotribuDefaults
from geotribu_cli.json.json_client import JsonFeedClient
from geotribu_cli.utils.check_image_size import get_image_dimensions_by_url
from geotribu_cli.utils.check_path import check_path
from geotribu_cli.utils.slugger import sluggy

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Shared project defaults.
# NOTE(review): not referenced by the functions visible in this module - confirm before removing.
defaults_settings = GeotribuDefaults()

# Keys that check_mandatory_keys() looks for by default in a content's YAML header.
MANDATORY_KEYS = [
"title",
"authors",
"categories",
"date",
"description",
"license",
"tags",
]

# ############################################################################
# ########## CLI #################
# ################################


def parser_header_check(
    subparser: argparse.ArgumentParser,
) -> argparse.ArgumentParser:
    """Declare the arguments of the header-check subcommand on a parser.

    Args:
        subparser (argparse.ArgumentParser): parser to populate

    Returns:
        argparse.ArgumentParser: the same parser, with its arguments declared
            and ``run`` registered as its ``func`` default
    """
    # Positional: one or more markdown files whose header is inspected.
    subparser.add_argument(
        "content_path",
        metavar="content",
        nargs="+",
        type=Path,
        help="Chemin du fichier markdown dont l'entête est à vérifier",
    )

    # Optional folder holding the authors' markdown presentations.
    subparser.add_argument(
        "-af",
        "--authors-folder",
        dest="authors_folder",
        type=Path,
        help="Chemin qui contient les presentations markdown des auteurs/autrices",
    )

    # Accepted image dimension bounds. They only differ by flag names,
    # destination, default and help text, and are declared in this order.
    image_bounds = (
        (
            "-minw",
            "--min-width",
            "min_image_width",
            400,
            "Largeur minimum de l'image à vérifier",
        ),
        (
            "-maxw",
            "--max-width",
            "max_image_width",
            800,
            "Largeur maximum de l'image à vérifier",
        ),
        (
            "-minh",
            "--min-height",
            "min_image_height",
            400,
            "Hauteur minimum de l'image à vérifier",
        ),
        (
            "-maxh",
            "--max-height",
            "max_image_height",
            800,
            "Hauteur maximum de l'image à vérifier",
        ),
    )
    for short_flag, long_flag, destination, default_px, help_text in image_bounds:
        subparser.add_argument(
            short_flag,
            long_flag,
            dest=destination,
            default=default_px,
            type=int,
            help=help_text,
        )

    # Switch turning failed checks into raised exceptions.
    subparser.add_argument(
        "-r",
        "--raise",
        dest="raise_exceptions",
        default=False,
        action="store_true",
        help="Lever des exceptions et donc arrêter le programme si des erreurs sont rencontrées",
    )

    subparser.set_defaults(func=run)
    return subparser


# ############################################################################
# ########## MAIN ################
# ################################


def check_author_md(author: str, folder: Path) -> bool:
    """Tell whether an author has a markdown presentation file in a folder.

    The author named "Geotribu" is always accepted without looking at the
    folder. For any other author, the file looked up is
    ``<sluggy(author)>.md`` directly under ``folder``.

    Args:
        author (str): author name as written in the content header
        folder (Path): folder expected to contain the authors' markdown files

    Returns:
        bool: True for "Geotribu" or when the expected file exists
    """
    if author != "Geotribu":
        expected_file = os.path.join(folder, f"{sluggy(author)}.md")
        return os.path.exists(expected_file)
    return True


def check_image_size(
    image_url: str, minw: int, maxw: int, minh: int, maxh: int
) -> bool:
    """Check that the image at an URL fits within the given dimension bounds.

    Args:
        image_url (str): URL of the image to measure
        minw (int): minimum accepted width (inclusive)
        maxw (int): maximum accepted width (inclusive)
        minh (int): minimum accepted height (inclusive)
        maxh (int): maximum accepted height (inclusive)

    Returns:
        bool: True when both width and height are within their bounds. False
            otherwise, including when the dimensions could not be determined.
    """
    dimensions = get_image_dimensions_by_url(image_url)
    if dimensions is None:
        # The helper returns None when it could not identify an image in the
        # downloaded data; unpacking it would raise a TypeError, so report the
        # image as not compliant instead of crashing.
        logger.warning(
            f"Impossible de déterminer les dimensions de l'image : {image_url}"
        )
        return False

    width, height = dimensions
    return minw <= width <= maxw and minh <= height <= maxh


def get_existing_tags() -> list[str]:
    """Return the tags reported by a new JsonFeedClient, asking for sorted output.

    Returns:
        list[str]: result of ``JsonFeedClient().tags(should_sort=True)``
    """
    return JsonFeedClient().tags(should_sort=True)


def check_existing_tags(tags: list[str]) -> tuple[bool, set[str], set[str]]:
    """Compare tags against the ones returned by get_existing_tags().

    Args:
        tags (list[str]): tags to look up

    Returns:
        tuple[bool, set[str], set[str]]: whether every tag is already known,
            the set of unknown tags, and the set of known tags
    """
    # Fetch the reference tags first, then build the candidate set only once.
    known_tags = get_existing_tags()
    candidates = set(tags)

    every_tag_known = candidates.issubset(known_tags)
    unknown = candidates.difference(known_tags)
    known = candidates.intersection(known_tags)
    return every_tag_known, unknown, known


def check_tags_order(tags: list[str]) -> bool:
    """Tell whether tags are in ascending order once upper-cased and slugged.

    Each tag is compared with the one following it; equal neighbours are
    accepted. An empty or single-element list is considered ordered.

    Args:
        tags (list[str]): tags in the order they appear in the header

    Returns:
        bool: False as soon as a tag sorts after its successor, True otherwise
    """
    return all(
        not (sluggy(current.upper()) > sluggy(following.upper()))
        for current, following in zip(tags, tags[1:])
    )


def check_mandatory_keys(
    keys: list[str], mandatory: list[str] = MANDATORY_KEYS
) -> tuple[bool, set[str]]:
    """Find which mandatory keys are absent from a collection of keys.

    Args:
        keys (list[str]): keys actually present
        mandatory (list[str], optional): keys that must be present.
            Defaults to MANDATORY_KEYS.

    Returns:
        tuple[bool, set[str]]: True when nothing is missing, and the set of
            missing keys (empty when all are present)
    """
    absent = {required for required in mandatory if required not in keys}
    return not absent, absent


def _report_error(msg: str, raise_exceptions: bool) -> None:
    """Log a failed check and, when requested, abort by raising ValueError."""
    logger.error(msg)
    if raise_exceptions:
        raise ValueError(msg)


def run(args: argparse.Namespace) -> None:
    """Run the sub command logic.

    Checks the YAML header of each content file passed on the command line, in
    this order: image dimensions (when an ``image`` key is present), presence
    of the authors' markdown files (when an authors folder is given), existence
    and alphabetical order of the tags, then presence of the mandatory keys.

    Each failed check is logged as an error. A header lacking ``authors`` or
    ``tags`` no longer crashes the command: the related checks are skipped and
    the missing key is reported by the mandatory-keys check.

    Args:
        args (argparse.Namespace): arguments passed to the subcommand

    Raises:
        ValueError: when a check fails and ``args.raise_exceptions`` is set
    """
    logger.debug(f"Running {args.command} with {args}")
    content_paths: list[Path] = args.content_path

    for content_path in content_paths:
        logger.info(f"Checking header of {content_path}")
        check_path(
            input_path=content_path,
            must_be_a_file=True,
            must_be_a_folder=False,
            must_be_readable=True,
            raise_error=True,
        )

        # The file is only needed while parsing: close it before running the
        # checks, some of which perform network requests.
        with content_path.open(mode="r", encoding="UTF-8") as file:
            content = frontmatter.load(file)
        yaml_meta = content.metadata
        logger.debug(f"YAML metadata loaded : {yaml_meta}")

        # check that image size is okay
        if "image" in yaml_meta:
            if not yaml_meta["image"]:
                # NOTE(review): only logged, never raised even with --raise;
                # kept as in the original - confirm this is intended.
                logger.error("Pas d'URL pour l'image")
            elif not check_image_size(
                yaml_meta["image"],
                args.min_image_width,
                args.max_image_width,
                args.min_image_height,
                args.max_image_height,
            ):
                _report_error(
                    f"Les dimensions de l'image ne sont pas dans l'intervalle autorisé "
                    f"(w:{args.min_image_width}-{args.max_image_width},"
                    f"h:{args.min_image_height}-{args.max_image_height})",
                    args.raise_exceptions,
                )
            else:
                logger.info("Dimensions de l'image ok")

        # check that author md file is present
        if args.authors_folder:
            # a missing or empty "authors" key yields no author to check
            for author in yaml_meta.get("authors") or []:
                if check_author_md(author, args.authors_folder):
                    logger.info(f"Markdown de l'auteur/autrice '{author}' ok")
                else:
                    _report_error(
                        f"Le fichier de l'auteur/autrice '{author}' n'a pas pu être trouvé dans le répertoire",
                        args.raise_exceptions,
                    )

        # tag checks only make sense when the header declares tags; an absent
        # key is reported by the mandatory-keys check below
        if "tags" in yaml_meta:
            # an empty "tags:" entry is loaded as None: treat it as no tags
            tags = yaml_meta["tags"] or []

            # check that tags already exist
            all_exists, missing, _ = check_existing_tags(tags)
            if all_exists:
                logger.info("Existence des tags ok")
            else:
                _report_error(
                    f"Les tags suivants n'existent pas dans les contenus Geotribu précédents : {','.join(missing)}",
                    args.raise_exceptions,
                )

            # check if tags are alphabetically sorted
            if check_tags_order(tags):
                logger.info("Ordre alphabétique des tags ok")
            else:
                _report_error(
                    f"Les tags ne sont pas triés par ordre alphabétique : {tags}",
                    args.raise_exceptions,
                )

        # check that mandatory keys are present
        all_present, missing = check_mandatory_keys(
            yaml_meta.keys(), MANDATORY_KEYS
        )
        if all_present:
            logger.info("Clés de l'entête ok")
        else:
            _report_error(
                f"Les clés suivantes ne sont pas présentes dans l'entête markdown : {','.join(missing)}",
                args.raise_exceptions,
            )
1 change: 1 addition & 0 deletions geotribu_cli/subcommands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from geotribu_cli.comments import parser_comments_broadcast # noqa: F401
from geotribu_cli.comments import parser_comments_latest # noqa: F401
from geotribu_cli.comments import parser_comments_read # noqa: F401
from geotribu_cli.content.header_check import parser_header_check # noqa: F401
from geotribu_cli.content.new_article import parser_new_article # noqa: F401
from geotribu_cli.images.images_optimizer import parser_images_optimizer # noqa: F401
from geotribu_cli.rss.rss_reader import parser_latest_content # noqa: F401
Expand Down
21 changes: 21 additions & 0 deletions geotribu_cli/utils/check_image_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
from decimal import Decimal
from pathlib import Path
from typing import Union
from urllib import request

# 3rd party
import imagesize
from PIL import ImageFile

# #############################################################################
# ########## Globals ###############
Expand Down Expand Up @@ -61,6 +63,25 @@ def get_image_size(image_filepath: Path) -> tuple[int, int]:
return None


def get_image_dimensions_by_url(
    url: str, timeout: float = 30.0
) -> Union[tuple[int, int], None]:
    """Get the dimensions, as a tuple (width,height), of the image at an URL.

    The response is read in chunks of 1024 bytes fed to an incremental image
    parser, and reading stops as soon as the parser exposes an image, so the
    whole file is not necessarily downloaded.

    Errors raised while opening or reading the URL are not caught here and
    propagate to the caller.

    :param str url: url of the image
    :param float timeout: timeout in seconds passed to ``urlopen``, so that an
        unresponsive server cannot block the caller indefinitely
    :return Union[tuple[int, int], None]: dimensions tuple (width,height), or
        None when the data ended before the parser could identify an image
    """
    # NOTE(review): urlopen also accepts non-HTTP schemes (file:, ftp:, data:).
    # If url can come from untrusted content, consider restricting the scheme.
    with request.urlopen(url, timeout=timeout) as file:
        parser = ImageFile.Parser()
        while chunk := file.read(1024):
            parser.feed(chunk)
            if parser.image:
                return parser.image.size
    return None


def get_svg_size(image_filepath: Path) -> tuple[int, int]:
"""Extract SVG width and height from a SVG file and convert them into integers. \
Relevant and working only if the file root has width and height attributes.
Expand Down
20 changes: 20 additions & 0 deletions tests/fixtures/content/2012-12-21_article_passe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
title: Article supposément rédigé dans le passé
subtitle: Article supposément rédigé dans le passé pour tests
authors:
- Jane Doe
categories:
- article
comments: true
date: 2012-12-21
description: Article supposément rédigé dans le passé
icon: octicons/server-16
license: beerware
robots: index, follow
tags:
- Fromage
- OSM
- QGIS
---

# Article supposément rédigé dans le passé
19 changes: 19 additions & 0 deletions tests/fixtures/content/2044-04-01_article_futur.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
title: Article supposément rédigé dans le futur
subtitle: Article supposément rédigé dans le futur pour tests
authors:
- Jane Doe
categories:
- article
comments: true
date: 2044-04-01
icon: octicons/server-16
robots: index, follow
tags:
- Fromage
- IGN
- QGIS
- OSM
---

# Article supposément rédigé dans le futur
26 changes: 26 additions & 0 deletions tests/fixtures/team/jane-doe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
title: Jane Doe
categories:
- contributeur
social:
- bluesky:
- github:
- gitlab:
- linkedin:
- mail:
- mastodon:
- instance:
- username:
- openstreetmap:
- osgeo:
- twitter:
- website:
---

# Jane Doe

<!-- --8<-- [start:author-sign-block] -->

Test

<!-- --8<-- [end:author-sign-block] -->

0 comments on commit c540843

Please sign in to comment.