Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add yaml header check #184

Merged
merged 28 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
53969f2
Add yaml header check wip
Mar 17, 2024
a43e560
Add tags check using JSON feed
Mar 18, 2024
36dcb42
Add some tests
Mar 23, 2024
bef103d
Add mandatory keys check
Mar 23, 2024
1a57ebb
Check tags alphabetical order
Apr 1, 2024
274e435
Remove publish date check
Apr 17, 2024
892cc5c
Use utils function for checking if file exists
Apr 26, 2024
b9301c7
Merge branch 'main' into feature/add-markdown-header-checker
Guts May 2, 2024
26c12ab
Merge branch 'main' into feature/add-markdown-header-checker
gounux May 3, 2024
45e7764
Use JSON feed client
May 3, 2024
a2c0d57
Use frontmatter to load yaml metadata
May 17, 2024
536e82a
Accept multiple path parameters
May 17, 2024
7224615
Add missing dependency
May 17, 2024
91176fd
Edit how image sizes are checked
May 20, 2024
d45db58
Remove icon and subtitle from mandatory keys
May 20, 2024
3633902
Check author markdown file
May 23, 2024
f983c64
Fix author md check
May 23, 2024
2a0cc93
Add image size by url function
May 23, 2024
14c8a32
Add author md check tests
May 23, 2024
ae13e58
Merge branch 'main' into feature/add-markdown-header-checker
gounux May 23, 2024
f4f5c63
Update geotribu_cli/content/header_check.py
gounux May 27, 2024
275fc9d
Update geotribu_cli/content/header_check.py
gounux May 27, 2024
2659b51
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 27, 2024
856213d
Update geotribu_cli/content/header_check.py
gounux May 27, 2024
77b838c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 27, 2024
61e8f56
Add missing imports
May 27, 2024
d4bbcbd
Use slugger for author md file
May 28, 2024
6a43fcb
No more unidecode
May 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions geotribu_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
parser_comments_broadcast,
parser_comments_latest,
parser_comments_read,
parser_header_check,
parser_images_optimizer,
parser_latest_content,
parser_mastodon_export,
Expand Down Expand Up @@ -231,6 +232,16 @@ def main(args: list[str] = None):
add_common_arguments(subcmd_upgrade)
parser_upgrade(subcmd_upgrade)

subcmd_header_check = subparsers.add_parser(
"header-check",
aliases=["header", "check", "header-check", "metadata"],
help="Vérifier entête markdown",
formatter_class=main_parser.formatter_class,
prog="header-check",
)
add_common_arguments(subcmd_header_check)
parser_header_check(subcmd_header_check)

# -- NESTED SUBPARSER : CREATE ---------------------------------------------------
subcmd_content_manager = subparsers.add_parser(
"creer",
Expand Down
239 changes: 239 additions & 0 deletions geotribu_cli/content/header_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
import argparse
import logging
import os
from pathlib import Path

import frontmatter

from geotribu_cli.constants import GeotribuDefaults
from geotribu_cli.json.json_client import JsonFeedClient
from geotribu_cli.utils.check_image_size import get_image_dimensions_by_url
from geotribu_cli.utils.check_path import check_path
from geotribu_cli.utils.slugger import sluggy

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Instance of the project defaults.
# NOTE(review): not referenced by the code visible in this module - confirm it is still needed.
defaults_settings = GeotribuDefaults()

# Keys that must be present in the YAML header of a content file. Used as the
# default `mandatory` argument of check_mandatory_keys() and passed to it by run().
MANDATORY_KEYS = [
"title",
"authors",
"categories",
"date",
"description",
"license",
"tags",
]

# ############################################################################
# ########## CLI #################
# ################################


def parser_header_check(
    subparser: argparse.ArgumentParser,
) -> argparse.ArgumentParser:
    """Declare the arguments of the header check subcommand on ``subparser``.

    Registers the positional markdown path(s), the optional authors folder,
    the four image dimension bounds and the ``--raise`` switch, then binds
    :func:`run` as the function to execute for this subcommand.

    Args:
        subparser (argparse.ArgumentParser): parser to populate

    Returns:
        argparse.ArgumentParser: the same parser, once configured
    """
    # one or more markdown files to inspect
    subparser.add_argument(
        "content_path",
        metavar="content",
        nargs="+",
        type=Path,
        help="Chemin du fichier markdown dont l'entête est à vérifier",
    )
    # optional folder holding one markdown file per author
    subparser.add_argument(
        "-af",
        "--authors-folder",
        type=Path,
        dest="authors_folder",
        help="Chemin qui contient les presentations markdown des auteurs/autrices",
    )

    # The image bounds only differ by flag names, destination, default value
    # and help text: describe them as data and register them in that order.
    image_bounds = (
        (
            "-minw",
            "--min-width",
            "min_image_width",
            400,
            "Largeur minimum de l'image à vérifier",
        ),
        (
            "-maxw",
            "--max-width",
            "max_image_width",
            800,
            "Largeur maximum de l'image à vérifier",
        ),
        (
            "-minh",
            "--min-height",
            "min_image_height",
            400,
            "Hauteur minimum de l'image à vérifier",
        ),
        (
            "-maxh",
            "--max-height",
            "max_image_height",
            800,
            "Hauteur maximum de l'image à vérifier",
        ),
    )
    for short_flag, long_flag, destination, default_value, help_text in image_bounds:
        subparser.add_argument(
            short_flag,
            long_flag,
            dest=destination,
            default=default_value,
            type=int,
            help=help_text,
        )

    # opt-in strict mode: failed checks raise instead of being only logged
    subparser.add_argument(
        "-r",
        "--raise",
        action="store_true",
        default=False,
        dest="raise_exceptions",
        help="Lever des exceptions et donc arrêter le programme si des erreurs sont rencontrées",
    )

    subparser.set_defaults(func=run)
    return subparser


# ############################################################################
# ########## MAIN ################
# ################################


def check_author_md(author: str, folder: Path) -> bool:
    """Tell whether a markdown file exists for ``author`` inside ``folder``.

    The expected file name is ``sluggy(author)`` followed by ``.md``. The
    author named "Geotribu" is always accepted, without looking at the
    filesystem.

    Args:
        author (str): author name as written in the content header
        folder (Path): folder expected to contain the authors markdown files

    Returns:
        bool: True if the author is "Geotribu" or if the expected path exists
    """
    if author != "Geotribu":
        expected_name = f"{sluggy(author)}.md"
        return os.path.exists(os.path.join(folder, expected_name))
    return True


def check_image_size(
    image_url: str, minw: int, maxw: int, minh: int, maxh: int
) -> bool:
    """Check that the image at ``image_url`` fits in the given dimension bounds.

    Args:
        image_url (str): URL of the image to measure
        minw (int): minimum accepted width (inclusive)
        maxw (int): maximum accepted width (inclusive)
        minh (int): minimum accepted height (inclusive)
        maxh (int): maximum accepted height (inclusive)

    Returns:
        bool: True if both width and height are within their bounds, False if
            they are not or if the dimensions could not be determined
    """
    dimensions = get_image_dimensions_by_url(image_url)
    if dimensions is None:
        # The helper returns None when no image could be identified in the
        # downloaded data; unpacking it would raise a TypeError.
        logger.warning(
            "Impossible de déterminer les dimensions de l'image : %s", image_url
        )
        return False

    width, height = dimensions
    return minw <= width <= maxw and minh <= height <= maxh


def get_existing_tags() -> list[str]:
    """Return the tags provided by a new JSON feed client, requested sorted.

    Returns:
        list[str]: result of ``JsonFeedClient().tags(should_sort=True)``
    """
    return JsonFeedClient().tags(should_sort=True)


def check_existing_tags(tags: list[str]) -> tuple[bool, set[str], set[str]]:
    """Compare ``tags`` with the tags returned by :func:`get_existing_tags`.

    Args:
        tags (list[str]): tags to look for

    Returns:
        tuple[bool, set[str], set[str]]: a flag telling whether every tag
            already exists, the set of tags that do not exist yet and the set
            of tags that do
    """
    known_tags = get_existing_tags()
    requested = set(tags)

    unknown = requested.difference(known_tags)
    already_known = requested.intersection(known_tags)
    # every requested tag is known exactly when nothing is left in `unknown`
    return not unknown, unknown, already_known


def check_tags_order(tags: list[str]) -> bool:
    """Tell whether ``tags`` are in non-decreasing order.

    Each pair of neighbours is compared through ``sluggy()`` applied to the
    upper-cased tag. Evaluation stops at the first pair found out of order.

    Args:
        tags (list[str]): tags in the order they appear in the header

    Returns:
        bool: True if no tag sorts after its successor (always True for zero
            or one tag)
    """
    return all(
        sluggy(current.upper()) <= sluggy(following.upper())
        for current, following in zip(tags, tags[1:])
    )


def check_mandatory_keys(
    keys: list[str], mandatory: list[str] = MANDATORY_KEYS
) -> tuple[bool, set[str]]:
    """Find which mandatory keys are absent from ``keys``.

    Args:
        keys (list[str]): keys found in the header
        mandatory (list[str], optional): keys that must be present.
            Defaults to MANDATORY_KEYS.

    Returns:
        tuple[bool, set[str]]: a flag which is True when nothing is missing,
            and the set of missing keys
    """
    absent = {required for required in mandatory if required not in keys}
    return not absent, absent


def _report_failure(msg: str, raise_exceptions: bool) -> None:
    """Log ``msg`` as an error, then raise ValueError if ``raise_exceptions`` is set."""
    logger.error(msg)
    if raise_exceptions:
        raise ValueError(msg)


def run(args: argparse.Namespace) -> None:
    """Run the sub command logic.

    Checks the YAML header of each given content file: image dimensions,
    presence of the authors markdown files (only when an authors folder is
    given), existence and alphabetical order of the tags, and presence of the
    mandatory keys. Every failed check is logged as an error.

    Args:
        args (argparse.Namespace): arguments passed to the subcommand

    Raises:
        ValueError: if a check fails and ``args.raise_exceptions`` is true
    """
    logger.debug(f"Running {args.command} with {args}")
    content_paths: list[Path] = args.content_path

    for content_path in content_paths:
        logger.info(f"Checking header of {content_path}")
        check_path(
            input_path=content_path,
            must_be_a_file=True,
            must_be_a_folder=False,
            must_be_readable=True,
            raise_error=True,
        )

        # only the parsed metadata is needed, so the file is closed right away
        with content_path.open(mode="r", encoding="UTF-8") as file:
            yaml_meta = frontmatter.load(file).metadata
        logger.debug(f"YAML metadata loaded : {yaml_meta}")

        # check that image size is okay
        if "image" in yaml_meta:
            if not yaml_meta["image"]:
                # NOTE(review): unlike the other checks, this one never raises,
                # even with --raise. Kept as is - confirm this is intended.
                logger.error("Pas d'URL pour l'image")
            elif not check_image_size(
                yaml_meta["image"],
                args.min_image_width,
                args.max_image_width,
                args.min_image_height,
                args.max_image_height,
            ):
                _report_failure(
                    f"Les dimensions de l'image ne sont pas dans l'intervalle autorisé "
                    f"(w:{args.min_image_width}-{args.max_image_width},"
                    f"h:{args.min_image_height}-{args.max_image_height})",
                    args.raise_exceptions,
                )
            else:
                logger.info("Dimensions de l'image ok")

        # check that author md file is present
        if args.authors_folder:
            # A header without (or with an empty) "authors" key must not crash
            # here: the mandatory keys check below reports the missing key.
            for author in yaml_meta.get("authors") or []:
                if check_author_md(author, args.authors_folder):
                    logger.info(f"Markdown de l'auteur/autrice '{author}' ok")
                else:
                    _report_failure(
                        f"Le fichier de l'auteur/autrice '{author}' n'a pas pu être trouvé dans le répertoire",
                        args.raise_exceptions,
                    )

        # Same reasoning for "tags": without any tag there is nothing to
        # compare, and a missing key is reported by the mandatory keys check.
        tags = yaml_meta.get("tags") or []
        if tags:
            # check that tags already exist
            all_exists, missing_tags, _ = check_existing_tags(tags)
            if all_exists:
                logger.info("Existence des tags ok")
            else:
                # sorted so that the message does not depend on set ordering
                _report_failure(
                    f"Les tags suivants n'existent pas dans les contenus Geotribu précédents : {','.join(sorted(missing_tags))}",
                    args.raise_exceptions,
                )

            # check if tags are alphabetically sorted
            if check_tags_order(tags):
                logger.info("Ordre alphabétique des tags ok")
            else:
                _report_failure(
                    f"Les tags ne sont pas triés par ordre alphabétique : {tags}",
                    args.raise_exceptions,
                )

        # check that mandatory keys are present
        all_present, missing_keys = check_mandatory_keys(
            yaml_meta.keys(), MANDATORY_KEYS
        )
        if all_present:
            logger.info("Clés de l'entête ok")
        else:
            _report_failure(
                f"Les clés suivantes ne sont pas présentes dans l'entête markdown : {','.join(sorted(missing_keys))}",
                args.raise_exceptions,
            )
1 change: 1 addition & 0 deletions geotribu_cli/subcommands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from geotribu_cli.comments import parser_comments_broadcast # noqa: F401
from geotribu_cli.comments import parser_comments_latest # noqa: F401
from geotribu_cli.comments import parser_comments_read # noqa: F401
from geotribu_cli.content.header_check import parser_header_check # noqa: F401
from geotribu_cli.content.new_article import parser_new_article # noqa: F401
from geotribu_cli.images.images_optimizer import parser_images_optimizer # noqa: F401
from geotribu_cli.rss.rss_reader import parser_latest_content # noqa: F401
Expand Down
21 changes: 21 additions & 0 deletions geotribu_cli/utils/check_image_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
from decimal import Decimal
from pathlib import Path
from typing import Union
from urllib import request

# 3rd party
import imagesize
from PIL import ImageFile

# #############################################################################
# ########## Globals ###############
Expand Down Expand Up @@ -61,6 +63,25 @@ def get_image_size(image_filepath: Path) -> tuple[int, int]:
return None


def get_image_dimensions_by_url(url: str) -> Union[tuple[int, int], None]:
    """Get image dimensions as a tuple (width,height) of an image at an URL.

    The resource is read in chunks of 1024 bytes which are fed to an
    incremental image parser; reading stops as soon as the parser exposes an
    image. Return None if the data ends before that happens. Exceptions raised
    while opening or reading the URL are not caught here and propagate to the
    caller.

    :param str url: url of the image

    :return Union[tuple[int, int], None]: dimensions tuple (width,height), or
        None if no image could be identified
    """
    parser = ImageFile.Parser()
    with request.urlopen(url) as file:
        # an empty chunk means the end of the data
        while chunk := file.read(1024):
            parser.feed(chunk)
            if parser.image:
                return parser.image.size
    return None


def get_svg_size(image_filepath: Path) -> tuple[int, int]:
"""Extract SVG width and height from a SVG file and convert them into integers. \
Relevant and working only if the file root has width and height attributes.
Expand Down
20 changes: 20 additions & 0 deletions tests/fixtures/content/2012-12-21_article_passe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
title: Article supposément rédigé dans le passé
subtitle: Article supposément rédigé dans le passé pour tests
authors:
- Jane Doe
categories:
- article
comments: true
date: 2012-12-21
description: Article supposément rédigé dans le passé
icon: octicons/server-16
license: beerware
robots: index, follow
tags:
- Fromage
- OSM
- QGIS
---

# Article supposément rédigé dans le passé
19 changes: 19 additions & 0 deletions tests/fixtures/content/2044-04-01_article_futur.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
title: Article supposément rédigé dans le futur
subtitle: Article supposément rédigé dans le futur pour tests
authors:
- Jane Doe
categories:
- article
comments: true
date: 2044-04-01
icon: octicons/server-16
robots: index, follow
tags:
- Fromage
- IGN
- QGIS
- OSM
---

# Article supposément rédigé dans le futur
26 changes: 26 additions & 0 deletions tests/fixtures/team/jane-doe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
title: Jane Doe
categories:
- contributeur
social:
- bluesky:
- github:
- gitlab:
- linkedin:
- mail:
- mastodon:
- instance:
- username:
- openstreetmap:
- osgeo:
- twitter:
- website:
---

# Jane Doe

<!-- --8<-- [start:author-sign-block] -->

Test

<!-- --8<-- [end:author-sign-block] -->
Loading
Loading