logger = logging.getLogger(__name__)

# Keys that every content header is expected to declare.
MANDATORY_KEYS = [
    "title",
    "authors",
    "categories",
    "date",
    "description",
    "license",
    "tags",
]

# ############################################################################
# ########## CLI #################
# ################################


def parser_header_check(
    subparser: argparse.ArgumentParser,
) -> argparse.ArgumentParser:
    """Set the argument parser subcommand.

    Args:
        subparser (argparse.ArgumentParser): parser to set up

    Returns:
        argparse.ArgumentParser: parser ready to use
    """
    subparser.add_argument(
        "content_path",
        help="Chemin du fichier markdown dont l'entête est à vérifier",
        type=Path,
        metavar="content",
        nargs="+",
    )
    subparser.add_argument(
        "-af",
        "--authors-folder",
        dest="authors_folder",
        type=Path,
        help="Chemin qui contient les presentations markdown des auteurs/autrices",
    )
    subparser.add_argument(
        "-minw",
        "--min-width",
        dest="min_image_width",
        default=400,
        type=int,
        help="Largeur minimum de l'image à vérifier",
    )
    subparser.add_argument(
        "-maxw",
        "--max-width",
        dest="max_image_width",
        default=800,
        type=int,
        help="Largeur maximum de l'image à vérifier",
    )
    subparser.add_argument(
        "-minh",
        "--min-height",
        dest="min_image_height",
        default=400,
        type=int,
        help="Hauteur minimum de l'image à vérifier",
    )
    subparser.add_argument(
        "-maxh",
        "--max-height",
        dest="max_image_height",
        default=800,
        type=int,
        help="Hauteur maximum de l'image à vérifier",
    )
    subparser.add_argument(
        "-r",
        "--raise",
        dest="raise_exceptions",
        action="store_true",
        default=False,
        help="Lever des exceptions et donc arrêter le programme si des erreurs sont rencontrées",
    )
    subparser.set_defaults(func=run)
    return subparser


# ############################################################################
# ########## MAIN ################
# ################################


def check_author_md(author: str, folder: Path) -> bool:
    """Tell whether an author has a markdown presentation file in a folder.

    The file name is the slugified author name followed by ``.md``. The
    collective author "Geotribu" is always accepted without looking at the disk.

    Args:
        author (str): author name as written in the content header
        folder (Path): folder holding the authors markdown files

    Returns:
        bool: True if the author is "Geotribu" or if the matching file exists
    """
    if author == "Geotribu":
        return True
    # is_file() rather than exists(): a directory with that name is not a
    # valid author presentation
    return (Path(folder) / f"{sluggy(author)}.md").is_file()


def check_image_size(
    image_url: str, minw: int, maxw: int, minh: int, maxh: int
) -> bool:
    """Tell whether a remote image fits in the given dimensions range.

    Args:
        image_url (str): URL of the image to check
        minw (int): minimum width (inclusive)
        maxw (int): maximum width (inclusive)
        minh (int): minimum height (inclusive)
        maxh (int): maximum height (inclusive)

    Returns:
        bool: True if both width and height are within bounds. False if they are
            not, or if the dimensions could not be determined.
    """
    dimensions = get_image_dimensions_by_url(image_url)
    if dimensions is None:
        # the helper is documented as returning None when it has no data;
        # unpacking it blindly would raise a TypeError
        logger.warning(
            f"Impossible de déterminer les dimensions de l'image : {image_url}"
        )
        return False
    width, height = dimensions
    return minw <= width <= maxw and minh <= height <= maxh


def get_existing_tags() -> list[str]:
    """Return the sorted tags exposed by the JSON feed client.

    Returns:
        list[str]: tags, as returned by ``JsonFeedClient.tags(should_sort=True)``
    """
    jfc = JsonFeedClient()
    return jfc.tags(should_sort=True)


def check_existing_tags(
    tags: list[str], existing_tags: list[str] = None
) -> tuple[bool, set[str], set[str]]:
    """Compare tags with the already known ones.

    Args:
        tags (list[str]): tags to check
        existing_tags (list[str], optional): known tags. When None (default),
            they are retrieved with get_existing_tags(). Passing them lets a
            caller checking several contents retrieve them only once.

    Returns:
        tuple[bool, set[str], set[str]]: whether every tag is known, the set of
            unknown tags and the set of known tags
    """
    known = set(get_existing_tags() if existing_tags is None else existing_tags)
    wanted = set(tags)
    missing = wanted - known
    present = wanted & known
    return not missing, missing, present


def check_tags_order(tags: list[str]) -> bool:
    """Tell whether tags are alphabetically sorted.

    Tags are compared through the slug of their uppercased form, so the case
    does not matter.

    Args:
        tags (list[str]): tags in the order they appear in the header

    Returns:
        bool: True if each tag sorts before or equal to the following one
    """
    # slugify each tag once instead of twice per comparison
    slugs = [sluggy(tag.upper()) for tag in tags]
    return all(left <= right for left, right in zip(slugs, slugs[1:]))


def check_mandatory_keys(
    keys: list[str], mandatory: list[str] = MANDATORY_KEYS
) -> tuple[bool, set[str]]:
    """List the mandatory keys which are absent from a header.

    Args:
        keys (list[str]): keys found in the header
        mandatory (list[str], optional): keys which must be present.
            Defaults to MANDATORY_KEYS.

    Returns:
        tuple[bool, set[str]]: whether every mandatory key is present and the
            set of missing keys
    """
    missing = {key for key in mandatory if key not in keys}
    return not missing, missing


def _report_error(msg: str, raise_exceptions: bool) -> None:
    """Log a failed check and raise a ValueError if the user asked for it."""
    logger.error(msg)
    if raise_exceptions:
        raise ValueError(msg)


def run(args: argparse.Namespace) -> None:
    """Run the sub command logic.

    Checks YAML header of each content: image dimensions, authors files, tags
    existence and order, mandatory keys. Failed checks are logged as errors.

    Args:
        args (argparse.Namespace): arguments passed to the subcommand

    Raises:
        ValueError: if a check fails and args.raise_exceptions is set
    """
    logger.debug(f"Running {args.command} with {args}")
    content_paths: list[Path] = args.content_path

    # known tags do not depend on the checked file: retrieve them only once
    existing_tags = get_existing_tags()

    for content_path in content_paths:
        logger.info(f"Checking header of {content_path}")
        check_path(
            input_path=content_path,
            must_be_a_file=True,
            must_be_a_folder=False,
            must_be_readable=True,
            raise_error=True,
        )

        with content_path.open(mode="r", encoding="UTF-8") as file:
            content = frontmatter.load(file)
        yaml_meta = content.metadata
        logger.debug(f"YAML metadata loaded : {yaml_meta}")

        # a missing or empty key must be reported by the mandatory keys check
        # below, not crash the previous checks with a KeyError/TypeError
        authors = yaml_meta.get("authors") or []
        tags = yaml_meta.get("tags") or []

        # check that image size is okay
        if "image" in yaml_meta:
            if not yaml_meta["image"]:
                logger.error("Pas d'URL pour l'image")
            elif not check_image_size(
                yaml_meta["image"],
                args.min_image_width,
                args.max_image_width,
                args.min_image_height,
                args.max_image_height,
            ):
                _report_error(
                    f"Les dimensions de l'image ne sont pas dans l'intervalle autorisé "
                    f"(w:{args.min_image_width}-{args.max_image_width},"
                    f"h:{args.min_image_height}-{args.max_image_height})",
                    args.raise_exceptions,
                )
            else:
                logger.info("Dimensions de l'image ok")

        # check that author md file is present
        if args.authors_folder:
            for author in authors:
                if check_author_md(author, args.authors_folder):
                    logger.info(f"Markdown de l'auteur/autrice '{author}' ok")
                else:
                    _report_error(
                        f"Le fichier de l'auteur/autrice '{author}' n'a pas pu être trouvé dans le répertoire",
                        args.raise_exceptions,
                    )

        # check that tags already exist
        all_exists, missing_tags, _ = check_existing_tags(tags, existing_tags)
        if all_exists:
            logger.info("Existence des tags ok")
        else:
            # sorted: a set has no stable order, the message must be reproducible
            _report_error(
                f"Les tags suivants n'existent pas dans les contenus Geotribu précédents : {','.join(sorted(missing_tags))}",
                args.raise_exceptions,
            )

        # check if tags are alphabetically sorted
        if check_tags_order(tags):
            logger.info("Ordre alphabétique des tags ok")
        else:
            _report_error(
                f"Les tags ne sont pas triés par ordre alphabétique : {tags}",
                args.raise_exceptions,
            )

        # check that mandatory keys are present
        all_present, missing_keys = check_mandatory_keys(
            yaml_meta.keys(), MANDATORY_KEYS
        )
        if all_present:
            logger.info("Clés de l'entête ok")
        else:
            _report_error(
                f"Les clés suivantes ne sont pas présentes dans l'entête markdown : {','.join(sorted(missing_keys))}",
                args.raise_exceptions,
            )
def get_image_dimensions_by_url(
    url: str, timeout: float = 10.0
) -> Union[tuple[int, int], None]:
    """Get image dimensions as a tuple (width,height) of an image at an URL. \
    Return None in case of error or no data.

    The image is read by chunks of 1024 bytes and the function returns as soon as \
    the parser has been able to identify the image, so the whole file is not \
    necessarily downloaded.

    :param str url: url of the image
    :param float timeout: maximum time in seconds to wait on the network, \
    defaults to 10.0. Without it, an unresponsive server would block forever.

    :return Union[Tuple[int, int], None]: dimensions tuple (width,height) or None \
    if the URL is invalid, unreachable or does not deliver a readable image
    """
    # function-scope import so that this function does not depend on how the
    # rest of the module sets up its logging
    import logging

    try:
        with request.urlopen(url, timeout=timeout) as response:
            parser = ImageFile.Parser()
            while chunk := response.read(1024):
                parser.feed(chunk)
                if parser.image:
                    return parser.image.size
    except (OSError, ValueError) as err:
        # OSError covers URLError/HTTPError and timeouts, ValueError is raised
        # by urllib for malformed URLs: honour the "None in case of error"
        # contract instead of letting them propagate
        logging.getLogger(__name__).warning(
            "Unable to read image dimensions from %s: %s", url, err
        )
    return None
diff --git a/tests/fixtures/content/2012-12-21_article_passe.md b/tests/fixtures/content/2012-12-21_article_passe.md new file mode 100644 index 0000000..9e1ccbc --- /dev/null +++ b/tests/fixtures/content/2012-12-21_article_passe.md @@ -0,0 +1,20 @@ +--- +title: Article supposément rédigé dans le passé +subtitle: Article supposément rédigé dans le passé pour tests +authors: + - Jane Doe +categories: + - article +comments: true +date: 2012-12-21 +description: Article supposément rédigé dans le passé +icon: octicons/server-16 +license: beerware +robots: index, follow +tags: + - Fromage + - OSM + - QGIS +--- + +# Article supposément rédigé dans le futur \ No newline at end of file diff --git a/tests/fixtures/content/2044-04-01_article_futur.md b/tests/fixtures/content/2044-04-01_article_futur.md new file mode 100644 index 0000000..ec73d39 --- /dev/null +++ b/tests/fixtures/content/2044-04-01_article_futur.md @@ -0,0 +1,19 @@ +--- +title: Article supposément rédigé dans le futur +subtitle: Article supposément rédigé dans le futur pour tests +authors: + - Jane Doe +categories: + - article +comments: true +date: 2044-04-01 +icon: octicons/server-16 +robots: index, follow +tags: + - Fromage + - IGN + - QGIS + - OSM +--- + +# Article supposément rédigé dans le futur \ No newline at end of file diff --git a/tests/fixtures/team/jane-doe.md b/tests/fixtures/team/jane-doe.md new file mode 100644 index 0000000..27510ea --- /dev/null +++ b/tests/fixtures/team/jane-doe.md @@ -0,0 +1,26 @@ +--- +title: Jane Doe +categories: + - contributeur +social: + - bluesky: + - github: + - gitlab: + - linkedin: + - mail: + - mastodon: + - instance: + - username: + - openstreetmap: + - osgeo: + - twitter: + - website: +--- + +# Jane Doe + + + +Test + + diff --git a/tests/test_utils_images_size.py b/tests/test_utils_images_size.py index 7a69c78..a953a45 100644 --- a/tests/test_utils_images_size.py +++ b/tests/test_utils_images_size.py @@ -21,7 +21,11 @@ # project from geotribu_cli.__about__ 
TEAM_FOLDER = Path("tests/fixtures/team")
CONTENT_FOLDER = Path("tests/fixtures/content")


def _load_front_matter(markdown_path: Path) -> dict:
    """Parse the YAML front matter of a markdown fixture.

    The file is read as UTF-8 explicitly: fixtures contain accented characters
    and the platform default encoding is not always UTF-8.
    """
    text = markdown_path.read_text(encoding="UTF-8")
    _, front_matter, _ = text.split("---", 2)
    return yaml.safe_load(front_matter)


class TestYamlHeaderCheck(unittest.TestCase):
    """Tests of the header checks, based on two markdown fixtures."""

    def setUp(self):
        """Load the front matter of both fixtures before each test."""
        self.past_yaml_meta = _load_front_matter(
            CONTENT_FOLDER / "2012-12-21_article_passe.md"
        )
        self.future_yaml_meta = _load_front_matter(
            CONTENT_FOLDER / "2044-04-01_article_futur.md"
        )

    @patch("geotribu_cli.content.header_check.get_existing_tags")
    def test_past_tags_existence(self, get_existing_tags_mock):
        """An unknown tag is reported as missing, known ones as present."""
        get_existing_tags_mock.return_value = ["QGIS", "OSM"]
        tags_ok, missing_tags, present_tags = check_existing_tags(
            self.past_yaml_meta["tags"]
        )
        self.assertFalse(tags_ok)
        self.assertIn("Fromage", missing_tags)
        self.assertIn("QGIS", present_tags)
        self.assertIn("OSM", present_tags)

    @patch("geotribu_cli.content.header_check.get_existing_tags")
    def test_future_tags_existence(self, get_existing_tags_mock):
        """Several unknown tags are all reported as missing."""
        get_existing_tags_mock.return_value = ["Fromage", "IGN"]
        tags_ok, missing_tags, present_tags = check_existing_tags(
            self.future_yaml_meta["tags"]
        )
        self.assertFalse(tags_ok)
        self.assertIn("QGIS", missing_tags)
        self.assertIn("OSM", missing_tags)
        self.assertIn("Fromage", present_tags)
        self.assertIn("IGN", present_tags)

    def test_past_tags_order(self):
        """Tags of the past fixture are alphabetically sorted."""
        self.assertTrue(check_tags_order(self.past_yaml_meta["tags"]))

    def test_future_tags_order(self):
        """Tags of the future fixture are not sorted (QGIS before OSM)."""
        self.assertFalse(check_tags_order(self.future_yaml_meta["tags"]))

    def test_past_mandatory_keys(self):
        """The past fixture declares every mandatory key."""
        all_present, missing = check_mandatory_keys(self.past_yaml_meta.keys())
        self.assertTrue(all_present)
        self.assertEqual(len(missing), 0)

    def test_future_mandatory_keys(self):
        """The future fixture lacks the license and description keys."""
        all_present, missing = check_mandatory_keys(self.future_yaml_meta.keys())
        self.assertFalse(all_present)
        self.assertEqual(len(missing), 2)
        self.assertIn("license", missing)
        self.assertIn("description", missing)

    def test_author_md_ok(self):
        """The author file is found whatever the case, accents or quotes."""
        for author in (
            "Jane Doe",
            "JaNe DoE",
            "Jàne Doe",
            "Jàne Döe",
            "Jàne Döé",
            "Jàne D'öé",
        ):
            with self.subTest(author=author):
                self.assertTrue(check_author_md(author, TEAM_FOLDER))
        self.assertFalse(check_author_md("JaneDoe", TEAM_FOLDER))