diff --git a/src/dsp_tools/commands/excel2json/lists.py b/src/dsp_tools/commands/excel2json/lists.py index bf502b37f..959c3dc33 100644 --- a/src/dsp_tools/commands/excel2json/lists.py +++ b/src/dsp_tools/commands/excel2json/lists.py @@ -1,8 +1,7 @@ """This module handles all the operations which are used for the creation of JSON lists from Excel files.""" -import glob import importlib.resources import json -import os +from pathlib import Path from typing import Any, Optional, Union import jsonschema @@ -196,7 +195,7 @@ def _get_values_from_excel( def _make_json_lists_from_excel( - excel_file_paths: list[str], + excel_file_paths: list[Path], verbose: bool = False, ) -> list[dict[str, Any]]: """ @@ -224,10 +223,7 @@ def _make_json_lists_from_excel( startcol = 1 # make a dict with the language labels and the worksheets - lang_to_worksheet = { - os.path.basename(filepath)[:2]: load_workbook(filepath, read_only=True).worksheets[0] - for filepath in excel_file_paths - } + lang_to_worksheet = {x.stem: load_workbook(x, read_only=True).worksheets[0] for x in excel_file_paths} # take English as base file. If English is not available, take a random one. base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0] @@ -308,7 +304,7 @@ def validate_lists_section_with_schema( return True -def _extract_excel_file_paths(excelfolder: str) -> list[str]: +def _extract_excel_file_paths(excelfolder: str) -> list[Path]: """ This method extracts the names of the Excel files that are in the folder, and asserts that they are named according to the requirements. @@ -322,17 +318,14 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]: Returns: list of the Excel file paths to process """ - if not os.path.isdir(excelfolder): + if not Path(excelfolder).is_dir(): raise UserError(f"ERROR: {excelfolder} is not a directory.") - excel_file_paths = [ - filename - for filename in glob.iglob(f"{excelfolder}/*.xlsx") - if not os.path.basename(filename).startswith("~$") and os.path.isfile(filename) - ] + supported_files = ["en.xlsx", "de.xlsx", "fr.xlsx", "it.xlsx", "rm.xlsx"] + excel_file_paths = [x for x in Path(excelfolder).glob("*.xlsx") if x.is_file() and not x.name.startswith("~$")] for filepath in excel_file_paths: - if not regex.search(r"^(de|en|fr|it|rm)\.xlsx$", os.path.basename(filepath)): + if filepath.name not in supported_files: raise UserError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'") return excel_file_paths diff --git a/src/dsp_tools/commands/excel2json/project.py b/src/dsp_tools/commands/excel2json/project.py index 7b649d797..7bf017ae5 100644 --- a/src/dsp_tools/commands/excel2json/project.py +++ b/src/dsp_tools/commands/excel2json/project.py @@ -1,5 +1,5 @@ import json -import os +from pathlib import Path import regex @@ -45,18 +45,18 @@ def excel2json( # validate input # -------------- - if not os.path.isdir(data_model_files): + if not Path(data_model_files).is_dir(): raise UserError(f"ERROR: {data_model_files} is not a directory.") - folder = [x for x in os.scandir(data_model_files) if not regex.search(r"^(\.|~\$).+", x.name)] + folder = [x for x in Path(data_model_files).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)] processed_files = [] - onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)] + onto_folders = [x for x in folder if x.is_dir() and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)] if not onto_folders: raise UserError( f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'" ) for onto_folder in onto_folders: - contents = sorted([x.name for x in os.scandir(onto_folder) if not regex.search(r"^(\.|~\$).+", x.name)]) + contents = sorted([x.name for x in Path(onto_folder).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)]) if contents != ["properties.xlsx", "resources.xlsx"]: raise UserError( f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' " @@ -64,9 +64,9 @@ def excel2json( ) processed_files.extend([f"{data_model_files}/{onto_folder.name}/{file}" for file in contents]) - listfolder = [x for x in folder if os.path.isdir(x) and x.name == "lists"] + listfolder = [x for x in folder if x.is_dir() and x.name == "lists"] if listfolder: - listfolder_contents = [x for x in os.scandir(listfolder[0]) if not regex.search(r"^(\.|~\$).+", x.name)] + listfolder_contents = [x for x in Path(listfolder[0]).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)] if not all(regex.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents): raise UserError( f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx" diff --git a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py index d85276fd1..2131a8b3d 100644 --- a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py +++ b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py @@ -423,7 +423,7 @@ def make_bitstream_prop( See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#bitstream """ - if not os.path.isfile(path): + if not Path(path).is_file(): warnings.warn( f"Failed validation in bitstream tag of resource '{calling_resource}': " f"The following path doesn't point to a file: {path}", diff --git a/src/dsp_tools/commands/project/create/project_validate.py b/src/dsp_tools/commands/project/create/project_validate.py index 99eabcd53..6001ab7cd 100644 --- a/src/dsp_tools/commands/project/create/project_validate.py +++ b/src/dsp_tools/commands/project/create/project_validate.py @@ -1,6 +1,6 @@ import importlib.resources import json -import os +from pathlib import Path from typing import Any, Union import jsonpath_ng @@ -238,7 +238,7 @@ def validate_project( project_definition = input_file_or_json elif ( isinstance(input_file_or_json, str) - and os.path.isfile(input_file_or_json) + and Path(input_file_or_json).is_file() and regex.search(r"\.json$", input_file_or_json) ): with open(input_file_or_json, encoding="utf-8") as f: diff --git a/src/dsp_tools/commands/xmlupload/models/sipi.py b/src/dsp_tools/commands/xmlupload/models/sipi.py index f4487a234..40faca600 100644 --- a/src/dsp_tools/commands/xmlupload/models/sipi.py +++ b/src/dsp_tools/commands/xmlupload/models/sipi.py @@ -1,5 +1,4 @@ import json -import os from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -45,7 +44,7 @@ def upload_bitstream(self, filepath: str) -> dict[Any, Any]: API response """ with open(filepath, "rb") as bitstream_file: - files = {"file": (os.path.basename(filepath), bitstream_file)} + files = {"file": (Path(filepath).name, bitstream_file)} url = self.sipi_server + "/upload" headers = {"Authorization": "Bearer " + self.token} timeout = 5 * 60