dasch-swiss · jnussbaum · Nov 16, 2023 · Nov 16, 2023 · Nov 16, 2023 · Nov 16, 2023
diff --git a/src/dsp_tools/commands/excel2json/lists.py b/src/dsp_tools/commands/excel2json/lists.py
@@ -1,8 +1,7 @@
 """This module handles all the operations which are used for the creation of JSON lists from Excel files."""
-import glob
 import importlib.resources
 import json
-import os
+from pathlib import Path
 from typing import Any, Optional, Union
 
 import jsonschema
@@ -196,7 +195,7 @@ def _get_values_from_excel(
 
 
 def _make_json_lists_from_excel(
-    excel_file_paths: list[str],
+    excel_file_paths: list[Path],
     verbose: bool = False,
 ) -> list[dict[str, Any]]:
     """
@@ -224,10 +223,7 @@ def _make_json_lists_from_excel(
     startcol = 1
 
     # make a dict with the language labels and the worksheets
-    lang_to_worksheet = {
-        os.path.basename(filepath)[:2]: load_workbook(filepath, read_only=True).worksheets[0]
-        for filepath in excel_file_paths
-    }
+    lang_to_worksheet = {x.stem: load_workbook(x, read_only=True).worksheets[0] for x in excel_file_paths}
 
     # take English as base file. If English is not available, take a random one.
     base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]
@@ -308,7 +304,7 @@ def validate_lists_section_with_schema(
     return True
 
 
-def _extract_excel_file_paths(excelfolder: str) -> list[str]:
+def _extract_excel_file_paths(excelfolder: str) -> list[Path]:
     """
     This method extracts the names of the Excel files that are in the folder, and asserts that they are named according
     to the requirements.
@@ -322,17 +318,19 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
     Returns:
         list of the Excel file paths to process
     """
-    if not os.path.isdir(excelfolder):
+    if not Path(excelfolder).is_dir():
         raise UserError(f"ERROR: {excelfolder} is not a directory.")
 
-    excel_file_paths = [
-        filename
-        for filename in glob.iglob(f"{excelfolder}/*.xlsx")
-        if not os.path.basename(filename).startswith("~$") and os.path.isfile(filename)
-    ]
+    supported_files = ["en.xlsx", "de.xlsx", "fr.xlsx", "it.xlsx", "rm.xlsx"]
+    # Unlike glob.iglob(), Path.glob() also matches dotfiles, so hidden files have to be skipped explicitly
+    # (in addition to the "~$"-prefixed files) to keep the behaviour of the previous implementation.
+    excel_file_paths = [
+        x
+        for x in Path(excelfolder).glob("*.xlsx")
+        if x.is_file() and not x.name.startswith(("~$", "."))
+    ]
 
     for filepath in excel_file_paths:
-        if not regex.search(r"^(de|en|fr|it|rm)\.xlsx$", os.path.basename(filepath)):
+        if filepath.name not in supported_files:
             raise UserError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")
 
     return excel_file_paths

diff --git a/src/dsp_tools/commands/excel2json/project.py b/src/dsp_tools/commands/excel2json/project.py
@@ -1,5 +1,5 @@
 import json
-import os
+from pathlib import Path
 
 import regex
 
@@ -45,28 +45,28 @@ def excel2json(
 
     # validate input
     # --------------
-    if not os.path.isdir(data_model_files):
+    if not Path(data_model_files).is_dir():
         raise UserError(f"ERROR: {data_model_files} is not a directory.")
-    folder = [x for x in os.scandir(data_model_files) if not regex.search(r"^(\.|~\$).+", x.name)]
+    folder = [x for x in Path(data_model_files).iterdir() if not regex.search(r"^(\.|~\$).+", x.name)]
 
     processed_files = []
-    onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
+    onto_folders = [x for x in folder if x.is_dir() and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
     if not onto_folders:
         raise UserError(
             f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
         )
     for onto_folder in onto_folders:
-        contents = sorted([x.name for x in os.scandir(onto_folder) if not regex.search(r"^(\.|~\$).+", x.name)])
+        contents = sorted(x.name for x in onto_folder.iterdir() if not regex.search(r"^(\.|~\$).+", x.name))
         if contents != ["properties.xlsx", "resources.xlsx"]:
             raise UserError(
                 f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' "
                 "and one file 'resources.xlsx', but nothing else."
             )
         processed_files.extend([f"{data_model_files}/{onto_folder.name}/{file}" for file in contents])
 
-    listfolder = [x for x in folder if os.path.isdir(x) and x.name == "lists"]
+    listfolder = [x for x in folder if x.is_dir() and x.name == "lists"]
     if listfolder:
-        listfolder_contents = [x for x in os.scandir(listfolder[0]) if not regex.search(r"^(\.|~\$).+", x.name)]
+        listfolder_contents = [x for x in listfolder[0].iterdir() if not regex.search(r"^(\.|~\$).+", x.name)]
         if not all(regex.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents):
             raise UserError(
                 f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx"

diff --git a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py
@@ -423,7 +423,7 @@ def make_bitstream_prop(
     See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#bitstream
     """
 
-    if not os.path.isfile(path):
+    if not Path(path).is_file():
         warnings.warn(
             f"Failed validation in bitstream tag of resource '{calling_resource}': "
             f"The following path doesn't point to a file: {path}",

diff --git a/src/dsp_tools/commands/project/create/project_validate.py b/src/dsp_tools/commands/project/create/project_validate.py
@@ -1,6 +1,6 @@
 import importlib.resources
 import json
-import os
+from pathlib import Path
 from typing import Any, Union
 
 import jsonpath_ng
@@ -238,7 +238,7 @@ def validate_project(
         project_definition = input_file_or_json
     elif (
         isinstance(input_file_or_json, str)
-        and os.path.isfile(input_file_or_json)
+        and Path(input_file_or_json).is_file()
         and regex.search(r"\.json$", input_file_or_json)
     ):
         with open(input_file_or_json, encoding="utf-8") as f:

diff --git a/src/dsp_tools/commands/xmlupload/models/sipi.py b/src/dsp_tools/commands/xmlupload/models/sipi.py
@@ -1,5 +1,4 @@
 import json
-import os
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
@@ -45,7 +44,7 @@ def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
             API response
         """
         with open(filepath, "rb") as bitstream_file:
-            files = {"file": (os.path.basename(filepath), bitstream_file)}
+            files = {"file": (Path(filepath).name, bitstream_file)}
             url = self.sipi_server + "/upload"
             headers = {"Authorization": "Bearer " + self.token}
             timeout = 5 * 60