Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: replace os.path and os.scandir with pathlib.Path #648

Merged
merged 12 commits into from
Nov 16, 2023
23 changes: 8 additions & 15 deletions src/dsp_tools/commands/excel2json/lists.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
"""This module handles all the operations which are used for the creation of JSON lists from Excel files."""
import glob
import importlib.resources
import json
import os
from pathlib import Path
from typing import Any, Optional, Union

import jsonschema
Expand Down Expand Up @@ -196,7 +195,7 @@ def _get_values_from_excel(


def _make_json_lists_from_excel(
excel_file_paths: list[str],
excel_file_paths: list[Path],
verbose: bool = False,
) -> list[dict[str, Any]]:
"""
Expand Down Expand Up @@ -224,10 +223,7 @@ def _make_json_lists_from_excel(
startcol = 1

# make a dict with the language labels and the worksheets
lang_to_worksheet = {
os.path.basename(filepath)[:2]: load_workbook(filepath, read_only=True).worksheets[0]
for filepath in excel_file_paths
}
lang_to_worksheet = {x.stem: load_workbook(x, read_only=True).worksheets[0] for x in excel_file_paths}

# Take English as the base file. If English is not available, fall back to the first language found.
base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]
Expand Down Expand Up @@ -308,7 +304,7 @@ def validate_lists_section_with_schema(
return True


def _extract_excel_file_paths(excelfolder: str) -> list[str]:
def _extract_excel_file_paths(excelfolder: str) -> list[Path]:
"""
This function collects the paths of the Excel files that are in the folder, and checks that they are named according
to the requirements (a UserError is raised otherwise).
Expand All @@ -322,17 +318,14 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
Returns:
list of the Excel file paths to process
"""
if not os.path.isdir(excelfolder):
if not Path(excelfolder).is_dir():
raise UserError(f"ERROR: {excelfolder} is not a directory.")

excel_file_paths = [
filename
for filename in glob.iglob(f"{excelfolder}/*.xlsx")
if not os.path.basename(filename).startswith("~$") and os.path.isfile(filename)
]
supported_files = ["en.xlsx", "de.xlsx", "fr.xlsx", "it.xlsx", "rm.xlsx"]
excel_file_paths = [x for x in Path(excelfolder).glob("*.xlsx") if x.is_file() and not x.name.startswith("~$")]

for filepath in excel_file_paths:
if not regex.search(r"^(de|en|fr|it|rm)\.xlsx$", os.path.basename(filepath)):
if filepath.name not in supported_files:
raise UserError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")

return excel_file_paths
Expand Down
14 changes: 7 additions & 7 deletions src/dsp_tools/commands/excel2json/project.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
import os
from pathlib import Path

import regex

Expand Down Expand Up @@ -45,28 +45,28 @@ def excel2json(

# validate input
# --------------
if not os.path.isdir(data_model_files):
if not Path(data_model_files).is_dir():
raise UserError(f"ERROR: {data_model_files} is not a directory.")
folder = [x for x in os.scandir(data_model_files) if not regex.search(r"^(\.|~\$).+", x.name)]
folder = [x for x in Path(data_model_files).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)]

processed_files = []
onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
onto_folders = [x for x in folder if x.is_dir() and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
if not onto_folders:
raise UserError(
f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
)
for onto_folder in onto_folders:
contents = sorted([x.name for x in os.scandir(onto_folder) if not regex.search(r"^(\.|~\$).+", x.name)])
contents = sorted([x.name for x in Path(onto_folder).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)])
if contents != ["properties.xlsx", "resources.xlsx"]:
raise UserError(
f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' "
"and one file 'resources.xlsx', but nothing else."
)
processed_files.extend([f"{data_model_files}/{onto_folder.name}/{file}" for file in contents])

listfolder = [x for x in folder if os.path.isdir(x) and x.name == "lists"]
listfolder = [x for x in folder if x.is_dir() and x.name == "lists"]
if listfolder:
listfolder_contents = [x for x in os.scandir(listfolder[0]) if not regex.search(r"^(\.|~\$).+", x.name)]
listfolder_contents = [x for x in Path(listfolder[0]).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)]
if not all(regex.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents):
raise UserError(
f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx"
Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ def make_bitstream_prop(
See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#bitstream
"""

if not os.path.isfile(path):
if not Path(path).is_file():
warnings.warn(
f"Failed validation in bitstream tag of resource '{calling_resource}': "
f"The following path doesn't point to a file: {path}",
Expand Down
4 changes: 2 additions & 2 deletions src/dsp_tools/commands/project/create/project_validate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import importlib.resources
import json
import os
from pathlib import Path
from typing import Any, Union

import jsonpath_ng
Expand Down Expand Up @@ -238,7 +238,7 @@ def validate_project(
project_definition = input_file_or_json
elif (
isinstance(input_file_or_json, str)
and os.path.isfile(input_file_or_json)
and Path(input_file_or_json).is_file()
and regex.search(r"\.json$", input_file_or_json)
):
with open(input_file_or_json, encoding="utf-8") as f:
Expand Down
3 changes: 1 addition & 2 deletions src/dsp_tools/commands/xmlupload/models/sipi.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import json
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -45,7 +44,7 @@ def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
API response
"""
with open(filepath, "rb") as bitstream_file:
files = {"file": (os.path.basename(filepath), bitstream_file)}
files = {"file": (Path(filepath).name, bitstream_file)}
url = self.sipi_server + "/upload"
headers = {"Authorization": "Bearer " + self.token}
timeout = 5 * 60
Expand Down