Skip to content

Commit

Permalink
chore: replace os.path and os.scandir with pathlib.Path (#648)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jnussbaum committed Nov 16, 2023
1 parent 9272898 commit e325237
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 27 deletions.
23 changes: 8 additions & 15 deletions src/dsp_tools/commands/excel2json/lists.py
@@ -1,8 +1,7 @@
"""This module handles all the operations which are used for the creation of JSON lists from Excel files."""
import glob
import importlib.resources
import json
import os
from pathlib import Path
from typing import Any, Optional, Union

import jsonschema
Expand Down Expand Up @@ -196,7 +195,7 @@ def _get_values_from_excel(


def _make_json_lists_from_excel(
excel_file_paths: list[str],
excel_file_paths: list[Path],
verbose: bool = False,
) -> list[dict[str, Any]]:
"""
Expand Down Expand Up @@ -224,10 +223,7 @@ def _make_json_lists_from_excel(
startcol = 1

# make a dict with the language labels and the worksheets
lang_to_worksheet = {
os.path.basename(filepath)[:2]: load_workbook(filepath, read_only=True).worksheets[0]
for filepath in excel_file_paths
}
lang_to_worksheet = {x.stem: load_workbook(x, read_only=True).worksheets[0] for x in excel_file_paths}

# take English as base file. If English is not available, take a random one.
base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]
Expand Down Expand Up @@ -308,7 +304,7 @@ def validate_lists_section_with_schema(
return True


def _extract_excel_file_paths(excelfolder: str) -> list[str]:
def _extract_excel_file_paths(excelfolder: str) -> list[Path]:
"""
This method extracts the names of the Excel files that are in the folder, and asserts that they are named according
to the requirements.
Expand All @@ -322,17 +318,14 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
Returns:
list of the Excel file paths to process
"""
if not os.path.isdir(excelfolder):
if not Path(excelfolder).is_dir():
raise UserError(f"ERROR: {excelfolder} is not a directory.")

excel_file_paths = [
filename
for filename in glob.iglob(f"{excelfolder}/*.xlsx")
if not os.path.basename(filename).startswith("~$") and os.path.isfile(filename)
]
supported_files = ["en.xlsx", "de.xlsx", "fr.xlsx", "it.xlsx", "rm.xlsx"]
excel_file_paths = [x for x in Path(excelfolder).glob("*.xlsx") if x.is_file() and not x.name.startswith("~$")]

for filepath in excel_file_paths:
if not regex.search(r"^(de|en|fr|it|rm)\.xlsx$", os.path.basename(filepath)):
if filepath.name not in supported_files:
raise UserError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")

return excel_file_paths
Expand Down
14 changes: 7 additions & 7 deletions src/dsp_tools/commands/excel2json/project.py
@@ -1,5 +1,5 @@
import json
import os
from pathlib import Path

import regex

Expand Down Expand Up @@ -45,28 +45,28 @@ def excel2json(

# validate input
# --------------
if not os.path.isdir(data_model_files):
if not Path(data_model_files).is_dir():
raise UserError(f"ERROR: {data_model_files} is not a directory.")
folder = [x for x in os.scandir(data_model_files) if not regex.search(r"^(\.|~\$).+", x.name)]
folder = [x for x in Path(data_model_files).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)]

processed_files = []
onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
onto_folders = [x for x in folder if x.is_dir() and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
if not onto_folders:
raise UserError(
f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
)
for onto_folder in onto_folders:
contents = sorted([x.name for x in os.scandir(onto_folder) if not regex.search(r"^(\.|~\$).+", x.name)])
contents = sorted([x.name for x in Path(onto_folder).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)])
if contents != ["properties.xlsx", "resources.xlsx"]:
raise UserError(
f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' "
"and one file 'resources.xlsx', but nothing else."
)
processed_files.extend([f"{data_model_files}/{onto_folder.name}/{file}" for file in contents])

listfolder = [x for x in folder if os.path.isdir(x) and x.name == "lists"]
listfolder = [x for x in folder if x.is_dir() and x.name == "lists"]
if listfolder:
listfolder_contents = [x for x in os.scandir(listfolder[0]) if not regex.search(r"^(\.|~\$).+", x.name)]
listfolder_contents = [x for x in Path(listfolder[0]).glob("*") if not regex.search(r"^(\.|~\$).+", x.name)]
if not all(regex.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents):
raise UserError(
f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx"
Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Expand Up @@ -423,7 +423,7 @@ def make_bitstream_prop(
See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#bitstream
"""

if not os.path.isfile(path):
if not Path(path).is_file():
warnings.warn(
f"Failed validation in bitstream tag of resource '{calling_resource}': "
f"The following path doesn't point to a file: {path}",
Expand Down
4 changes: 2 additions & 2 deletions src/dsp_tools/commands/project/create/project_validate.py
@@ -1,6 +1,6 @@
import importlib.resources
import json
import os
from pathlib import Path
from typing import Any, Union

import jsonpath_ng
Expand Down Expand Up @@ -238,7 +238,7 @@ def validate_project(
project_definition = input_file_or_json
elif (
isinstance(input_file_or_json, str)
and os.path.isfile(input_file_or_json)
and Path(input_file_or_json).is_file()
and regex.search(r"\.json$", input_file_or_json)
):
with open(input_file_or_json, encoding="utf-8") as f:
Expand Down
3 changes: 1 addition & 2 deletions src/dsp_tools/commands/xmlupload/models/sipi.py
@@ -1,5 +1,4 @@
import json
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -45,7 +44,7 @@ def upload_bitstream(self, filepath: str) -> dict[Any, Any]:
API response
"""
with open(filepath, "rb") as bitstream_file:
files = {"file": (os.path.basename(filepath), bitstream_file)}
files = {"file": (Path(filepath).name, bitstream_file)}
url = self.sipi_server + "/upload"
headers = {"Authorization": "Bearer " + self.token}
timeout = 5 * 60
Expand Down

0 comments on commit e325237

Please sign in to comment.