Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(excel2json): replace BaseError with UserError (DEV-2671) #523

Merged
36 changes: 19 additions & 17 deletions src/dsp_tools/utils/excel2json/lists.py
Expand Up @@ -11,7 +11,7 @@
from openpyxl.cell import Cell
from openpyxl.worksheet.worksheet import Worksheet

from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.exceptions import BaseError, UserError
from dsp_tools.utils.shared import simplify_name

list_of_lists_of_previous_cell_values: list[list[str]] = []
Expand All @@ -36,7 +36,7 @@ def expand_lists_from_excel(
If this is an empty list, an empty list will be returned.

Raises:
BaseError: if a problem occurred while trying to expand the Excel files
UserError: if a problem occurred while trying to expand the Excel files

Returns:
the same "lists" section, but without references to Excel files
Expand All @@ -63,7 +63,7 @@ def expand_lists_from_excel(
f"files therein have been temporarily expanded into the 'lists' section of your project."
)
except BaseError as err:
raise BaseError(
raise UserError(
f"\tWARNING: The list '{_list['name']}' contains a reference to the folder '{foldername}', but a "
f"problem occurred while trying to expand the Excel files therein into the 'lists' section of "
f"your project: {err.message}"
Expand Down Expand Up @@ -95,7 +95,7 @@ def _get_values_from_excel(
verbose: verbose switch

Raises:
BaseError: if one of the Excel files contains invalid data
UserError: if one of the Excel files contains invalid data

Returns:
int: Row index for the next loop (current row index minus 1)
Expand All @@ -108,7 +108,7 @@ def _get_values_from_excel(

for excelfile in excelfiles.values():
if any((not excelfile["A1"].value, excelfile["B1"].value)):
raise BaseError(
raise UserError(
f"ERROR: Inconsistency in Excel list: The first row must consist of exactly one value, in cell A1. "
f"All other cells of row 1 must be empty.\nInstead, found the following:\n"
f" - Cell A1: '{excelfile['A1'].value}'\n"
Expand All @@ -124,7 +124,7 @@ def _get_values_from_excel(
# check if all predecessors in row (values to the left) are consistent with the values in preval list
for idx, val in enumerate(preval[:-1]):
if val != str(base_file_ws.cell(column=idx + 1, row=row).value).strip():
raise BaseError(
raise UserError(
"ERROR: Inconsistency in Excel list: "
f"{val} not equal to {str(base_file_ws.cell(column=idx+1, row=row).value).strip()}"
)
Expand All @@ -144,13 +144,13 @@ def _get_values_from_excel(

# if value was last in row (no further values to the right), it's a node, continue here
else:
# check if there are duplicate nodes (i.e. identical rows), raise a BaseError if so
# check if there are duplicate nodes (i.e. identical rows), raise a UserError if so
new_check_list = preval.copy()
new_check_list.append(str(cell.value).strip())
list_of_lists_of_previous_cell_values.append(new_check_list)

if any(list_of_lists_of_previous_cell_values.count(x) > 1 for x in list_of_lists_of_previous_cell_values):
raise BaseError(
raise UserError(
f"ERROR: There is at least one duplicate node in the list. "
f"Found duplicate in column {cell.column}, row {cell.row}:\n'{str(cell.value).strip()}'"
)
Expand All @@ -169,7 +169,7 @@ def _get_values_from_excel(
for other_lang, ws_other_lang in excelfiles.items():
cell_value = ws_other_lang.cell(column=col, row=row).value
if not (isinstance(cell_value, str) and len(cell_value) > 0):
raise BaseError(
raise UserError(
"ERROR: Malformed Excel file: The Excel file with the language code "
f"'{other_lang}' should have a value in row {row}, column {col}"
)
Expand Down Expand Up @@ -208,7 +208,7 @@ def _make_json_lists_from_excel(
verbose: verbose switch

Raises:
BaseError: if one of the Excel files contains invalid data
UserError: if one of the Excel files contains invalid data

Returns:
The finished "lists" section
Expand Down Expand Up @@ -272,7 +272,8 @@ def validate_lists_section_with_schema(
lists_section: the "lists" section as Python object

Raises:
BaseError: if the validation fails
UserError: if the validation fails
BaseError: if this function is called with invalid parameters

Returns:
True if the "lists" section passed validation
Expand All @@ -290,15 +291,15 @@ def validate_lists_section_with_schema(
project = json.load(f)
lists_section = project["project"].get("lists")
if not lists_section:
raise BaseError(
raise UserError(
f"Cannot validate 'lists' section of {path_to_json_project_file}, "
"because there is no 'lists' section in this file."
)

try:
jsonschema.validate(instance={"lists": lists_section}, schema=lists_schema)
except jsonschema.ValidationError as err:
raise BaseError(
raise UserError(
f"'lists' section did not pass validation. The error message is: {err.message}\n"
f"The error occurred at {err.json_path}"
) from None
Expand All @@ -315,13 +316,13 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
excelfolder: path to the folder containing the Excel file(s)

Raises:
BaseError: if excelfolder is not a directory, or if one of the files in it has an invalid name
UserError: if excelfolder is not a directory, or if one of the files in it has an invalid name

Returns:
list of the Excel file paths to process
"""
if not os.path.isdir(excelfolder):
raise BaseError(f"ERROR: {excelfolder} is not a directory.")
raise UserError(f"ERROR: {excelfolder} is not a directory.")

excel_file_paths = [
filename
Expand All @@ -331,7 +332,7 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:

for filepath in excel_file_paths:
if not regex.search(r"^(de|en|fr|it|rm)\.xlsx$", os.path.basename(filepath)):
raise BaseError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")
raise UserError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")

return excel_file_paths

Expand All @@ -350,7 +351,8 @@ def excel2lists(
verbose: verbose switch

Raises:
BaseError if something went wrong
UserError: if something went wrong
BaseError: if something went wrong

Returns:
a tuple consisting of the "lists" section as Python list, and the success status (True if everything went well)
Expand Down
13 changes: 7 additions & 6 deletions src/dsp_tools/utils/excel2json/project.py
Expand Up @@ -3,7 +3,7 @@

import regex

from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.exceptions import UserError
from dsp_tools.utils.excel2json.lists import excel2lists
from dsp_tools.utils.excel2json.properties import excel2properties
from dsp_tools.utils.excel2json.resources import excel2resources
Expand Down Expand Up @@ -34,6 +34,7 @@ def excel2json(
path_to_output_file: path to the file where the output JSON file will be saved

Raises:
UserError: if something went wrong
BaseError: if something went wrong

Returns:
Expand All @@ -45,19 +46,19 @@ def excel2json(
# validate input
# --------------
if not os.path.isdir(data_model_files):
raise BaseError(f"ERROR: {data_model_files} is not a directory.")
raise UserError(f"ERROR: {data_model_files} is not a directory.")
folder = [x for x in os.scandir(data_model_files) if not regex.search(r"^(\.|~\$).+", x.name)]

processed_files = []
onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
if len(onto_folders) == 0:
raise BaseError(
raise UserError(
f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
)
for onto_folder in onto_folders:
contents = sorted([x.name for x in os.scandir(onto_folder) if not regex.search(r"^(\.|~\$).+", x.name)])
if contents != ["properties.xlsx", "resources.xlsx"]:
raise BaseError(
raise UserError(
f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' "
"and one file 'resources.xlsx', but nothing else."
)
Expand All @@ -67,13 +68,13 @@ def excel2json(
if listfolder:
listfolder_contents = [x for x in os.scandir(listfolder[0]) if not regex.search(r"^(\.|~\$).+", x.name)]
if not all(regex.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents):
raise BaseError(
raise UserError(
f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx"
)
processed_files = [f"{data_model_files}/lists/{file.name}" for file in listfolder_contents] + processed_files

if len(onto_folders) + len(listfolder) != len(folder):
raise BaseError(
raise UserError(
f"The only allowed subfolders in '{data_model_files}' are 'lists' "
"and folders that match the pattern 'onto_name (onto_label)'"
)
Expand Down
18 changes: 9 additions & 9 deletions src/dsp_tools/utils/excel2json/resources.py
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
import regex

from dsp_tools.models.exceptions import BaseError
from dsp_tools.models.exceptions import UserError
from dsp_tools.utils.shared import check_notna, prepare_dataframe

languages = ["en", "de", "fr", "it", "rm"]
Expand All @@ -27,7 +27,7 @@ def _validate_resources(
excelfile: path to the Excel file containing the resources

Raises:
BaseError: if the validation fails
UserError: if the validation fails

Returns:
True if the "resources" section passed validation
Expand Down Expand Up @@ -70,7 +70,7 @@ def _validate_resources(
)
else:
err_msg += f"The error message is: {err.message}\nThe error occurred at {err.json_path}"
raise BaseError(err_msg) from None
raise UserError(err_msg) from None

# check if resource names are unique
all_names = [r["name"] for r in resources_list]
Expand All @@ -85,7 +85,7 @@ def _validate_resources(
)
for row_no, resname in duplicates.items():
err_msg += f" - Row {row_no}: {resname}\n"
raise BaseError(err_msg)
raise UserError(err_msg)

return True

Expand All @@ -104,7 +104,7 @@ def _row2resource(
excelfile: Excel file where the data comes from

Raises:
BaseError: if the row or the details sheet contains invalid data
UserError: if the row or the details sheet contains invalid data

Returns:
dict object of the resource
Expand Down Expand Up @@ -135,7 +135,7 @@ def _row2resource(
try:
details_df = pd.read_excel(excelfile, sheet_name=name)
except ValueError as err:
raise BaseError(str(err)) from None
raise UserError(str(err)) from None
[Review thread on the line above: jnussbaum marked this conversation as resolved.]
p.stop()
details_df = prepare_dataframe(
df=details_df,
Expand Down Expand Up @@ -163,7 +163,7 @@ def _row2resource(
else: # column gui_order present but not properly filled in (missing values)
validation_passed = False
if not validation_passed:
raise BaseError(
raise UserError(
f"Sheet '{name}' in file '{excelfile}' has invalid content in column 'gui_order': "
f"only positive integers allowed (or leave column empty altogether)"
)
Expand Down Expand Up @@ -203,7 +203,7 @@ def excel2resources(
(otherwise, it's only returned as return value)

Raises:
BaseError: if something went wrong
UserError: if something went wrong

Returns:
a tuple consisting of the "resources" section as Python list,
Expand Down Expand Up @@ -237,7 +237,7 @@ def excel2resources(
for index, row in all_classes_df.iterrows():
index = int(str(index)) # index is a label/index/hashable, but we need an int
if not check_notna(row["super"]):
raise BaseError(f"Sheet 'classes' of '{excelfile}' has a missing value in row {index + 2}, column 'super'")
raise UserError(f"Sheet 'classes' of '{excelfile}' has a missing value in row {index + 2}, column 'super'")
if any(all_classes_df.get(lang) is not None for lang in languages):
warnings.warn(
f"The file {excelfile} uses {languages} as column titles, which is deprecated. "
Expand Down