dasch-swiss · Nora-Olivia-Ammann · Sep 20, 2023 · Sep 15, 2023 · Sep 15, 2023 · Sep 15, 2023
diff --git a/src/dsp_tools/utils/excel2json/lists.py b/src/dsp_tools/utils/excel2json/lists.py
@@ -11,7 +11,7 @@
 from openpyxl.cell import Cell
 from openpyxl.worksheet.worksheet import Worksheet
 
-from dsp_tools.models.exceptions import BaseError
+from dsp_tools.models.exceptions import BaseError, UserError
 from dsp_tools.utils.shared import simplify_name
 
 list_of_lists_of_previous_cell_values: list[list[str]] = []
@@ -36,7 +36,7 @@ def expand_lists_from_excel(
             If this is an empty list, an empty list will be returned.
 
     Raises:
-        BaseError: if a problem occurred while trying to expand the Excel files
+        UserError: if a problem occurred while trying to expand the Excel files
 
     Returns:
         the same "lists" section, but without references to Excel files
@@ -63,7 +63,7 @@ def expand_lists_from_excel(
                 f"files therein have been temporarily expanded into the 'lists' section of your project."
             )
         except BaseError as err:
-            raise BaseError(
+            raise UserError(
                 f"\tWARNING: The list '{_list['name']}' contains a reference to the folder '{foldername}', but a "
                 f"problem occurred while trying to expand the Excel files therein into the 'lists' section of "
                 f"your project: {err.message}"
@@ -95,7 +95,7 @@ def _get_values_from_excel(
         verbose: verbose switch
 
     Raises:
-        BaseError: if one of the Excel files contains invalid data
+        UserError: if one of the Excel files contains invalid data
 
     Returns:
         int: Row index for the next loop (current row index minus 1)
@@ -108,7 +108,7 @@ def _get_values_from_excel(
 
     for excelfile in excelfiles.values():
         if any((not excelfile["A1"].value, excelfile["B1"].value)):
-            raise BaseError(
+            raise UserError(
                 f"ERROR: Inconsistency in Excel list: The first row must consist of exactly one value, in cell A1. "
                 f"All other cells of row 1 must be empty.\nInstead, found the following:\n"
                 f" - Cell A1: '{excelfile['A1'].value}'\n"
@@ -124,7 +124,7 @@ def _get_values_from_excel(
         # check if all predecessors in row (values to the left) are consistent with the values in preval list
         for idx, val in enumerate(preval[:-1]):
             if val != str(base_file_ws.cell(column=idx + 1, row=row).value).strip():
-                raise BaseError(
+                raise UserError(
                     "ERROR: Inconsistency in Excel list: "
                     f"{val} not equal to {str(base_file_ws.cell(column=idx+1, row=row).value).strip()}"
                 )
@@ -144,13 +144,13 @@ def _get_values_from_excel(
 
         # if value was last in row (no further values to the right), it's a node, continue here
         else:
-            # check if there are duplicate nodes (i.e. identical rows), raise a BaseError if so
+            # check if there are duplicate nodes (i.e. identical rows), raise a UserError if so
             new_check_list = preval.copy()
             new_check_list.append(str(cell.value).strip())
             list_of_lists_of_previous_cell_values.append(new_check_list)
 
             if any(list_of_lists_of_previous_cell_values.count(x) > 1 for x in list_of_lists_of_previous_cell_values):
-                raise BaseError(
+                raise UserError(
                     f"ERROR: There is at least one duplicate node in the list. "
                     f"Found duplicate in column {cell.column}, row {cell.row}:\n'{str(cell.value).strip()}'"
                 )
@@ -169,7 +169,7 @@ def _get_values_from_excel(
             for other_lang, ws_other_lang in excelfiles.items():
                 cell_value = ws_other_lang.cell(column=col, row=row).value
                 if not (isinstance(cell_value, str) and len(cell_value) > 0):
-                    raise BaseError(
+                    raise UserError(
                         "ERROR: Malformed Excel file: The Excel file with the language code "
                         f"'{other_lang}' should have a value in row {row}, column {col}"
                     )
@@ -208,7 +208,7 @@ def _make_json_lists_from_excel(
         verbose: verbose switch
 
     Raises:
-        BaseError: if one of the Excel files contains invalid data
+        UserError: if one of the Excel files contains invalid data
 
     Returns:
         The finished "lists" section
@@ -272,7 +272,8 @@ def validate_lists_section_with_schema(
         lists_section: the "lists" section as Python object
 
     Raises:
-        BaseError: if the validation fails
+        UserError: if the validation fails
+        BaseError: if this function is called with invalid parameters
 
     Returns:
         True if the "lists" section passed validation
@@ -290,15 +291,15 @@ def validate_lists_section_with_schema(
             project = json.load(f)
             lists_section = project["project"].get("lists")
             if not lists_section:
-                raise BaseError(
+                raise UserError(
                     f"Cannot validate 'lists' section of {path_to_json_project_file}, "
                     "because there is no 'lists' section in this file."
                 )
 
     try:
         jsonschema.validate(instance={"lists": lists_section}, schema=lists_schema)
     except jsonschema.ValidationError as err:
-        raise BaseError(
+        raise UserError(
             f"'lists' section did not pass validation. The error message is: {err.message}\n"
             f"The error occurred at {err.json_path}"
         ) from None
@@ -315,13 +316,13 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
         excelfolder: path to the folder containing the Excel file(s)
 
     Raises:
-        BaseError: if excelfolder is not a directory, or if one of the files in it has an invalid name
+        UserError: if excelfolder is not a directory, or if one of the files in it has an invalid name
 
     Returns:
         list of the Excel file paths to process
     """
     if not os.path.isdir(excelfolder):
-        raise BaseError(f"ERROR: {excelfolder} is not a directory.")
+        raise UserError(f"ERROR: {excelfolder} is not a directory.")
 
     excel_file_paths = [
         filename
@@ -331,7 +332,7 @@ def _extract_excel_file_paths(excelfolder: str) -> list[str]:
 
     for filepath in excel_file_paths:
         if not regex.search(r"^(de|en|fr|it|rm)\.xlsx$", os.path.basename(filepath)):
-            raise BaseError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")
+            raise UserError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")
 
     return excel_file_paths
 
@@ -350,7 +351,8 @@ def excel2lists(
         verbose: verbose switch
 
     Raises:
-        BaseError if something went wrong
+        UserError: if something went wrong
+        BaseError: if something went wrong
 
     Returns:
         a tuple consisting of the "lists" section as Python list, and the success status (True if everything went well)

diff --git a/src/dsp_tools/utils/excel2json/project.py b/src/dsp_tools/utils/excel2json/project.py
@@ -3,7 +3,7 @@
 
 import regex
 
-from dsp_tools.models.exceptions import BaseError
+from dsp_tools.models.exceptions import UserError
 from dsp_tools.utils.excel2json.lists import excel2lists
 from dsp_tools.utils.excel2json.properties import excel2properties
 from dsp_tools.utils.excel2json.resources import excel2resources
@@ -34,6 +34,7 @@ def excel2json(
         path_to_output_file: path to the file where the output JSON file will be saved
 
     Raises:
+        UserError: if data_model_files is not a directory or has an invalid folder structure
         BaseError: if something went wrong
 
     Returns:
@@ -45,19 +46,19 @@ def excel2json(
     # validate input
     # --------------
     if not os.path.isdir(data_model_files):
-        raise BaseError(f"ERROR: {data_model_files} is not a directory.")
+        raise UserError(f"ERROR: {data_model_files} is not a directory.")
     folder = [x for x in os.scandir(data_model_files) if not regex.search(r"^(\.|~\$).+", x.name)]
 
     processed_files = []
     onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
     if len(onto_folders) == 0:
-        raise BaseError(
+        raise UserError(
             f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
         )
     for onto_folder in onto_folders:
         contents = sorted([x.name for x in os.scandir(onto_folder) if not regex.search(r"^(\.|~\$).+", x.name)])
         if contents != ["properties.xlsx", "resources.xlsx"]:
-            raise BaseError(
+            raise UserError(
                 f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' "
                 "and one file 'resources.xlsx', but nothing else."
             )
@@ -67,13 +68,13 @@ def excel2json(
     if listfolder:
         listfolder_contents = [x for x in os.scandir(listfolder[0]) if not regex.search(r"^(\.|~\$).+", x.name)]
         if not all(regex.search(r"(de|en|fr|it|rm).xlsx", file.name) for file in listfolder_contents):
-            raise BaseError(
+            raise UserError(
                 f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx"
             )
         processed_files = [f"{data_model_files}/lists/{file.name}" for file in listfolder_contents] + processed_files
 
     if len(onto_folders) + len(listfolder) != len(folder):
-        raise BaseError(
+        raise UserError(
             f"The only allowed subfolders in '{data_model_files}' are 'lists' "
             "and folders that match the pattern 'onto_name (onto_label)'"
         )

diff --git a/src/dsp_tools/utils/excel2json/resources.py b/src/dsp_tools/utils/excel2json/resources.py
@@ -8,7 +8,7 @@
 import pandas as pd
 import regex
 
-from dsp_tools.models.exceptions import BaseError
+from dsp_tools.models.exceptions import UserError
 from dsp_tools.utils.shared import check_notna, prepare_dataframe
 
 languages = ["en", "de", "fr", "it", "rm"]
@@ -27,7 +27,7 @@ def _validate_resources(
         excelfile: path to the Excel file containing the resources
 
     Raises:
-        BaseError: if the validation fails
+        UserError: if the validation fails
 
     Returns:
         True if the "resources" section passed validation
@@ -70,7 +70,7 @@ def _validate_resources(
                     )
         else:
             err_msg += f"The error message is: {err.message}\nThe error occurred at {err.json_path}"
-        raise BaseError(err_msg) from None
+        raise UserError(err_msg) from None
 
     # check if resource names are unique
     all_names = [r["name"] for r in resources_list]
@@ -85,7 +85,7 @@ def _validate_resources(
         )
         for row_no, resname in duplicates.items():
             err_msg += f" - Row {row_no}: {resname}\n"
-        raise BaseError(err_msg)
+        raise UserError(err_msg)
 
     return True
 
@@ -104,7 +104,7 @@ def _row2resource(
         excelfile: Excel file where the data comes from
 
     Raises:
-        BaseError: if the row or the details sheet contains invalid data
+        UserError: if the row or the details sheet contains invalid data
 
     Returns:
         dict object of the resource
@@ -135,7 +135,7 @@ def _row2resource(
         try:
             details_df = pd.read_excel(excelfile, sheet_name=name)
         except ValueError as err:
-            raise BaseError(str(err)) from None
+            raise UserError(str(err)) from None
         p.stop()
     details_df = prepare_dataframe(
         df=details_df,
@@ -163,7 +163,7 @@ def _row2resource(
     else:  # column gui_order present but not properly filled in (missing values)
         validation_passed = False
     if not validation_passed:
-        raise BaseError(
+        raise UserError(
             f"Sheet '{name}' in file '{excelfile}' has invalid content in column 'gui_order': "
             f"only positive integers allowed (or leave column empty altogether)"
         )
@@ -203,7 +203,7 @@ def excel2resources(
             (otherwise, it's only returned as return value)
 
     Raises:
-        BaseError: if something went wrong
+        UserError: if something went wrong
 
     Returns:
         a tuple consisting of the "resources" section as Python list,
@@ -237,7 +237,7 @@ def excel2resources(
     for index, row in all_classes_df.iterrows():
         index = int(str(index))  # index is a label/index/hashable, but we need an int
         if not check_notna(row["super"]):
-            raise BaseError(f"Sheet 'classes' of '{excelfile}' has a missing value in row {index + 2}, column 'super'")
+            raise UserError(f"Sheet 'classes' of '{excelfile}' has a missing value in row {index + 2}, column 'super'")
     if any(all_classes_df.get(lang) is not None for lang in languages):
         warnings.warn(
             f"The file {excelfile} uses {languages} as column titles, which is deprecated. "