diff --git a/src/dsp_tools/commands/excel2json/input_error.py b/src/dsp_tools/commands/excel2json/input_error.py index 432aed53a..f29a5258f 100644 --- a/src/dsp_tools/commands/excel2json/input_error.py +++ b/src/dsp_tools/commands/excel2json/input_error.py @@ -172,7 +172,7 @@ class MoreThanOneSheetProblem: excelname: str sheet_names: list[str] - def __str__(self) -> str: + def execute_error_protocol(self) -> str: msg = [ f"\nIn the '{self.excelname}' file only one sheet is allowed.", f"The excel used contains the following sheets:{list_separator}{list_separator.join(self.sheet_names)}", diff --git a/src/dsp_tools/commands/excel2json/lists.py b/src/dsp_tools/commands/excel2json/lists.py index 4f9a45f09..5b523d4be 100644 --- a/src/dsp_tools/commands/excel2json/lists.py +++ b/src/dsp_tools/commands/excel2json/lists.py @@ -308,7 +308,7 @@ def _make_json_lists_from_excel( def _read_and_check_workbook(excelpath: Path) -> Worksheet: all_worksheets = load_workbook(excelpath, read_only=True).worksheets if len(all_worksheets) != 1: - msg = str(MoreThanOneSheetProblem(excelpath.name, [x.title for x in all_worksheets])) + msg = MoreThanOneSheetProblem(excelpath.name, [x.title for x in all_worksheets]).execute_error_protocol() raise InputError(msg) return all_worksheets[0] diff --git a/src/dsp_tools/commands/excel2json/properties.py b/src/dsp_tools/commands/excel2json/properties.py index 80877cd34..5e2b25f8c 100644 --- a/src/dsp_tools/commands/excel2json/properties.py +++ b/src/dsp_tools/commands/excel2json/properties.py @@ -497,6 +497,6 @@ def excel2properties( def _read_check_property_df(excelfile: str) -> pd.DataFrame | None: sheets_df_dict = read_and_clean_all_sheets(excelfile=excelfile) if len(sheets_df_dict) != 1: - msg = str(MoreThanOneSheetProblem("properties.xlsx", list(sheets_df_dict.keys()))) + msg = MoreThanOneSheetProblem("properties.xlsx", list(sheets_df_dict.keys())).execute_error_protocol() raise InputError(msg) return next(iter(sheets_df_dict.values())) diff --git a/src/dsp_tools/commands/excel2json/resources.py b/src/dsp_tools/commands/excel2json/resources.py index 5f29f60c3..1f58efb3a 100644 --- a/src/dsp_tools/commands/excel2json/resources.py +++ b/src/dsp_tools/commands/excel2json/resources.py @@ -10,7 +10,9 @@ from dsp_tools.commands.excel2json.input_error import ( JsonValidationResourceProblem, + MissingValuesInRowProblem, PositionInExcel, + Problem, ResourcesSheetsNotAsExpected, ) from dsp_tools.commands.excel2json.utils import check_column_for_duplicate, read_and_clean_all_sheets @@ -191,17 +193,14 @@ def excel2resources( and the success status (True if everything went well) """ - resource_dfs = read_and_clean_all_sheets(excelfile) - classes_df = resource_dfs.pop("classes") - classes_df = prepare_dataframe( - df=classes_df, - required_columns=["name"], - location_of_sheet=f"Sheet 'classes' in file '{excelfile}'", - ) + all_dfs = read_and_clean_all_sheets(excelfile) + classes_df, resource_dfs = _prepare_classes_df(all_dfs) - if validation_problem := _validate_excel_file(classes_df, resource_dfs): - err_msg = validation_problem.execute_error_protocol() - raise InputError(err_msg) + if validation_problems := _validate_excel_file(classes_df, resource_dfs): + msg = "The excel file 'resources.xlsx', sheet 'classes' has a problem.\n" + "\n\n".join( + (x.execute_error_protocol() for x in validation_problems) + ) + raise InputError(msg) # transform every row into a resource resources = [_row2resource(row, resource_dfs[row["name"]]) for i, row in classes_df.iterrows()] @@ -217,26 +216,44 @@ def excel2resources( return resources, True -def _validate_excel_file( - classes_df: pd.DataFrame, df_dict: dict[str, pd.DataFrame] -) -> ResourcesSheetsNotAsExpected | None: - for i, row in classes_df.iterrows(): - index = int(str(i)) # index is a label/index/hashable, but we need an int - if not check_notna(row["super"]): - raise UserError( - f"Sheet 'classes' of 'resources.xlsx' has a missing value in row {index + 2}, column 'super'" - ) +def _prepare_classes_df(resource_dfs: dict[str, pd.DataFrame]) -> tuple[pd.DataFrame, dict[str, pd.DataFrame]]: + resource_dfs = {k.strip(): v for k, v in resource_dfs.items()} + sheet_name_list = list(resource_dfs) + cls_sheet_name = [ + ok.group(0) for x in sheet_name_list if (ok := regex.search(r"classes", flags=regex.IGNORECASE, string=x)) + ] + if not cls_sheet_name: + msg = ResourcesSheetsNotAsExpected(set(), names_sheets={"classes"}).execute_error_protocol() + raise InputError(msg) + elif len(cls_sheet_name) == 1: + classes_df = resource_dfs.pop(cls_sheet_name[0]) + else: + msg = ( + "The excel file 'resources.xlsx' has some problems.\n" + "There is more than one excel sheet called 'classes'.\n" + "This is a protected name and cannot be used for other sheets." + ) + raise InputError(msg) + classes_df = prepare_dataframe( + df=classes_df, + required_columns=["name"], + location_of_sheet="Sheet 'classes' in file 'resources.xlsx'", + ) + return classes_df, resource_dfs + + +def _validate_excel_file(classes_df: pd.DataFrame, df_dict: dict[str, pd.DataFrame]) -> list[Problem]: if any(classes_df.get(lang) is not None for lang in languages): warnings.warn( f"The file 'resources.xlsx' uses {languages} as column titles, which is deprecated. " f"Please use {[f'label_{lang}' for lang in languages]}" ) - duplicate_check = check_column_for_duplicate(classes_df, "name") - if duplicate_check: - msg = "The excel file 'resources.xlsx', sheet 'classes' has a problem.\n" - msg += duplicate_check.execute_error_protocol() - raise InputError(msg) + problems: list[Problem] = [] + if missing_super_rows := [int(index) + 2 for index, row in classes_df.iterrows() if not check_notna(row["super"])]: + problems.append(MissingValuesInRowProblem(column="super", row_numbers=missing_super_rows)) + if duplicate_check := check_column_for_duplicate(classes_df, "name"): + problems.append(duplicate_check) # check that all the sheets have an entry in the names column and vice versa - if (all_names := set(classes_df["name"].tolist())) != (all_sheets := set(df_dict.keys())): - return ResourcesSheetsNotAsExpected(all_names, all_sheets) - return None + if (all_names := set(classes_df["name"].tolist())) != (all_sheets := set(df_dict)): + problems.append(ResourcesSheetsNotAsExpected(all_names, all_sheets)) + return problems diff --git a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py index 4b4246901..67084bc89 100644 --- a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py +++ b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py @@ -270,7 +270,7 @@ def find_date_in_string(string: str) -> Optional[str]: def prepare_value( - value: Union[PropertyElement, str, int, float, bool, Iterable[Union[PropertyElement, str, int, float, bool]]] + value: Union[PropertyElement, str, int, float, bool, Iterable[Union[PropertyElement, str, int, float, bool]]], ) -> list[PropertyElement]: """ This method transforms the parameter "value" from a make_*_prop() method into a list of PropertyElements. "value" is