Skip to content

Commit

Permalink
fix(excel2json): support uppercase classes sheet in resources.xlsx (D…
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Dec 14, 2023
1 parent 2594d36 commit 71205f8
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 31 deletions.
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2json/input_error.py
Expand Up @@ -172,7 +172,7 @@ class MoreThanOneSheetProblem:
excelname: str
sheet_names: list[str]

def __str__(self) -> str:
def execute_error_protocol(self) -> str:
msg = [
f"\nIn the '{self.excelname}' file only one sheet is allowed.",
f"The excel used contains the following sheets:{list_separator}{list_separator.join(self.sheet_names)}",
Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2json/lists.py
Expand Up @@ -308,7 +308,7 @@ def _make_json_lists_from_excel(
def _read_and_check_workbook(excelpath: Path) -> Worksheet:
    """
    Load an Excel workbook and return its only worksheet.

    Args:
        excelpath: path to the Excel file

    Raises:
        InputError: if the workbook does not contain exactly one worksheet

    Returns:
        the single worksheet of the workbook
    """
    all_worksheets = load_workbook(excelpath, read_only=True).worksheets
    if len(all_worksheets) != 1:
        # The problem text is produced by execute_error_protocol();
        # str() on the problem object is not used for the user message.
        msg = MoreThanOneSheetProblem(excelpath.name, [x.title for x in all_worksheets]).execute_error_protocol()
        raise InputError(msg)
    return all_worksheets[0]

Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2json/properties.py
Expand Up @@ -497,6 +497,6 @@ def excel2properties(
def _read_check_property_df(excelfile: str) -> pd.DataFrame | None:
    """
    Read the Excel file and return the dataframe of its only sheet.

    Args:
        excelfile: path to the Excel file containing the properties

    Raises:
        InputError: if the file does not contain exactly one sheet

    Returns:
        the dataframe of the single sheet
    """
    sheets_df_dict = read_and_clean_all_sheets(excelfile=excelfile)
    if len(sheets_df_dict) != 1:
        # The problem text is produced by execute_error_protocol();
        # str() on the problem object is not used for the user message.
        msg = MoreThanOneSheetProblem("properties.xlsx", list(sheets_df_dict.keys())).execute_error_protocol()
        raise InputError(msg)
    return next(iter(sheets_df_dict.values()))
71 changes: 44 additions & 27 deletions src/dsp_tools/commands/excel2json/resources.py
Expand Up @@ -10,7 +10,9 @@

from dsp_tools.commands.excel2json.input_error import (
JsonValidationResourceProblem,
MissingValuesInRowProblem,
PositionInExcel,
Problem,
ResourcesSheetsNotAsExpected,
)
from dsp_tools.commands.excel2json.utils import check_column_for_duplicate, read_and_clean_all_sheets
Expand Down Expand Up @@ -191,17 +193,14 @@ def excel2resources(
and the success status (True if everything went well)
"""

resource_dfs = read_and_clean_all_sheets(excelfile)
classes_df = resource_dfs.pop("classes")
classes_df = prepare_dataframe(
df=classes_df,
required_columns=["name"],
location_of_sheet=f"Sheet 'classes' in file '{excelfile}'",
)
all_dfs = read_and_clean_all_sheets(excelfile)
classes_df, resource_dfs = _prepare_classes_df(all_dfs)

if validation_problem := _validate_excel_file(classes_df, resource_dfs):
err_msg = validation_problem.execute_error_protocol()
raise InputError(err_msg)
if validation_problems := _validate_excel_file(classes_df, resource_dfs):
msg = "The excel file 'resources.xlsx', sheet 'classes' has a problem.\n" + "\n\n".join(
(x.execute_error_protocol() for x in validation_problems)
)
raise InputError(msg)

# transform every row into a resource
resources = [_row2resource(row, resource_dfs[row["name"]]) for i, row in classes_df.iterrows()]
Expand All @@ -217,26 +216,44 @@ def excel2resources(
return resources, True


def _validate_excel_file(
classes_df: pd.DataFrame, df_dict: dict[str, pd.DataFrame]
) -> ResourcesSheetsNotAsExpected | None:
for i, row in classes_df.iterrows():
index = int(str(i)) # index is a label/index/hashable, but we need an int
if not check_notna(row["super"]):
raise UserError(
f"Sheet 'classes' of 'resources.xlsx' has a missing value in row {index + 2}, column 'super'"
)
def _prepare_classes_df(resource_dfs: dict[str, pd.DataFrame]) -> tuple[pd.DataFrame, dict[str, pd.DataFrame]]:
    """
    Separate the 'classes' sheet from the other sheets and prepare it.

    Sheet names are stripped of surrounding whitespace.
    The 'classes' sheet is recognised regardless of upper/lower case,
    but only if the whole sheet name is "classes".

    Args:
        resource_dfs: all sheets of the Excel file, keyed by sheet name

    Raises:
        InputError: if there is no sheet called 'classes', or more than one

    Returns:
        the prepared dataframe of the 'classes' sheet,
        and the remaining sheets keyed by their stripped names
    """
    resource_dfs = {k.strip(): v for k, v in resource_dfs.items()}
    # Compare the whole name and keep the real key: a substring search would also
    # pick up sheets like "subclasses", and would yield a key that may not exist in the dict.
    cls_sheet_names = [name for name in resource_dfs if name.casefold() == "classes"]
    if not cls_sheet_names:
        msg = ResourcesSheetsNotAsExpected(set(), names_sheets={"classes"}).execute_error_protocol()
        raise InputError(msg)
    if len(cls_sheet_names) > 1:
        msg = (
            "The excel file 'resources.xlsx' has some problems.\n"
            "There is more than one excel sheet called 'classes'.\n"
            "This is a protected name and cannot be used for other sheets."
        )
        raise InputError(msg)
    classes_df = prepare_dataframe(
        df=resource_dfs.pop(cls_sheet_names[0]),
        required_columns=["name"],
        location_of_sheet="Sheet 'classes' in file 'resources.xlsx'",
    )
    return classes_df, resource_dfs


def _validate_excel_file(classes_df: pd.DataFrame, df_dict: dict[str, pd.DataFrame]) -> list[Problem]:
    """
    Collect the problems of the 'classes' sheet and of its relation to the other sheets.

    All checks are run and their findings accumulated,
    so that the caller can report every problem at once.
    A deprecation warning is emitted if bare language codes are used as column titles.

    Args:
        classes_df: dataframe of the 'classes' sheet
        df_dict: the remaining sheets, keyed by sheet name

    Returns:
        the problems found; an empty list if there are none
    """
    if any(classes_df.get(lang) is not None for lang in languages):
        warnings.warn(
            f"The file 'resources.xlsx' uses {languages} as column titles, which is deprecated. "
            f"Please use {[f'label_{lang}' for lang in languages]}"
        )
    problems: list[Problem] = []
    # NOTE(review): the "+ 2" presumably maps the 0-based dataframe index to the Excel row number
    # (header row + 1-based counting) - confirm against read_and_clean_all_sheets
    if missing_super_rows := [int(index) + 2 for index, row in classes_df.iterrows() if not check_notna(row["super"])]:
        problems.append(MissingValuesInRowProblem(column="super", row_numbers=missing_super_rows))
    if duplicate_check := check_column_for_duplicate(classes_df, "name"):
        problems.append(duplicate_check)
    # check that all the sheets have an entry in the names column and vice versa
    if (all_names := set(classes_df["name"].tolist())) != (all_sheets := set(df_dict)):
        problems.append(ResourcesSheetsNotAsExpected(all_names, all_sheets))
    return problems
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Expand Up @@ -270,7 +270,7 @@ def find_date_in_string(string: str) -> Optional[str]:


def prepare_value(
value: Union[PropertyElement, str, int, float, bool, Iterable[Union[PropertyElement, str, int, float, bool]]]
value: Union[PropertyElement, str, int, float, bool, Iterable[Union[PropertyElement, str, int, float, bool]]],
) -> list[PropertyElement]:
"""
This method transforms the parameter "value" from a make_*_prop() method into a list of PropertyElements. "value" is
Expand Down

0 comments on commit 71205f8

Please sign in to comment.