Skip to content

Commit

Permalink
fix(excel2json): find hidden worksheets in excel (DEV-1483) (#679)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Dec 12, 2023
1 parent 24cb320 commit ffbf0c0
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 6 deletions.
16 changes: 16 additions & 0 deletions src/dsp_tools/commands/excel2json/input_error.py
Expand Up @@ -168,6 +168,22 @@ def execute_error_protocol(self) -> str:
return msg


@dataclass(frozen=True)
class MoreThanOneSheetProblem:
    """Describes an Excel file that does not contain exactly one sheet although only one is allowed."""

    # name of the offending Excel file, as it is shown in the message
    excelname: str
    # names of all the sheets that were found in that file
    sheet_names: list[str]

    def __str__(self) -> str:
        # Render the sheet names first so that the message lines below stay short.
        listed_sheets = list_separator.join(self.sheet_names)
        lines = (
            f"\nIn the '{self.excelname}' file only one sheet is allowed.",
            f"The excel used contains the following sheets:{list_separator}{listed_sheets}",
            "Please delete all but one sheet.",
        )
        return separator.join(lines)


@dataclass(frozen=True)
class JsonValidationPropertyProblem:
"""This class contains information about a JSON property section that fails its validation against the schema."""
Expand Down
13 changes: 11 additions & 2 deletions src/dsp_tools/commands/excel2json/lists.py
Expand Up @@ -10,7 +10,8 @@
from openpyxl.cell import Cell
from openpyxl.worksheet.worksheet import Worksheet

from dsp_tools.models.exceptions import BaseError, UserError
from dsp_tools.commands.excel2json.input_error import MoreThanOneSheetProblem
from dsp_tools.models.exceptions import BaseError, InputError, UserError
from dsp_tools.utils.shared import simplify_name

list_of_lists_of_previous_cell_values: list[list[str]] = []
Expand Down Expand Up @@ -271,7 +272,7 @@ def _make_json_lists_from_excel(
startcol = 1

# make a dict with the language labels and the worksheets
lang_to_worksheet = {x.stem: load_workbook(x, read_only=True).worksheets[0] for x in excel_file_paths}
lang_to_worksheet: dict[str, Worksheet] = {x.stem: _read_and_check_workbook(x) for x in excel_file_paths}

# take English as base file. If English is not available, take a random one.
base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]
Expand Down Expand Up @@ -304,6 +305,14 @@ def _make_json_lists_from_excel(
return finished_lists


def _read_and_check_workbook(excelpath: Path) -> Worksheet:
    """
    Open an Excel file and return its only worksheet.

    Args:
        excelpath: path to the Excel file

    Raises:
        InputError: if the workbook does not contain exactly one worksheet

    Returns:
        the single worksheet of the workbook
    """
    # NOTE(review): the workbook is opened read-only and never closed here; the returned
    # worksheet presumably still reads from the open file, so confirm before adding a close().
    workbook = load_workbook(excelpath, read_only=True)
    sheets = workbook.worksheets
    if len(sheets) == 1:
        return sheets[0]
    problem = MoreThanOneSheetProblem(excelpath.name, [sheet.title for sheet in sheets])
    raise InputError(str(problem))


def validate_lists_section_with_schema(
path_to_json_project_file: Optional[str] = None,
lists_section: Optional[list[dict[str, Any]]] = None,
Expand Down
14 changes: 12 additions & 2 deletions src/dsp_tools/commands/excel2json/properties.py
Expand Up @@ -13,6 +13,7 @@
InvalidExcelContentProblem,
JsonValidationPropertyProblem,
MissingValuesInRowProblem,
MoreThanOneSheetProblem,
PositionInExcel,
Problem,
)
Expand All @@ -26,7 +27,7 @@
get_comments,
get_labels,
get_wrong_row_numbers,
read_and_clean_excel_file,
read_and_clean_all_sheets,
)
from dsp_tools.models.exceptions import InputError

Expand Down Expand Up @@ -449,7 +450,8 @@ def excel2properties(
a tuple consisting of the "properties" section as a Python list,
and the success status (True if everything went well)
"""
property_df = read_and_clean_excel_file(excelfile=excelfile)

property_df = _read_check_property_df(excelfile)

property_df = _rename_deprecated_columnnames(df=property_df, excelfile=excelfile)

Expand Down Expand Up @@ -490,3 +492,11 @@ def excel2properties(
print(f"properties section was created successfully and written to file '{path_to_output_file}'")

return props, True


def _read_check_property_df(excelfile: str) -> pd.DataFrame:
    """
    Read the Excel file with the properties and make sure that it contains exactly one sheet.

    Args:
        excelfile: path to the Excel file containing the properties

    Raises:
        InputError: if the Excel file does not contain exactly one sheet

    Returns:
        the cleaned data frame of the single sheet
    """
    from pathlib import Path  # local import: only needed to derive the file name for the message

    sheets_df_dict = read_and_clean_all_sheets(excelfile=excelfile)
    if len(sheets_df_dict) != 1:
        # Report the name of the file that was actually passed in instead of a hard-coded one,
        # so the message stays correct when the file is not called "properties.xlsx".
        msg = str(MoreThanOneSheetProblem(Path(excelfile).name, list(sheets_df_dict)))
        raise InputError(msg)
    return next(iter(sheets_df_dict.values()))
5 changes: 3 additions & 2 deletions src/dsp_tools/commands/excel2json/utils.py
@@ -1,5 +1,6 @@
from __future__ import annotations

from pathlib import Path
from typing import Any
from unittest import mock

Expand Down Expand Up @@ -44,7 +45,7 @@ def read_and_clean_excel_file(excelfile: str, sheetname: str | int = 0) -> pd.Da
return read_df


def read_and_clean_all_sheets(excelfile: str) -> dict[str, pd.DataFrame]:
def read_and_clean_all_sheets(excelfile: str | Path) -> dict[str, pd.DataFrame]:
"""
This function reads an Excel file with all its sheets.
If there is a ValueError, it patches the openpyxl part that causes the error
Expand Down Expand Up @@ -73,7 +74,7 @@ def read_and_clean_all_sheets(excelfile: str) -> dict[str, pd.DataFrame]:
try:
return {name.strip(""): clean_data_frame(df) for name, df in df_dict.items()}
except AttributeError:
msg = InvalidSheetNameProblem(excelfile, list(df_dict.keys())).execute_error_protocol()
msg = InvalidSheetNameProblem(str(excelfile), list(df_dict.keys())).execute_error_protocol()
raise InputError(msg) from None


Expand Down

0 comments on commit ffbf0c0

Please sign in to comment.