Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(excel2json): find hidden worksheets in excel (DEV-1483) #679

Merged
merged 4 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/dsp_tools/commands/excel2json/input_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,22 @@ def execute_error_protocol(self) -> str:
return msg


@dataclass(frozen=True)
class MoreThanOneSheetProblem:
    """Problem description for an Excel file in which only one sheet is allowed."""

    # Name of the Excel file, as it is shown to the user in the message.
    excelname: str
    # Titles of the sheets that were found in the file.
    sheet_names: list[str]

    def __str__(self) -> str:
        """Build the user-facing message that lists the sheets found in the file."""
        joined_names = list_separator.join(self.sheet_names)
        parts = (
            f"\nIn the '{self.excelname}' file only one sheet is allowed.",
            # The leading list_separator puts the first sheet name on its own list entry.
            f"The excel used contains the following sheets:{list_separator}{joined_names}",
            "Please delete all but one sheet.",
        )
        return separator.join(parts)


@dataclass(frozen=True)
class JsonValidationPropertyProblem:
"""This class contains information about a JSON property section that fails its validation against the schema."""
Expand Down
13 changes: 11 additions & 2 deletions src/dsp_tools/commands/excel2json/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from openpyxl.cell import Cell
from openpyxl.worksheet.worksheet import Worksheet

from dsp_tools.models.exceptions import BaseError, UserError
from dsp_tools.commands.excel2json.input_error import MoreThanOneSheetProblem
from dsp_tools.models.exceptions import BaseError, InputError, UserError
from dsp_tools.utils.shared import simplify_name

list_of_lists_of_previous_cell_values: list[list[str]] = []
Expand Down Expand Up @@ -271,7 +272,7 @@ def _make_json_lists_from_excel(
startcol = 1

# make a dict with the language labels and the worksheets
lang_to_worksheet = {x.stem: load_workbook(x, read_only=True).worksheets[0] for x in excel_file_paths}
lang_to_worksheet: dict[str, Worksheet] = {x.stem: _read_and_check_workbook(x) for x in excel_file_paths}

# take English as base file. If English is not available, take a random one.
base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]
Expand Down Expand Up @@ -304,6 +305,14 @@ def _make_json_lists_from_excel(
return finished_lists


def _read_and_check_workbook(excelpath: Path) -> Worksheet:
    """
    Open an Excel file in read-only mode and return its only worksheet.

    Args:
        excelpath: path to the Excel file

    Raises:
        InputError: if the workbook does not contain exactly one worksheet

    Returns:
        the single worksheet of the workbook
    """
    workbook = load_workbook(excelpath, read_only=True)
    sheets = workbook.worksheets
    if len(sheets) == 1:
        return sheets[0]
    problem = MoreThanOneSheetProblem(excelpath.name, [sheet.title for sheet in sheets])
    raise InputError(str(problem))


def validate_lists_section_with_schema(
path_to_json_project_file: Optional[str] = None,
lists_section: Optional[list[dict[str, Any]]] = None,
Expand Down
14 changes: 12 additions & 2 deletions src/dsp_tools/commands/excel2json/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
InvalidExcelContentProblem,
JsonValidationPropertyProblem,
MissingValuesInRowProblem,
MoreThanOneSheetProblem,
PositionInExcel,
Problem,
)
Expand All @@ -26,7 +27,7 @@
get_comments,
get_labels,
get_wrong_row_numbers,
read_and_clean_excel_file,
read_and_clean_all_sheets,
)
from dsp_tools.models.exceptions import InputError

Expand Down Expand Up @@ -449,7 +450,8 @@ def excel2properties(
a tuple consisting of the "properties" section as a Python list,
and the success status (True if everything went well)
"""
property_df = read_and_clean_excel_file(excelfile=excelfile)

property_df = _read_check_property_df(excelfile)

property_df = _rename_deprecated_columnnames(df=property_df, excelfile=excelfile)

Expand Down Expand Up @@ -490,3 +492,11 @@ def excel2properties(
print(f"properties section was created successfully and written to file '{path_to_output_file}'")

return props, True


def _read_check_property_df(excelfile: str) -> pd.DataFrame:
    """
    Read all sheets of the Excel file and return the data frame of its only sheet.

    Args:
        excelfile: path to the Excel file

    Raises:
        InputError: if the file does not contain exactly one sheet

    Returns:
        the cleaned data frame of the single sheet
    """
    sheets_df_dict = read_and_clean_all_sheets(excelfile=excelfile)
    if len(sheets_df_dict) != 1:
        # NOTE(review): the file name in the message is hard-coded and not derived from
        # `excelfile` - confirm that this is intended.
        msg = str(MoreThanOneSheetProblem("properties.xlsx", list(sheets_df_dict.keys())))
        raise InputError(msg)
    # Exactly one entry is left at this point, so take it without building a temporary list.
    return next(iter(sheets_df_dict.values()))
Nora-Olivia-Ammann marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 3 additions & 2 deletions src/dsp_tools/commands/excel2json/utils.py
Nora-Olivia-Ammann marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from pathlib import Path
from typing import Any
from unittest import mock

Expand Down Expand Up @@ -44,7 +45,7 @@ def read_and_clean_excel_file(excelfile: str, sheetname: str | int = 0) -> pd.Da
return read_df


def read_and_clean_all_sheets(excelfile: str) -> dict[str, pd.DataFrame]:
def read_and_clean_all_sheets(excelfile: str | Path) -> dict[str, pd.DataFrame]:
Nora-Olivia-Ammann marked this conversation as resolved.
Show resolved Hide resolved
"""
This function reads an Excel file with all its sheets.
If there is a ValueError, it patches the openpyxl part that causes the error
Expand Down Expand Up @@ -73,7 +74,7 @@ def read_and_clean_all_sheets(excelfile: str) -> dict[str, pd.DataFrame]:
try:
return {name.strip(""): clean_data_frame(df) for name, df in df_dict.items()}
except AttributeError:
msg = InvalidSheetNameProblem(excelfile, list(df_dict.keys())).execute_error_protocol()
msg = InvalidSheetNameProblem(str(excelfile), list(df_dict.keys())).execute_error_protocol()
Nora-Olivia-Ammann marked this conversation as resolved.
Show resolved Hide resolved
raise InputError(msg) from None


Expand Down