Skip to content

Commit

Permalink
feat(excel2json): new error implementation for json validation (DEV-3047) (#664)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nora-Olivia-Ammann committed Dec 1, 2023
1 parent 6e307c2 commit fdbc545
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 139 deletions.
72 changes: 59 additions & 13 deletions src/dsp_tools/commands/excel2json/input_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,25 @@ def execute_error_protocol(self) -> str:
"""


@dataclass(frozen=True)
class PositionInExcel:
"""This class contains the information about the position of a value in the excel."""

sheet: str | None = None
column: str | None = None
row: int | None = None

def __str__(self) -> str:
msg = []
if self.sheet:
msg.append(f"Sheet '{self.sheet}'")
if self.column:
msg.append(f"Column '{self.column}'")
if self.row:
msg.append(f"Row {self.row}")
return "Located at: " + " | ".join(msg)


@dataclass(frozen=True)
class RequiredColumnMissingProblem:
"""This class contains information if a required column is missing."""
Expand Down Expand Up @@ -78,8 +97,7 @@ class InvalidExcelContentProblem:

expected_content: str
actual_content: str
column: str
row: int
excel_position: PositionInExcel

def execute_error_protocol(self) -> str:
"""
Expand All @@ -89,8 +107,9 @@ def execute_error_protocol(self) -> str:
message for the error
"""
return (
f"There is invalid content in the column: '{self.column}', row: {self.row}{separator}"
f"Expected Content: {self.expected_content}{separator}"
f"There is invalid content in the excel.\n"
f"{str(self.excel_position)}\n"
f"Expected Content: {self.expected_content}\n"
f"Actual Content: {self.actual_content}"
)

Expand All @@ -99,11 +118,10 @@ def execute_error_protocol(self) -> str:
class JsonValidationPropertyProblem:
"""This class contains information about a JSON property section that fails its validation against the schema."""

problematic_value: str | None = None
problematic_property: str | None = None
original_msg: str | None = None
message_path: str | None = None
excel_column: str | None = None
excel_row: int | None = None
excel_position: PositionInExcel | None = None

def execute_error_protocol(self) -> str:
"""
Expand All @@ -115,14 +133,42 @@ def execute_error_protocol(self) -> str:
msg = [
f"{separator}Section of the problem: 'Properties'",
]
if self.problematic_value:
msg.append(f"Problematic value: '{self.problematic_value}'")
if self.excel_row:
msg.append(f"The problem is caused by the value in the Excel row {self.excel_row}")
if self.excel_column:
msg.append(f"The problem is caused by the value in the Excel column '{self.excel_column}'")
if self.problematic_property:
msg.append(f"Problematic property: '{self.problematic_property}'")
if self.excel_position:
msg.append(str(self.excel_position))
if self.original_msg:
msg.append(f"Original Error Message:\n{self.original_msg}")
if self.message_path:
msg.append(f"The error occurred at {self.message_path}")
return separator.join(msg)


@dataclass(frozen=True)
class JsonValidationResourceProblem:
    """Describes a JSON resource section that did not pass validation against the schema."""

    problematic_resource: str | None = None
    excel_position: PositionInExcel | None = None
    original_msg: str | None = None
    message_path: str | None = None

    def execute_error_protocol(self) -> str:
        """
        Assemble the message that communicates this problem to the user.

        Returns:
            message for the error
        """
        # Each optional detail is paired with the line it contributes; details that are not set are left out.
        optional_details = (
            (self.problematic_resource, f"Problematic Resource '{self.problematic_resource}'"),
            (self.excel_position, str(self.excel_position)),
            (self.original_msg, f"Original Error Message:{separator}{self.original_msg}"),
            (self.message_path, f"The error occurred at {self.message_path}"),
        )
        lines = [f"{separator}Section of the problem: 'Resources'"]
        lines.extend(text for detail, text in optional_details if detail)
        return separator.join(lines)
48 changes: 29 additions & 19 deletions src/dsp_tools/commands/excel2json/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,33 @@
import pandas as pd
import regex

import dsp_tools.commands.excel2json.utils as utl
from dsp_tools.commands.excel2json.input_error import (
InvalidExcelContentProblem,
JsonValidationPropertyProblem,
MissingValuesInRowProblem,
PositionInExcel,
Problem,
)
from dsp_tools.commands.excel2json.utils import (
add_optional_columns,
check_column_for_duplicate,
check_contains_required_columns,
check_required_values,
col_must_or_not_empty_based_on_other_col,
find_one_full_cell_in_cols,
get_comments,
get_labels,
get_wrong_row_numbers,
read_and_clean_excel_file,
)
from dsp_tools.models.exceptions import InputError

languages = ["en", "de", "fr", "it", "rm"]
language_label_col = ["label_en", "label_de", "label_fr", "label_it", "label_rm"]
mandatory_properties = ["name", "object", "gui_element"]


def _search_json_validation_error_get_err_msg_str(
def _find_validation_problem(
properties_list: list[dict[str, Any]],
validation_error: jsonschema.ValidationError,
) -> JsonValidationPropertyProblem:
Expand Down Expand Up @@ -60,9 +72,8 @@ def _search_json_validation_error_get_err_msg_str(
val_msg = validation_error.message

return JsonValidationPropertyProblem(
problematic_value=wrong_property_name,
excel_row=excel_row,
excel_column=column,
problematic_property=wrong_property_name,
excel_position=PositionInExcel(column=column, row=excel_row),
original_msg=val_msg,
)
return JsonValidationPropertyProblem(
Expand Down Expand Up @@ -92,7 +103,7 @@ def _validate_properties(
try:
jsonschema.validate(instance=properties_list, schema=properties_schema)
except jsonschema.ValidationError as err:
err_msg = _search_json_validation_error_get_err_msg_str(properties_list=properties_list, validation_error=err)
err_msg = _find_validation_problem(properties_list=properties_list, validation_error=err)
msg = f"\nThe Excel file '{excelfile}' did not pass validation." + err_msg.execute_error_protocol()
raise InputError(msg) from None

Expand Down Expand Up @@ -184,8 +195,7 @@ def _get_gui_attribute(
return InvalidExcelContentProblem(
expected_content="attribute: value, attribute: value",
actual_content=df_row["gui_attributes"],
column="gui_attributes",
row=row_num,
excel_position=PositionInExcel(column="gui_attributes", row=row_num),
)


Expand All @@ -205,7 +215,7 @@ def _row2prop(df_row: pd.Series, row_num: int, excelfile: str) -> dict[str, Any]
InputError: if there are any formal mistakes in the "gui_attributes" column
"""
_property = {x: df_row[x] for x in mandatory_properties} | {
"labels": utl.get_labels(df_row=df_row),
"labels": get_labels(df_row=df_row),
"super": [s.strip() for s in df_row["super"].split(",")],
}

Expand All @@ -217,7 +227,7 @@ def _row2prop(df_row: pd.Series, row_num: int, excelfile: str) -> dict[str, Any]
msg = f"There is a problem with the excel file: '{excelfile}'\n" + gui_attrib.execute_error_protocol()
raise InputError(msg) from None

if comment := utl.get_comments(df_row=df_row):
if comment := get_comments(df_row=df_row):
_property["comments"] = comment

return _property
Expand All @@ -241,15 +251,15 @@ def _check_compliance_gui_attributes(df: pd.DataFrame) -> dict[str, pd.Series] |
InputError if any of the checks fail
"""
mandatory_attributes = ["Spinbox", "List"]
mandatory_check = utl.col_must_or_not_empty_based_on_other_col(
mandatory_check = col_must_or_not_empty_based_on_other_col(
df=df,
substring_list=mandatory_attributes,
substring_colname="gui_element",
check_empty_colname="gui_attributes",
must_have_value=True,
)
no_attributes = ["Checkbox", "Date", "Geonames", "Richtext", "TimeStamp"]
no_attribute_check = utl.col_must_or_not_empty_based_on_other_col(
no_attribute_check = col_must_or_not_empty_based_on_other_col(
df=df,
substring_list=no_attributes,
substring_colname="gui_element",
Expand Down Expand Up @@ -285,9 +295,9 @@ def _check_missing_values_in_row(df: pd.DataFrame) -> None | list[MissingValuesI
# Every row in these columns must have a value
required_values = ["name", "super", "object", "gui_element"]
# If there are no problems, it returns an empty dict
missing_dict = utl.check_required_values(df=df, required_values_columns=required_values)
missing_dict = check_required_values(df=df, required_values_columns=required_values)
# This checks if the label columns have at least one value per row
missing_labels = utl.find_one_full_cell_in_cols(df=df, required_columns=language_label_col)
missing_labels = find_one_full_cell_in_cols(df=df, required_columns=language_label_col)
# If everything is ok, we get None, otherwise we update the dict
if missing_labels is not None:
missing_dict.update({"label": missing_labels})
Expand All @@ -297,7 +307,7 @@ def _check_missing_values_in_row(df: pd.DataFrame) -> None | list[MissingValuesI
missing_dict.update(missing_gui_attributes)
if missing_dict:
# Get the row numbers from the boolean series
missing_dict = utl.get_wrong_row_numbers(wrong_row_dict=missing_dict, true_remains=True)
missing_dict = get_wrong_row_numbers(wrong_row_dict=missing_dict, true_remains=True)
return [MissingValuesInRowProblem(column=col, row_numbers=row_nums) for col, row_nums in missing_dict.items()]
else:
return None
Expand Down Expand Up @@ -325,8 +335,8 @@ def _do_property_excel_compliance(df: pd.DataFrame, excelfile: str) -> None:
"gui_attributes",
}
problems: list[Problem | None] = [
utl.check_contains_required_columns_else_raise_error(df=df, required_columns=required_columns),
utl.check_column_for_duplicate(df=df, to_check_column="name"),
check_contains_required_columns(df=df, required_columns=required_columns),
check_column_for_duplicate(df=df, to_check_column="name"),
]
if missing_vals_check := _check_missing_values_in_row(df=df):
problems.extend(missing_vals_check)
Expand Down Expand Up @@ -439,14 +449,14 @@ def excel2properties(
a tuple consisting of the "properties" section as a Python list,
and the success status (True if everything went well)
"""
property_df = utl.read_and_clean_excel_file(excelfile=excelfile)
property_df = read_and_clean_excel_file(excelfile=excelfile)

property_df = _rename_deprecated_columnnames(df=property_df, excelfile=excelfile)

_do_property_excel_compliance(df=property_df, excelfile=excelfile)

# Not all columns have to be filled, users may delete some for ease of use, but it would generate an error later
property_df = utl.add_optional_columns(
property_df = add_optional_columns(
df=property_df,
optional_col_set={
"label_en",
Expand Down

0 comments on commit fdbc545

Please sign in to comment.