Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(excel2json): don't crash if optional columns are deleted in "properties" Excel file (DEV-2652) #518

Merged
merged 7 commits into from
Sep 15, 2023
27 changes: 17 additions & 10 deletions src/dsp_tools/utils/excel2json/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,16 +313,6 @@ def _do_property_excel_compliance(df: pd.DataFrame, excelfile: str) -> None:
# If it does not pass any one of the tests, the function stops
required_columns = {
"name",
"label_en",
"label_de",
"label_fr",
"label_it",
"label_rm",
"comment_en",
"comment_de",
"comment_fr",
"comment_it",
"comment_rm",
"super",
"object",
"gui_element",
Expand Down Expand Up @@ -442,6 +432,23 @@ def excel2properties(

_do_property_excel_compliance(df=property_df, excelfile=excelfile)

# The label and comment columns are optional, and users may delete them from the Excel file for ease of use.
# Re-add any missing ones as empty columns, because their absence would cause an error later.
property_df = utl.add_optional_columns(
df=property_df,
optional_col_set={
"label_en",
"label_de",
"label_fr",
"label_it",
"label_rm",
"comment_en",
"comment_de",
"comment_fr",
"comment_it",
"comment_rm",
},
)

# transform every row into a property
props: list[dict[str, Any]] = []
for index, row in property_df.iterrows():
Expand Down
22 changes: 22 additions & 0 deletions src/dsp_tools/utils/excel2json/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,25 @@ def col_must_or_not_empty_based_on_other_col(
return pd.Series(combined_array)
else:
return None


def add_optional_columns(df: pd.DataFrame, optional_col_set: set[str]) -> pd.DataFrame:
    """
    This function takes a df and a set of columns which may not be in the df,
    but whose absence could cause errors in the code following.
    The missing columns are added, without any values in the rows.

    Args:
        df: Original df
        optional_col_set: set of columns that may not be in the df, if they are not, they will be added.

    Returns:
        The df with the missing columns appended after the existing ones, in alphabetical order.
        If all are already there, the df is returned unchanged.
    """
    existing_cols = set(df.columns)
    # Sets have no defined iteration order, so sort the missing names:
    # this makes the column order of the result deterministic across runs.
    missing_cols = sorted(col for col in optional_col_set if col not in existing_cols)
    if not missing_cols:
        return df
    # The empty frame shares the index of df, so that the column-wise concat aligns row by row.
    empty_cols_df = pd.DataFrame(columns=missing_cols, index=df.index)
    return pd.concat(objs=[df, empty_cols_df], axis=1)
27 changes: 27 additions & 0 deletions test/unittests/test_excel2json/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,33 @@ def test_get_comments(self) -> None:
returned_none = utl.get_comments(original_df.loc[1, :])
self.assertIsNone(cast(None, returned_none))

def test_add_optional_columns(self) -> None:
    """Missing optional columns are added as empty columns; a complete df is returned unchanged."""
    empty_column = [pd.NA, pd.NA]
    optional_cols = {"comment_en", "comment_de", "comment_fr", "comment_it", "comment_rm"}
    df_with_missing_cols = pd.DataFrame(
        {
            "comment_en": ["text_en", pd.NA],
            "comment_it": ["text_it", pd.NA],
            "comment_rm": empty_column,
        }
    )
    df_complete = pd.DataFrame(
        {
            "comment_de": empty_column,
            "comment_en": ["text_en", pd.NA],
            "comment_fr": empty_column,
            "comment_it": ["text_it", pd.NA],
            "comment_rm": empty_column,
        }
    )

    result = utl.add_optional_columns(df=df_with_missing_cols, optional_col_set=optional_cols)
    # sort the columns alphabetically, so that the comparison does not depend on
    # the order in which the missing columns were appended
    assert_frame_equal(df_complete, result.sort_index(axis=1))

    # no column is missing here, so the df must come back unchanged
    result_complete = utl.add_optional_columns(df=df_complete, optional_col_set=optional_cols)
    assert_frame_equal(df_complete, result_complete)


# When this file is executed directly as a script, run it through pytest.
if __name__ == "__main__":
pytest.main([__file__])