Skip to content

Commit

Permalink
feat(excel2json): add optional column "subject" to properties.xlsx (D…
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Jan 31, 2024
1 parent 3676c35 commit cf491e9
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 65 deletions.
Binary file modified docs/assets/data_model_templates/rosetta (rosetta)/properties.xlsx
Binary file not shown.
3 changes: 3 additions & 0 deletions docs/file-formats/excel2json.md
Expand Up @@ -147,6 +147,9 @@ The expected columns are:
e.g. `TextValue`, `ListValue`, or `IntValue`.
- If the property is derived from `hasLinkTo`,
the `object` specifies the resource class that this property refers to.
- [`subject`](./json-project/ontologies.md#property-subject)
(optional): The subject defines the resource class that the property can be used on.
It has to be provided as the prefixed name of the resource class, e.g. `:Person`.
- [`gui_element`](./json-project/ontologies.md#property-object-gui_element-gui_attributes)
(mandatory): The graphic component, defines how this property should be displayed.
Depends on the value of `object`:
Expand Down
43 changes: 27 additions & 16 deletions src/dsp_tools/commands/excel2json/properties.py
@@ -1,3 +1,5 @@
from __future__ import annotations

import importlib.resources
import json
import warnings
Expand All @@ -18,7 +20,6 @@
Problem,
)
from dsp_tools.commands.excel2json.utils import (
add_optional_columns,
check_column_for_duplicate,
check_contains_required_columns,
check_required_values,
Expand Down Expand Up @@ -63,21 +64,7 @@ def excel2properties(
_do_property_excel_compliance(df=property_df, excelfile=excelfile)

# Users may delete optional columns from the Excel file for ease of use.
# They are added back here (empty), because their absence would cause an error later.
property_df = add_optional_columns(
df=property_df,
optional_col_set={
"label_en",
"label_de",
"label_fr",
"label_it",
"label_rm",
"comment_en",
"comment_de",
"comment_fr",
"comment_it",
"comment_rm",
},
)
property_df = _add_optional_columns(df=property_df)

# transform every row into a property
props = [
Expand Down Expand Up @@ -165,6 +152,28 @@ def _do_property_excel_compliance(df: pd.DataFrame, excelfile: str) -> None:
raise InputError("\n\n".join(msg))


def _add_optional_columns(df: pd.DataFrame) -> pd.DataFrame:
optional_col_set = {
"label_en",
"label_de",
"label_fr",
"label_it",
"label_rm",
"comment_en",
"comment_de",
"comment_fr",
"comment_it",
"comment_rm",
"subject",
}
in_df_cols = set(df.columns)
if not optional_col_set.issubset(in_df_cols):
additional_col = list(optional_col_set.difference(in_df_cols))
additional_df = pd.DataFrame(columns=additional_col, index=df.index, data=pd.NA)
df = pd.concat(objs=[df, additional_df], axis=1)
return df


def _check_missing_values_in_row(df: pd.DataFrame) -> None | list[MissingValuesInRowProblem]:
required_values = ["name", "super", "object", "gui_element"]
missing_dict = check_required_values(df=df, required_values_columns=required_values)
Expand Down Expand Up @@ -215,6 +224,8 @@ def _row2prop(df_row: pd.Series, row_num: int, excelfile: str) -> dict[str, Any]
"labels": get_labels(df_row=df_row),
"super": [s.strip() for s in df_row["super"].split(",")],
}
if not pd.isna(df_row["subject"]):
_property["subject"] = df_row["subject"]

gui_attrib = _get_gui_attribute(df_row=df_row, row_num=row_num)
match gui_attrib:
Expand Down
22 changes: 0 additions & 22 deletions src/dsp_tools/commands/excel2json/utils.py
Expand Up @@ -314,25 +314,3 @@ def col_must_or_not_empty_based_on_other_col(
# If both are True logical_and returns True otherwise False
combined_array = np.logical_and(na_series, substring_array)
return pd.Series(combined_array) if any(combined_array) else None


def add_optional_columns(df: pd.DataFrame, optional_col_set: set[str]) -> pd.DataFrame:
    """
    This function takes a df and a set of columns which may not be in the df,
    but whose absence could cause errors in the code following.
    The columns are added, without any values in the rows.

    Args:
        df: Original df
        optional_col_set: set of columns that may not be in the df, if they are not, they will be added.

    Returns:
        The df with the added columns, appended in alphabetical order.
        If all are already there, the df is returned unchanged.
    """
    in_df_cols = set(df.columns)
    # Sorting makes the order of the appended columns reproducible:
    # the iteration order of a set of strings may differ between interpreter runs.
    additional_col = sorted(optional_col_set.difference(in_df_cols))
    if additional_col:
        additional_df = pd.DataFrame(columns=additional_col, index=df.index, data=pd.NA)
        df = pd.concat(objs=[df, additional_df], axis=1)
    return df
50 changes: 50 additions & 0 deletions test/unittests/commands/excel2json/test_properties.py
Expand Up @@ -504,6 +504,7 @@ def test_row2prop(self) -> None:
expected_dict = {
"name": "name_1",
"object": "object_1",
"subject": "subject_1",
"gui_element": "Simple",
"labels": {
"en": "label_en_1",
Expand Down Expand Up @@ -531,6 +532,7 @@ def test_row2prop(self) -> None:
"labels": {"en": "label_en_2"},
"name": "name_2",
"object": "object_2",
"subject": "subject_2",
"super": ["super_2.1", "super_2.2"],
}
self.assertDictEqual(expected_dict, returned_dict)
Expand Down Expand Up @@ -626,6 +628,54 @@ def test_invalid_gui_attrib_values(self) -> None:
path_to_output_file="",
)

def test_add_optional_columns_with_missing_cols(self) -> None:
    """Check that every optional column absent from the input is appended as an all-NA column."""
    provided_cols = {
        "comment_en": ["text_en", pd.NA],
        "comment_it": ["text_it", pd.NA],
        "comment_rm": [pd.NA, pd.NA],
    }
    # all optional columns, in the alphabetical order that the comparison below relies on
    all_cols_sorted = [
        "comment_de",
        "comment_en",
        "comment_fr",
        "comment_it",
        "comment_rm",
        "label_de",
        "label_en",
        "label_fr",
        "label_it",
        "label_rm",
        "subject",
    ]
    expected_df = pd.DataFrame({col: provided_cols.get(col, [pd.NA, pd.NA]) for col in all_cols_sorted})
    result_df = e2j._add_optional_columns(df=pd.DataFrame(provided_cols))
    # the function makes no promise about the position of the added columns,
    # so they are brought into alphabetical order before the frames are compared
    assert_frame_equal(expected_df, result_df.sort_index(axis=1))

def test_add_optional_columns_no_missing_cols(self) -> None:
    """Check that a df which already contains every optional column comes back with identical content."""
    expected_df = pd.DataFrame(
        {
            "comment_de": [pd.NA, pd.NA],
            "comment_en": ["text_en", pd.NA],
            "comment_fr": [pd.NA, pd.NA],
            "comment_it": ["text_it", pd.NA],
            "comment_rm": [pd.NA, pd.NA],
            "label_de": [pd.NA, pd.NA],
            "label_en": [pd.NA, pd.NA],
            "label_fr": [pd.NA, pd.NA],
            "label_it": [pd.NA, pd.NA],
            "label_rm": [pd.NA, pd.NA],
            "subject": [pd.NA, pd.NA],
        }
    )
    # A copy is handed to the function: comparing expected_df with the very object that was passed in
    # would succeed even if the function altered the df in place.
    unchanged_df = e2j._add_optional_columns(df=expected_df.copy())
    assert_frame_equal(expected_df, unchanged_df)


if __name__ == "__main__":
pytest.main([__file__])
27 changes: 0 additions & 27 deletions test/unittests/commands/excel2json/test_utils.py
Expand Up @@ -176,33 +176,6 @@ def test_get_comments(self) -> None:

assert not utl.get_comments(original_df.loc[1, :])

def test_add_optional_columns(self) -> None:
    """Check that missing optional columns are added, and that a complete df keeps its content."""
    original_df = pd.DataFrame(
        {
            "comment_en": ["text_en", pd.NA],
            "comment_it": ["text_it", pd.NA],
            "comment_rm": [pd.NA, pd.NA],
        }
    )
    optional_cols = {"comment_en", "comment_de", "comment_fr", "comment_it", "comment_rm"}
    expected_df = pd.DataFrame(
        {
            "comment_de": [pd.NA, pd.NA],
            "comment_en": ["text_en", pd.NA],
            "comment_fr": [pd.NA, pd.NA],
            "comment_it": ["text_it", pd.NA],
            "comment_rm": [pd.NA, pd.NA],
        }
    )
    returned_df = utl.add_optional_columns(df=original_df, optional_col_set=optional_cols)
    # as the columns are extracted via a set, they are not sorted and may appear in any order,
    # this would cause the validation to fail
    returned_df = returned_df.sort_index(axis=1)
    assert_frame_equal(expected_df, returned_df)
    # if all columns exist, the df should be returned unchanged;
    # a copy is passed in, because comparing expected_df with the very object that was handed over
    # would succeed even if the function altered the df in place
    unchanged_df = utl.add_optional_columns(df=expected_df.copy(), optional_col_set=optional_cols)
    assert_frame_equal(expected_df, unchanged_df)


if __name__ == "__main__":
pytest.main([__file__])
3 changes: 3 additions & 0 deletions testdata/excel2json/excel2json-expected-output.json
Expand Up @@ -162,6 +162,7 @@
"super": [
"hasValue"
],
"subject": ":Person",
"object": "ListValue",
"labels": {
"fr": "only French"
Expand Down Expand Up @@ -241,6 +242,7 @@
"super": [
"hasValue"
],
"subject": ":Person",
"object": "IntValue",
"labels": {
"en": "age",
Expand Down Expand Up @@ -458,6 +460,7 @@
"super": [
"isPartOf"
],
"subject": ":Image",
"object": ":Documents",
"labels": {
"en": "is part of a document",
Expand Down
Binary file not shown.

0 comments on commit cf491e9

Please sign in to comment.