Skip to content

Commit

Permalink
fix(excel2json): allow resources to have no cardinalities (DEV-3333) (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Feb 27, 2024
1 parent 7f98945 commit b55224a
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 50 deletions.
114 changes: 66 additions & 48 deletions src/dsp_tools/commands/excel2json/resources.py
Expand Up @@ -16,7 +16,7 @@
ResourcesSheetsNotAsExpected,
)
from dsp_tools.commands.excel2json.utils import check_column_for_duplicate, read_and_clean_all_sheets
from dsp_tools.models.exceptions import InputError, UserError
from dsp_tools.models.exceptions import InputError
from dsp_tools.utils.shared import check_notna, prepare_dataframe

# Language codes used to look up the per-language columns of a class row
# ("label_<lang>", "comment_<lang>", or a bare "<lang>" column as fallback for labels).
languages = ["en", "de", "fr", "it", "rm"]
Expand Down Expand Up @@ -93,17 +93,17 @@ def _find_validation_problem(


def _row2resource(
df_row: pd.Series,
details_df: pd.DataFrame,
class_info_row: pd.Series,
class_df_with_cardinalities: pd.DataFrame,
) -> dict[str, Any]:
"""
Method that reads one row from the "classes" DataFrame,
opens the corresponding details DataFrame,
and builds a dict object of the resource.
Args:
df_row: row from the "classes" DataFrame
details_df: Excel sheet of the individual class
class_info_row: row from the "classes" DataFrame
class_df_with_cardinalities: Excel sheet of the individual class
Raises:
UserError: if the row or the details sheet contains invalid data
Expand All @@ -112,63 +112,81 @@ def _row2resource(
dict object of the resource
"""

name = df_row["name"]
labels = {lang: df_row[f"label_{lang}"] for lang in languages if df_row.get(f"label_{lang}")}
class_name = class_info_row["name"]
labels = {lang: class_info_row[f"label_{lang}"] for lang in languages if class_info_row.get(f"label_{lang}")}
if not labels:
labels = {lang: df_row[lang] for lang in languages if df_row.get(lang)}
comments = {lang: df_row[f"comment_{lang}"] for lang in languages if df_row.get(f"comment_{lang}")}
supers = [s.strip() for s in df_row["super"].split(",")]
labels = {lang: class_info_row[lang] for lang in languages if class_info_row.get(lang)}
supers = [s.strip() for s in class_info_row["super"].split(",")]

details_df = prepare_dataframe(
df=details_df,
resource = {"name": class_name, "super": supers, "labels": labels}

comments = {lang: class_info_row[f"comment_{lang}"] for lang in languages if class_info_row.get(f"comment_{lang}")}
if comments:
resource["comments"] = comments

cards = _make_cardinality_section(class_name, class_df_with_cardinalities)
if cards:
resource["cardinalities"] = cards

return resource


def _make_cardinality_section(class_name: str, class_df_with_cardinalities: pd.DataFrame) -> list[dict[str, str | int]]:
    """
    Build the "cardinalities" section of one resource class from its Excel sheet.

    The sheet is first passed through `prepare_dataframe`, which is asked to require the columns
    "Property" and "Cardinality". If no rows remain afterwards, a warning is emitted
    and an empty list is returned, so that the caller can omit the "cardinalities" section.

    Args:
        class_name: name of the resource class (= name of the Excel sheet), used in messages
        class_df_with_cardinalities: Excel sheet of the individual class

    Returns:
        one dict per property listed in the sheet, or an empty list if the sheet lists no properties
    """
    # NOTE(review): the columns are later accessed in lower case ("property", "cardinality"),
    # so prepare_dataframe presumably normalises the column names - confirm against its implementation.
    class_df_with_cardinalities = prepare_dataframe(
        df=class_df_with_cardinalities,
        required_columns=["Property", "Cardinality"],
        location_of_sheet=f"Sheet '{class_name}' in file 'resources.xlsx'",
    )

    # The truth value of a DataFrame is ambiguous, hence the explicit row count.
    if len(class_df_with_cardinalities) == 0:
        warnings.warn(
            f"Sheet '{class_name}' in file 'resources.xlsx' does not have any properties listed.\n"
            f"Creation of the resource class continues without 'cardinalities' section."
        )
        return []

    return _create_all_cardinalities(class_name, class_df_with_cardinalities)


def _create_all_cardinalities(class_name: str, class_df_with_cardinalities: pd.DataFrame) -> list[dict[str, str | int]]:
    """
    Convert every row of a class sheet into one cardinality dict.

    The column "gui_order" is validated (and, if necessary, filled in) by `_check_complete_gui_order`
    before the rows are converted.

    Args:
        class_name: name of the resource class (= name of the Excel sheet), used in messages
        class_df_with_cardinalities: Excel sheet of the individual class,
            with the columns "property" and "cardinality"

    Returns:
        a list with one dict per row, each with the keys "propname", "cardinality" and "gui_order"
    """
    class_df_with_cardinalities = _check_complete_gui_order(class_name, class_df_with_cardinalities)
    return [
        {
            "propname": ":" + detail_row["property"],
            "cardinality": detail_row["cardinality"].lower(),
            # iterrows() may hand back a numpy integer; int() keeps the value a plain Python int
            "gui_order": int(detail_row["gui_order"]),
        }
        for _, detail_row in class_df_with_cardinalities.iterrows()
    ]

# build the dict structure of this resource and append it to the list of resources
resource = {"name": name, "super": supers, "labels": labels}
if comments:
resource["comments"] = comments
resource["cardinalities"] = cards

return resource
def _check_complete_gui_order(class_name: str, class_df_with_cardinalities: pd.DataFrame) -> pd.DataFrame:
    """
    Make sure that the column "gui_order" exists and contains only integers.

    If the column is missing, contains empty cells, or contains values that cannot be converted to an integer,
    a warning is emitted and the entire column is replaced by 1, 2, 3, ...,
    so that the gui-order reflects the order of the rows in the sheet.
    The DataFrame that is passed in is not modified; a copy is returned.

    Args:
        class_name: name of the resource class (= name of the Excel sheet), used in the warning message
        class_df_with_cardinalities: Excel sheet of the individual class

    Returns:
        a copy of the DataFrame whose column "gui_order" contains integers
    """
    # work on a copy, so that the caller's DataFrame is never mutated as a side effect
    df = class_df_with_cardinalities.copy()

    if "gui_order" not in df:
        detail_problem_msg = "the column 'gui_order' does not exist."
    elif df["gui_order"].isna().any():
        detail_problem_msg = "some rows in the column 'gui_order' are empty."
    else:
        try:
            # int(float(x)) accepts "3" as well as "3.0"
            converted = [int(float(x)) for x in df["gui_order"]]
        except (ValueError, TypeError, OverflowError):
            # ValueError: non-numeric text; TypeError: non-numeric object (e.g. a date cell);
            # OverflowError: infinite value
            detail_problem_msg = (
                "some rows in the column 'gui_order' contain invalid characters "
                "that could not be converted to an integer."
            )
        else:
            df["gui_order"] = converted
            return df

    # fall back to the order in which the properties are listed in the sheet
    df["gui_order"] = list(range(1, len(df) + 1))

    complete_msg = (
        f"In the sheet '{class_name}' of the file 'resources.xlsx', "
        f"{detail_problem_msg}\n"
        f"Values have been filled in automatically, "
        f"so that the gui-order reflects the order of the properties in the file."
    )
    warnings.warn(complete_msg)
    return df


def excel2resources(
Expand Down
@@ -1,5 +1,3 @@
"""unit tests for excel to resource"""

import re
import unittest

Expand Down
61 changes: 61 additions & 0 deletions test/unittests/commands/excel2json/test_resources_low_level.py
@@ -0,0 +1,61 @@
import re
import warnings

import pandas as pd
import pytest
from pandas.testing import assert_frame_equal

from dsp_tools.commands.excel2json.resources import _check_complete_gui_order


class TestCheckCompleteGuiOrder:
    """Unit tests for `_check_complete_gui_order`."""

    @staticmethod
    def _expected_warning(detail: str) -> str:
        """
        Build the pattern for the expected warning.

        `pytest.warns(match=...)` interprets its argument as a regular expression,
        so the literal message is escaped to prevent "." and other metacharacters from matching too much.

        Args:
            detail: the problem-specific sentence in the middle of the warning

        Returns:
            a regex that matches the complete warning text literally
        """
        msg = (
            "In the sheet 'class_name' of the file 'resources.xlsx', "
            f"{detail}\n"
            "Values have been filled in automatically, "
            "so that the gui-order reflects the order of the properties in the file."
        )
        return re.escape(msg)

    def test_column_does_not_exist(self) -> None:
        df = pd.DataFrame({"prop": [1, 2, 3]})
        expected_df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": [1, 2, 3]})
        expected_msg = self._expected_warning("the column 'gui_order' does not exist.")
        with pytest.warns(Warning, match=expected_msg):
            res = _check_complete_gui_order("class_name", df)
        assert_frame_equal(res, expected_df)

    def test_na_in_row(self) -> None:
        df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": [1, 2, pd.NA]})
        expected_df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": [1, 2, 3]})
        expected_msg = self._expected_warning("some rows in the column 'gui_order' are empty.")
        with pytest.warns(Warning, match=expected_msg):
            res = _check_complete_gui_order("class_name", df)
        assert_frame_equal(res, expected_df)

    def test_error(self) -> None:
        df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": [1, 2, "a"]})
        expected_df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": [1, 2, 3]})
        expected_msg = self._expected_warning(
            "some rows in the column 'gui_order' contain invalid characters "
            "that could not be converted to an integer."
        )
        with pytest.warns(Warning, match=expected_msg):
            res = _check_complete_gui_order("class_name", df)
        assert_frame_equal(res, expected_df)

    def test_good(self) -> None:
        df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": ["1", "2", "3"]})
        expected_df = pd.DataFrame({"prop": [1, 2, 3], "gui_order": [1, 2, 3]})
        # turn every warning into an error: valid input must not trigger a warning
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            res = _check_complete_gui_order("class_name", df)
        assert_frame_equal(res, expected_df)


if __name__ == "__main__":
    # pytest.main() returns the exit code of the test run;
    # pass it on so that failing tests are visible to the calling shell / CI
    raise SystemExit(pytest.main([__file__]))

0 comments on commit b55224a

Please sign in to comment.