dasch-swiss · Nora-Olivia-Ammann · Sep 15, 2023 · Sep 14, 2023 · Sep 14, 2023 · Sep 14, 2023
diff --git a/src/dsp_tools/utils/excel2json/properties.py b/src/dsp_tools/utils/excel2json/properties.py
@@ -313,16 +313,6 @@ def _do_property_excel_compliance(df: pd.DataFrame, excelfile: str) -> None:
     # If it does not pass any one of the tests, the function stops
     required_columns = {
         "name",
-        "label_en",
-        "label_de",
-        "label_fr",
-        "label_it",
-        "label_rm",
-        "comment_en",
-        "comment_de",
-        "comment_fr",
-        "comment_it",
-        "comment_rm",
         "super",
         "object",
         "gui_element",
@@ -442,6 +432,23 @@ def excel2properties(
 
     _do_property_excel_compliance(df=property_df, excelfile=excelfile)
 
+    # Users may delete some of these optional columns for ease of use; add the absent ones to avoid errors later
+    property_df = utl.add_optional_columns(
+        df=property_df,
+        optional_col_set={
+            "label_en",
+            "label_de",
+            "label_fr",
+            "label_it",
+            "label_rm",
+            "comment_en",
+            "comment_de",
+            "comment_fr",
+            "comment_it",
+            "comment_rm",
+        },
+    )
+
     # transform every row into a property
     props: list[dict[str, Any]] = []
     for index, row in property_df.iterrows():

diff --git a/src/dsp_tools/utils/excel2json/utils.py b/src/dsp_tools/utils/excel2json/utils.py
@@ -284,3 +284,25 @@ def col_must_or_not_empty_based_on_other_col(
         return pd.Series(combined_array)
     else:
         return None
+
+
+def add_optional_columns(df: pd.DataFrame, optional_col_set: set[str]) -> pd.DataFrame:
+    """
+    Ensure that every column named in optional_col_set exists in the df.
+    Columns that are absent are appended to the right, without any values in the rows.
+    They are appended in alphabetical order, so that the resulting column order is deterministic.
+
+    Args:
+        df: Original df
+        optional_col_set: set of columns that may not be in the df, if they are not, they will be added.
+
+    Returns:
+        The df with the added columns.
+        If all are already there, the df is returned unchanged.
+    """
+    # sorted, because the iteration order of a set of strings can differ between interpreter runs
+    additional_col = sorted(optional_col_set.difference(df.columns))
+    if not additional_col:
+        return df
+    additional_df = pd.DataFrame(columns=additional_col, index=df.index)
+    return pd.concat(objs=[df, additional_df], axis=1)
diff --git a/test/unittests/test_excel2json/test_utils.py b/test/unittests/test_excel2json/test_utils.py
@@ -179,6 +179,33 @@ def test_get_comments(self) -> None:
         returned_none = utl.get_comments(original_df.loc[1, :])
         self.assertIsNone(cast(None, returned_none))
 
+    def test_add_optional_columns(self) -> None:
+        comment_cols = {"comment_en", "comment_de", "comment_fr", "comment_it", "comment_rm"}
+        # only three of the five optional columns exist in the input
+        input_df = pd.DataFrame(
+            {
+                "comment_en": ["text_en", pd.NA],
+                "comment_it": ["text_it", pd.NA],
+                "comment_rm": [pd.NA, pd.NA],
+            }
+        )
+        # the two missing columns are expected to be added without any values
+        complete_df = pd.DataFrame(
+            {
+                "comment_de": [pd.NA, pd.NA],
+                "comment_en": ["text_en", pd.NA],
+                "comment_fr": [pd.NA, pd.NA],
+                "comment_it": ["text_it", pd.NA],
+                "comment_rm": [pd.NA, pd.NA],
+            }
+        )
+        result_df = utl.add_optional_columns(df=input_df, optional_col_set=comment_cols)
+        # added columns are appended after the existing ones, so order all columns alphabetically to compare
+        assert_frame_equal(complete_df, result_df.sort_index(axis=1))
+        # if all columns exist, the df should be returned unchanged
+        same_df = utl.add_optional_columns(df=complete_df, optional_col_set=comment_cols)
+        assert_frame_equal(complete_df, same_df)
+
 
 if __name__ == "__main__":
     pytest.main([__file__])