refactor: apply sourcery suggestions (#644)

dasch-swiss · Nov 16, 2023 · 96110a0 · 96110a0
1 parent 911cfa5
commit 96110a0
Show file tree

Hide file tree

Showing 23 changed files with 177 additions and 231 deletions.
diff --git a/src/dsp_tools/cli.py b/src/dsp_tools/cli.py
@@ -312,14 +312,15 @@ def _log_cli_arguments(parsed_args: argparse.Namespace) -> None:
     Args:
         parsed_args: parsed arguments
     """
-    metadata_lines = []
-    metadata_lines.append(f"DSP-TOOLS: Called the action '{parsed_args.action}' from the command line")
-    metadata_lines.append(f"DSP-TOOLS version: {_get_version()}")
-    metadata_lines.append(f"Location of this installation: {__file__}")
-    metadata_lines.append("CLI arguments:")
+    metadata_lines = [
+        f"DSP-TOOLS: Called the action '{parsed_args.action}' from the command line",
+        f"DSP-TOOLS version: {_get_version()}",
+        f"Location of this installation: {__file__}",
+        "CLI arguments:",
+    ]
     metadata_lines = [f"*** {line}" for line in metadata_lines]
 
-    parameter_lines = list()
+    parameter_lines = []
     parameters_to_log = {key: value for key, value in vars(parsed_args).items() if key != "action"}
     longest_key_length = max(len(key) for key in parameters_to_log) if parameters_to_log else 0
     for key, value in parameters_to_log.items():

diff --git a/src/dsp_tools/commands/excel2json/lists.py b/src/dsp_tools/commands/excel2json/lists.py
@@ -102,7 +102,7 @@ def _get_values_from_excel(
         dict: The JSON list up to the current recursion. At the last recursion, this is the final JSON list.
     """
     nodes: list[dict[str, Any]] = []
-    currentnode: dict[str, Any] = dict()
+    currentnode: dict[str, Any] = {}
     base_file_ws: Worksheet = list(base_file.values())[0]
     cell: Cell = base_file_ws.cell(column=col, row=row)
 
@@ -162,7 +162,7 @@ def _get_values_from_excel(
             # append a number (e.g. node-name-2) if there are list nodes with identical names
             n = list_of_previous_node_names.count(nodename)
             if n > 1:
-                nodename = nodename + "-" + str(n)
+                nodename = f"{nodename}-{n}"
 
             # read label values from the other Excel files (other languages)
             labels_dict: dict[str, str] = {}
@@ -224,9 +224,10 @@ def _make_json_lists_from_excel(
     startcol = 1
 
     # make a dict with the language labels and the worksheets
-    lang_to_worksheet: dict[str, Worksheet] = {}
-    for filepath in excel_file_paths:
-        lang_to_worksheet[os.path.basename(filepath)[0:2]] = load_workbook(filepath, read_only=True).worksheets[0]
+    lang_to_worksheet = {
+        os.path.basename(filepath)[:2]: load_workbook(filepath, read_only=True).worksheets[0]
+        for filepath in excel_file_paths
+    }
 
     # take English as base file. If English is not available, take a random one.
     base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]

diff --git a/src/dsp_tools/commands/excel2json/project.py b/src/dsp_tools/commands/excel2json/project.py
@@ -51,7 +51,7 @@ def excel2json(
 
     processed_files = []
     onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
-    if len(onto_folders) == 0:
+    if not onto_folders:
         raise UserError(
             f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
         )

diff --git a/src/dsp_tools/commands/excel2json/properties.py b/src/dsp_tools/commands/excel2json/properties.py
@@ -37,8 +37,7 @@ def _search_json_validation_error_get_err_msg_str(
         A string which is used in the Error message that contains detailed information about the problem
     """
     err_msg_list = [f"The 'properties' section defined in the Excel file '{excelfile}' did not pass validation."]
-    json_path_to_property = regex.search(r"^\$\[(\d+)\]", validation_error.json_path)
-    if json_path_to_property:
+    if json_path_to_property := regex.search(r"^\$\[(\d+)\]", validation_error.json_path):
         # fmt: off
         wrong_property_name = (
             jsonpath_ng.ext.parse(json_path_to_property.group(0))
@@ -48,11 +47,10 @@ def _search_json_validation_error_get_err_msg_str(
         # fmt: on
         excel_row = int(json_path_to_property.group(1)) + 2
         err_msg_list.append(f"The problematic property is '{wrong_property_name}' in Excel row {excel_row}.")
-        affected_field = regex.search(
+        if affected_field := regex.search(
             r"name|labels|comments|super|subject|object|gui_element|gui_attributes",
             validation_error.json_path,
-        )
-        if affected_field:
+        ):
             err_msg_list.append(
                 f"The problem is that the column '{affected_field.group(0)}' has an invalid value: "
                 f"{validation_error.message}"
@@ -131,12 +129,12 @@ def _unpack_gui_attributes(attribute_str: str) -> dict[str, str]:
         IndexError: if the sub-lists do not each contain two items
     """
     # Create a list with several attributes
-    gui_list = [x.strip() for x in attribute_str.split(",") if not x.strip() == ""]
+    gui_list = [x.strip() for x in attribute_str.split(",") if x.strip() != ""]
     # create a sub-list with the key-value pair of each attribute; if a part is an empty string, we exclude it.
     # this error will be detected when checking for the length of the lists
     sub_gui_list = [[sub.strip() for sub in x.split(":") if sub.strip() != ""] for x in gui_list]
     # if not all sublist contain two items, something is wrong with the attribute
-    if not all(len(sub) == 2 for sub in sub_gui_list):
+    if any(len(sub) != 2 for sub in sub_gui_list):
         raise IndexError
     return {sub[0]: sub[1] for sub in sub_gui_list}
 
@@ -450,15 +448,14 @@ def excel2properties(
     )
 
     # transform every row into a property
-    props: list[dict[str, Any]] = []
-    for index, row in property_df.iterrows():
-        props.append(
-            _row2prop(
-                df_row=row,
-                row_num=int(str(index)) + 2,  # index is a label/index/hashable, but we need an int
-                excelfile=excelfile,
-            )
+    props = [
+        _row2prop(
+            df_row=row,
+            row_num=int(str(index)) + 2,  # index is a label/index/hashable, but we need an int
+            excelfile=excelfile,
         )
+        for index, row in property_df.iterrows()
+    ]
 
     # write final JSON file
     _validate_properties(properties_list=props, excelfile=excelfile)

diff --git a/src/dsp_tools/commands/excel2json/resources.py b/src/dsp_tools/commands/excel2json/resources.py
@@ -41,45 +41,42 @@ def _validate_resources(
         jsonschema.validate(instance=resources_list, schema=resources_schema)
     except jsonschema.ValidationError as err:
         err_msg = f"The 'resources' section defined in the Excel file '{excelfile}' did not pass validation. "
-        json_path_to_resource = regex.search(r"^\$\[(\d+)\]", err.json_path)
-        if json_path_to_resource:
+        if json_path_to_resource := regex.search(r"^\$\[(\d+)\]", err.json_path):
             # fmt: off
-            wrong_resource_name = (
+            wrong_res_name = (
                 jsonpath_ng.ext.parse(json_path_to_resource.group(0))
                 .find(resources_list)[0]
                 .value["name"]
             )
             # fmt: on
-            affected_field = regex.search(r"name|labels|comments|super|cardinalities\[(\d+)\]", err.json_path)
-            if affected_field and affected_field.group(0) in ["name", "labels", "comments", "super"]:
-                excel_row = int(json_path_to_resource.group(1)) + 2
-                err_msg += (
-                    f"The problem is that the Excel sheet 'classes' contains an invalid value for resource "
-                    f"'{wrong_resource_name}', in row {excel_row}, column '{affected_field.group(0)}': {err.message}"
-                )
-            elif affected_field and "cardinalities" in affected_field.group(0):
-                excel_row = int(affected_field.group(1)) + 2
-                if err.json_path.endswith("cardinality"):
+            if affected_field := regex.search(r"name|labels|comments|super|cardinalities\[(\d+)\]", err.json_path):
+                if affected_field.group(0) in ["name", "labels", "comments", "super"]:
+                    excel_row = int(json_path_to_resource.group(1)) + 2
                     err_msg += (
-                        f"The problem is that the Excel sheet '{wrong_resource_name}' contains an invalid value "
-                        f"in row {excel_row}, column 'Cardinality': {err.message}"
-                    )
-                elif err.json_path.endswith("propname"):
-                    err_msg += (
-                        f"The problem is that the Excel sheet '{wrong_resource_name}' contains an invalid value "
-                        f"in row {excel_row}, column 'Property': {err.message}"
+                        f"The problem is that the Excel sheet 'classes' contains an invalid value for resource "
+                        f"'{wrong_res_name}', in row {excel_row}, column '{affected_field.group(0)}': {err.message}"
                     )
+                elif "cardinalities" in affected_field.group(0):
+                    excel_row = int(affected_field.group(1)) + 2
+                    if err.json_path.endswith("cardinality"):
+                        err_msg += (
+                            f"The problem is that the Excel sheet '{wrong_res_name}' contains an invalid value "
+                            f"in row {excel_row}, column 'Cardinality': {err.message}"
+                        )
+                    elif err.json_path.endswith("propname"):
+                        err_msg += (
+                            f"The problem is that the Excel sheet '{wrong_res_name}' contains an invalid value "
+                            f"in row {excel_row}, column 'Property': {err.message}"
+                        )
         else:
             err_msg += f"The error message is: {err.message}\nThe error occurred at {err.json_path}"
         raise UserError(err_msg) from None
 
     # check if resource names are unique
     all_names = [r["name"] for r in resources_list]
-    duplicates: dict[int, str] = dict()
-    for index, resdef in enumerate(resources_list):
-        if all_names.count(resdef["name"]) > 1:
-            duplicates[index + 2] = resdef["name"]
-    if duplicates:
+    if duplicates := {
+        index + 2: resdef["name"] for index, resdef in enumerate(resources_list) if all_names.count(resdef["name"]) > 1
+    }:
         err_msg = (
             f"Resource names must be unique inside every ontology, "
             f"but your Excel file '{excelfile}' contains duplicates:\n"

diff --git a/src/dsp_tools/commands/excel2json/utils.py b/src/dsp_tools/commands/excel2json/utils.py
@@ -214,10 +214,7 @@ def get_comments(df_row: pd.Series) -> dict[str, str] | None:
         A dictionary with the language tag and the content of the cell
     """
     comments = {lang: df_row[f"comment_{lang}"] for lang in languages if df_row[f"comment_{lang}"] is not pd.NA}
-    if comments == {}:
-        return None
-    else:
-        return comments
+    return comments or None
 
 
 def find_one_full_cell_in_cols(df: pd.DataFrame, required_columns: list[str]) -> pd.Series | None:
@@ -238,10 +235,7 @@ def find_one_full_cell_in_cols(df: pd.DataFrame, required_columns: list[str]) ->
     # If all are True logical_and returns True otherwise False
     combined_array = np.logical_and.reduce(result_arrays)
     # if any of the values are True, it is turned into a pd.Series
-    if any(combined_array):
-        return pd.Series(combined_array)
-    else:
-        return None
+    return pd.Series(combined_array) if any(combined_array) else None
 
 
 def col_must_or_not_empty_based_on_other_col(
@@ -281,10 +275,7 @@ def col_must_or_not_empty_based_on_other_col(
     substring_array = df[substring_colname].str.contains("|".join(substring_list), na=False, regex=True)
     # If both are True logical_and returns True otherwise False
     combined_array = np.logical_and(na_series, substring_array)
-    if any(combined_array):
-        return pd.Series(combined_array)
-    else:
-        return None
+    return pd.Series(combined_array) if any(combined_array) else None
 
 
 def add_optional_columns(df: pd.DataFrame, optional_col_set: set[str]) -> pd.DataFrame:

diff --git a/src/dsp_tools/commands/excel2xml/excel2xml_cli.py b/src/dsp_tools/commands/excel2xml/excel2xml_cli.py
@@ -364,8 +364,7 @@ def _convert_row_to_property_elements(
             # if all other cells are empty, continue with next property element
             other_cell_headers = [f"{i}_{x}" for x in ["encoding", "permissions", "comment"]]
             notna_cell_headers = [x for x in other_cell_headers if check_notna(row.get(x))]
-            notna_cell_headers_str = ", ".join([f"'{x}'" for x in notna_cell_headers])
-            if notna_cell_headers_str:
+            if notna_cell_headers_str := ", ".join([f"'{x}'" for x in notna_cell_headers]):
                 warnings.warn(
                     f"Error in resource '{resource_id}': Excel row {row_number} has an entry "
                     f"in column(s) {notna_cell_headers_str}, but not in '{i}_value'. "
@@ -388,7 +387,7 @@ def _convert_row_to_property_elements(
         property_elements.append(PropertyElement(**kwargs_propelem))
 
     # validate the end result before returning it
-    if len(property_elements) == 0:
+    if not property_elements:
         warnings.warn(
             f"At least one value per property is required, "
             f"but resource '{resource_id}', property '{row['prop name']}' (Excel row {row_number}) doesn't contain any values."
@@ -468,13 +467,9 @@ def _create_property(
     kwargs_propfunc: dict[str, Union[str, PropertyElement, list[PropertyElement]]] = {
         "name": row["prop name"],
         "calling_resource": resource_id,
+        "value": property_elements[0] if row.get("prop type") == "boolean-prop" else property_elements,
     }
 
-    if row.get("prop type") == "boolean-prop":
-        kwargs_propfunc["value"] = property_elements[0]
-    else:
-        kwargs_propfunc["value"] = property_elements
-
     if check_notna(row.get("prop list")):
         kwargs_propfunc["list_name"] = str(row["prop list"])
 

diff --git a/src/dsp_tools/commands/excel2xml/excel2xml_lib.py b/src/dsp_tools/commands/excel2xml/excel2xml_lib.py
@@ -102,7 +102,6 @@ def find_date_in_string(string: str) -> Optional[str]:
     # sanitize input, just in case that the method was called on an empty or N/A cell
     if not check_notna(string):
         return None
-    string = str(string)
 
     months_dict = {
         "January": 1,
@@ -214,9 +213,9 @@ def find_date_in_string(string: str) -> Optional[str]:
     elif year_range:
         startyear = int(year_range.group(1))
         endyear = int(year_range.group(2))
-        if int(endyear / 100) == 0:
+        if endyear // 100 == 0:
             # endyear is only 2-digit: add the first two digits of startyear
-            endyear = int(startyear / 100) * 100 + endyear
+            endyear = startyear // 100 * 100 + endyear
 
     elif year_only:
         startyear = int(year_only.group(0))
@@ -1265,10 +1264,7 @@ def make_text_prop(
         kwargs = {"permissions": val.permissions}
         if check_notna(val.comment):
             kwargs["comment"] = val.comment
-        if check_notna(val.encoding):
-            kwargs["encoding"] = val.encoding
-        else:
-            kwargs["encoding"] = "utf8"
+        kwargs["encoding"] = val.encoding if check_notna(val.encoding) else "utf8"
         value_ = etree.Element(
             "{%s}text" % xml_namespace_map[None],
             **kwargs,  # type: ignore[arg-type]
@@ -1680,32 +1676,31 @@ def create_json_excel_list_mapping(
     corrections = corrections or {}
 
     # split the values, if necessary
-    excel_values_new = list()
+    excel_values_new = []
     for val in excel_values:
         if isinstance(val, str):
             excel_values_new.extend([x.strip() for x in val.split(sep) if x])
 
     # read the list of the JSON project (works also for nested lists)
     with open(path_to_json, encoding="utf-8") as f:
         json_file = json.load(f)
-    json_subset = list()
+    json_subset = []
     for elem in json_file["project"]["lists"]:
         if elem["name"] == list_name:
             json_subset = elem["nodes"]
     json_values = set(_nested_dict_values_iterator(json_subset))
 
     # build dictionary with the mapping, based on string similarity
-    res = dict()
+    res = {}
     for excel_value in excel_values_new:
         excel_value_corrected = corrections.get(excel_value, excel_value)
         excel_value_simpl = simplify_name(excel_value_corrected)  # increase match probability by removing illegal chars
-        matches: list[str] = difflib.get_close_matches(
+        if matches := difflib.get_close_matches(
             word=excel_value_simpl,
             possibilities=json_values,
             n=1,
             cutoff=0.6,
-        )
-        if matches:
+        ):
             res[excel_value] = matches[0]
             res[excel_value.lower()] = matches[0]
         else:
@@ -1731,8 +1726,7 @@ def _nested_dict_values_iterator(dicts: list[dict[str, Any]]) -> Iterable[str]:
     # Credits: https://thispointer.com/python-iterate-loop-over-all-nested-dictionary-values/
     for _dict in dicts:
         if "nodes" in _dict:
-            for value in _nested_dict_values_iterator(_dict["nodes"]):
-                yield value
+            yield from _nested_dict_values_iterator(_dict["nodes"])
         if "name" in _dict:
             yield _dict["name"]
 
@@ -1760,10 +1754,7 @@ def create_json_list_mapping(
     """
     with open(path_to_json, encoding="utf-8") as f:
         json_file = json.load(f)
-    json_subset = list()
-    for numbered_json_obj in json_file["project"]["lists"]:
-        if numbered_json_obj["name"] == list_name:
-            json_subset.append(numbered_json_obj)
+    json_subset = [x for x in json_file["project"]["lists"] if x["name"] == list_name]
     # json_subset is a list containing one item, namely the json object containing the entire json-list
 
     res = {}
@@ -1793,9 +1784,8 @@ def _name_label_mapper_iterator(
         # node is the json object containing the entire json-list
         if "nodes" in node:
             # "nodes" is the json sub-object containing the entries of the json-list
-            for value in _name_label_mapper_iterator(node["nodes"], language_label):
-                yield value
-                # "value" is a (label, name) pair of a single list entry
+            yield from _name_label_mapper_iterator(node["nodes"], language_label)
+            # each yielded value is a (label, name) pair of a single list entry
         if "name" in node:
             yield (node["labels"][language_label], node["name"])
             # the actual values of the name and the label