Skip to content

Commit

Permalink
refactor: apply sourcery suggestions (#644)
Browse files Browse the repository at this point in the history
  • Loading branch information
jnussbaum committed Nov 16, 2023
1 parent 911cfa5 commit 96110a0
Show file tree
Hide file tree
Showing 23 changed files with 177 additions and 231 deletions.
13 changes: 7 additions & 6 deletions src/dsp_tools/cli.py
Expand Up @@ -312,14 +312,15 @@ def _log_cli_arguments(parsed_args: argparse.Namespace) -> None:
Args:
parsed_args: parsed arguments
"""
metadata_lines = []
metadata_lines.append(f"DSP-TOOLS: Called the action '{parsed_args.action}' from the command line")
metadata_lines.append(f"DSP-TOOLS version: {_get_version()}")
metadata_lines.append(f"Location of this installation: {__file__}")
metadata_lines.append("CLI arguments:")
metadata_lines = [
f"DSP-TOOLS: Called the action '{parsed_args.action}' from the command line",
f"DSP-TOOLS version: {_get_version()}",
f"Location of this installation: {__file__}",
"CLI arguments:",
]
metadata_lines = [f"*** {line}" for line in metadata_lines]

parameter_lines = list()
parameter_lines = []
parameters_to_log = {key: value for key, value in vars(parsed_args).items() if key != "action"}
longest_key_length = max(len(key) for key in parameters_to_log) if parameters_to_log else 0
for key, value in parameters_to_log.items():
Expand Down
11 changes: 6 additions & 5 deletions src/dsp_tools/commands/excel2json/lists.py
Expand Up @@ -102,7 +102,7 @@ def _get_values_from_excel(
dict: The JSON list up to the current recursion. At the last recursion, this is the final JSON list.
"""
nodes: list[dict[str, Any]] = []
currentnode: dict[str, Any] = dict()
currentnode: dict[str, Any] = {}
base_file_ws: Worksheet = list(base_file.values())[0]
cell: Cell = base_file_ws.cell(column=col, row=row)

Expand Down Expand Up @@ -162,7 +162,7 @@ def _get_values_from_excel(
# append a number (e.g. node-name-2) if there are list nodes with identical names
n = list_of_previous_node_names.count(nodename)
if n > 1:
nodename = nodename + "-" + str(n)
nodename = f"{nodename}-{n}"

# read label values from the other Excel files (other languages)
labels_dict: dict[str, str] = {}
Expand Down Expand Up @@ -224,9 +224,10 @@ def _make_json_lists_from_excel(
startcol = 1

# make a dict with the language labels and the worksheets
lang_to_worksheet: dict[str, Worksheet] = {}
for filepath in excel_file_paths:
lang_to_worksheet[os.path.basename(filepath)[0:2]] = load_workbook(filepath, read_only=True).worksheets[0]
lang_to_worksheet = {
os.path.basename(filepath)[:2]: load_workbook(filepath, read_only=True).worksheets[0]
for filepath in excel_file_paths
}

# take English as base file. If English is not available, take a random one.
base_lang = "en" if "en" in lang_to_worksheet else list(lang_to_worksheet.keys())[0]
Expand Down
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/excel2json/project.py
Expand Up @@ -51,7 +51,7 @@ def excel2json(

processed_files = []
onto_folders = [x for x in folder if os.path.isdir(x) and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
if len(onto_folders) == 0:
if not onto_folders:
raise UserError(
f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
)
Expand Down
27 changes: 12 additions & 15 deletions src/dsp_tools/commands/excel2json/properties.py
Expand Up @@ -37,8 +37,7 @@ def _search_json_validation_error_get_err_msg_str(
A string which is used in the Error message that contains detailed information about the problem
"""
err_msg_list = [f"The 'properties' section defined in the Excel file '{excelfile}' did not pass validation."]
json_path_to_property = regex.search(r"^\$\[(\d+)\]", validation_error.json_path)
if json_path_to_property:
if json_path_to_property := regex.search(r"^\$\[(\d+)\]", validation_error.json_path):
# fmt: off
wrong_property_name = (
jsonpath_ng.ext.parse(json_path_to_property.group(0))
Expand All @@ -48,11 +47,10 @@ def _search_json_validation_error_get_err_msg_str(
# fmt: on
excel_row = int(json_path_to_property.group(1)) + 2
err_msg_list.append(f"The problematic property is '{wrong_property_name}' in Excel row {excel_row}.")
affected_field = regex.search(
if affected_field := regex.search(
r"name|labels|comments|super|subject|object|gui_element|gui_attributes",
validation_error.json_path,
)
if affected_field:
):
err_msg_list.append(
f"The problem is that the column '{affected_field.group(0)}' has an invalid value: "
f"{validation_error.message}"
Expand Down Expand Up @@ -131,12 +129,12 @@ def _unpack_gui_attributes(attribute_str: str) -> dict[str, str]:
IndexError: if the sub-lists do not each contain exactly two items
"""
# Create a list with several attributes
gui_list = [x.strip() for x in attribute_str.split(",") if not x.strip() == ""]
gui_list = [x.strip() for x in attribute_str.split(",") if x.strip() != ""]
# create a sub-list with the key-value pair of each attribute; empty strings are excluded.
# this error will be detected when checking for the length of the lists
sub_gui_list = [[sub.strip() for sub in x.split(":") if sub.strip() != ""] for x in gui_list]
# if not all sub-lists contain two items, something is wrong with the attribute
if not all(len(sub) == 2 for sub in sub_gui_list):
if any(len(sub) != 2 for sub in sub_gui_list):
raise IndexError
return {sub[0]: sub[1] for sub in sub_gui_list}

Expand Down Expand Up @@ -450,15 +448,14 @@ def excel2properties(
)

# transform every row into a property
props: list[dict[str, Any]] = []
for index, row in property_df.iterrows():
props.append(
_row2prop(
df_row=row,
row_num=int(str(index)) + 2, # index is a label/index/hashable, but we need an int
excelfile=excelfile,
)
props = [
_row2prop(
df_row=row,
row_num=int(str(index)) + 2, # index is a label/index/hashable, but we need an int
excelfile=excelfile,
)
for index, row in property_df.iterrows()
]

# write final JSON file
_validate_properties(properties_list=props, excelfile=excelfile)
Expand Down
47 changes: 22 additions & 25 deletions src/dsp_tools/commands/excel2json/resources.py
Expand Up @@ -41,45 +41,42 @@ def _validate_resources(
jsonschema.validate(instance=resources_list, schema=resources_schema)
except jsonschema.ValidationError as err:
err_msg = f"The 'resources' section defined in the Excel file '{excelfile}' did not pass validation. "
json_path_to_resource = regex.search(r"^\$\[(\d+)\]", err.json_path)
if json_path_to_resource:
if json_path_to_resource := regex.search(r"^\$\[(\d+)\]", err.json_path):
# fmt: off
wrong_resource_name = (
wrong_res_name = (
jsonpath_ng.ext.parse(json_path_to_resource.group(0))
.find(resources_list)[0]
.value["name"]
)
# fmt: on
affected_field = regex.search(r"name|labels|comments|super|cardinalities\[(\d+)\]", err.json_path)
if affected_field and affected_field.group(0) in ["name", "labels", "comments", "super"]:
excel_row = int(json_path_to_resource.group(1)) + 2
err_msg += (
f"The problem is that the Excel sheet 'classes' contains an invalid value for resource "
f"'{wrong_resource_name}', in row {excel_row}, column '{affected_field.group(0)}': {err.message}"
)
elif affected_field and "cardinalities" in affected_field.group(0):
excel_row = int(affected_field.group(1)) + 2
if err.json_path.endswith("cardinality"):
if affected_field := regex.search(r"name|labels|comments|super|cardinalities\[(\d+)\]", err.json_path):
if affected_field.group(0) in ["name", "labels", "comments", "super"]:
excel_row = int(json_path_to_resource.group(1)) + 2
err_msg += (
f"The problem is that the Excel sheet '{wrong_resource_name}' contains an invalid value "
f"in row {excel_row}, column 'Cardinality': {err.message}"
)
elif err.json_path.endswith("propname"):
err_msg += (
f"The problem is that the Excel sheet '{wrong_resource_name}' contains an invalid value "
f"in row {excel_row}, column 'Property': {err.message}"
f"The problem is that the Excel sheet 'classes' contains an invalid value for resource "
f"'{wrong_res_name}', in row {excel_row}, column '{affected_field.group(0)}': {err.message}"
)
elif "cardinalities" in affected_field.group(0):
excel_row = int(affected_field.group(1)) + 2
if err.json_path.endswith("cardinality"):
err_msg += (
f"The problem is that the Excel sheet '{wrong_res_name}' contains an invalid value "
f"in row {excel_row}, column 'Cardinality': {err.message}"
)
elif err.json_path.endswith("propname"):
err_msg += (
f"The problem is that the Excel sheet '{wrong_res_name}' contains an invalid value "
f"in row {excel_row}, column 'Property': {err.message}"
)
else:
err_msg += f"The error message is: {err.message}\nThe error occurred at {err.json_path}"
raise UserError(err_msg) from None

# check if resource names are unique
all_names = [r["name"] for r in resources_list]
duplicates: dict[int, str] = dict()
for index, resdef in enumerate(resources_list):
if all_names.count(resdef["name"]) > 1:
duplicates[index + 2] = resdef["name"]
if duplicates:
if duplicates := {
index + 2: resdef["name"] for index, resdef in enumerate(resources_list) if all_names.count(resdef["name"]) > 1
}:
err_msg = (
f"Resource names must be unique inside every ontology, "
f"but your Excel file '{excelfile}' contains duplicates:\n"
Expand Down
15 changes: 3 additions & 12 deletions src/dsp_tools/commands/excel2json/utils.py
Expand Up @@ -214,10 +214,7 @@ def get_comments(df_row: pd.Series) -> dict[str, str] | None:
A dictionary with the language tag and the content of the cell
"""
comments = {lang: df_row[f"comment_{lang}"] for lang in languages if df_row[f"comment_{lang}"] is not pd.NA}
if comments == {}:
return None
else:
return comments
return comments or None


def find_one_full_cell_in_cols(df: pd.DataFrame, required_columns: list[str]) -> pd.Series | None:
Expand All @@ -238,10 +235,7 @@ def find_one_full_cell_in_cols(df: pd.DataFrame, required_columns: list[str]) ->
# If all are True logical_and returns True otherwise False
combined_array = np.logical_and.reduce(result_arrays)
# if any of the values are True, it is turned into a pd.Series
if any(combined_array):
return pd.Series(combined_array)
else:
return None
return pd.Series(combined_array) if any(combined_array) else None


def col_must_or_not_empty_based_on_other_col(
Expand Down Expand Up @@ -281,10 +275,7 @@ def col_must_or_not_empty_based_on_other_col(
substring_array = df[substring_colname].str.contains("|".join(substring_list), na=False, regex=True)
# If both are True logical_and returns True otherwise False
combined_array = np.logical_and(na_series, substring_array)
if any(combined_array):
return pd.Series(combined_array)
else:
return None
return pd.Series(combined_array) if any(combined_array) else None


def add_optional_columns(df: pd.DataFrame, optional_col_set: set[str]) -> pd.DataFrame:
Expand Down
11 changes: 3 additions & 8 deletions src/dsp_tools/commands/excel2xml/excel2xml_cli.py
Expand Up @@ -364,8 +364,7 @@ def _convert_row_to_property_elements(
# if all other cells are empty, continue with next property element
other_cell_headers = [f"{i}_{x}" for x in ["encoding", "permissions", "comment"]]
notna_cell_headers = [x for x in other_cell_headers if check_notna(row.get(x))]
notna_cell_headers_str = ", ".join([f"'{x}'" for x in notna_cell_headers])
if notna_cell_headers_str:
if notna_cell_headers_str := ", ".join([f"'{x}'" for x in notna_cell_headers]):
warnings.warn(
f"Error in resource '{resource_id}': Excel row {row_number} has an entry "
f"in column(s) {notna_cell_headers_str}, but not in '{i}_value'. "
Expand All @@ -388,7 +387,7 @@ def _convert_row_to_property_elements(
property_elements.append(PropertyElement(**kwargs_propelem))

# validate the end result before returning it
if len(property_elements) == 0:
if not property_elements:
warnings.warn(
f"At least one value per property is required, "
f"but resource '{resource_id}', property '{row['prop name']}' (Excel row {row_number}) doesn't contain any values."
Expand Down Expand Up @@ -468,13 +467,9 @@ def _create_property(
kwargs_propfunc: dict[str, Union[str, PropertyElement, list[PropertyElement]]] = {
"name": row["prop name"],
"calling_resource": resource_id,
"value": property_elements[0] if row.get("prop type") == "boolean-prop" else property_elements,
}

if row.get("prop type") == "boolean-prop":
kwargs_propfunc["value"] = property_elements[0]
else:
kwargs_propfunc["value"] = property_elements

if check_notna(row.get("prop list")):
kwargs_propfunc["list_name"] = str(row["prop list"])

Expand Down
34 changes: 12 additions & 22 deletions src/dsp_tools/commands/excel2xml/excel2xml_lib.py
Expand Up @@ -102,7 +102,6 @@ def find_date_in_string(string: str) -> Optional[str]:
# sanitize input, just in case that the method was called on an empty or N/A cell
if not check_notna(string):
return None
string = str(string)

months_dict = {
"January": 1,
Expand Down Expand Up @@ -214,9 +213,9 @@ def find_date_in_string(string: str) -> Optional[str]:
elif year_range:
startyear = int(year_range.group(1))
endyear = int(year_range.group(2))
if int(endyear / 100) == 0:
if endyear // 100 == 0:
# endyear is only 2-digit: add the first two digits of startyear
endyear = int(startyear / 100) * 100 + endyear
endyear = startyear // 100 * 100 + endyear

elif year_only:
startyear = int(year_only.group(0))
Expand Down Expand Up @@ -1265,10 +1264,7 @@ def make_text_prop(
kwargs = {"permissions": val.permissions}
if check_notna(val.comment):
kwargs["comment"] = val.comment
if check_notna(val.encoding):
kwargs["encoding"] = val.encoding
else:
kwargs["encoding"] = "utf8"
kwargs["encoding"] = val.encoding if check_notna(val.encoding) else "utf8"
value_ = etree.Element(
"{%s}text" % xml_namespace_map[None],
**kwargs, # type: ignore[arg-type]
Expand Down Expand Up @@ -1680,32 +1676,31 @@ def create_json_excel_list_mapping(
corrections = corrections or {}

# split the values, if necessary
excel_values_new = list()
excel_values_new = []
for val in excel_values:
if isinstance(val, str):
excel_values_new.extend([x.strip() for x in val.split(sep) if x])

# read the list of the JSON project (works also for nested lists)
with open(path_to_json, encoding="utf-8") as f:
json_file = json.load(f)
json_subset = list()
json_subset = []
for elem in json_file["project"]["lists"]:
if elem["name"] == list_name:
json_subset = elem["nodes"]
json_values = set(_nested_dict_values_iterator(json_subset))

# build dictionary with the mapping, based on string similarity
res = dict()
res = {}
for excel_value in excel_values_new:
excel_value_corrected = corrections.get(excel_value, excel_value)
excel_value_simpl = simplify_name(excel_value_corrected) # increase match probability by removing illegal chars
matches: list[str] = difflib.get_close_matches(
if matches := difflib.get_close_matches(
word=excel_value_simpl,
possibilities=json_values,
n=1,
cutoff=0.6,
)
if matches:
):
res[excel_value] = matches[0]
res[excel_value.lower()] = matches[0]
else:
Expand All @@ -1731,8 +1726,7 @@ def _nested_dict_values_iterator(dicts: list[dict[str, Any]]) -> Iterable[str]:
# Credits: https://thispointer.com/python-iterate-loop-over-all-nested-dictionary-values/
for _dict in dicts:
if "nodes" in _dict:
for value in _nested_dict_values_iterator(_dict["nodes"]):
yield value
yield from _nested_dict_values_iterator(_dict["nodes"])
if "name" in _dict:
yield _dict["name"]

Expand Down Expand Up @@ -1760,10 +1754,7 @@ def create_json_list_mapping(
"""
with open(path_to_json, encoding="utf-8") as f:
json_file = json.load(f)
json_subset = list()
for numbered_json_obj in json_file["project"]["lists"]:
if numbered_json_obj["name"] == list_name:
json_subset.append(numbered_json_obj)
json_subset = [x for x in json_file["project"]["lists"] if x["name"] == list_name]
# json_subset is a list containing one item, namely the json object containing the entire json-list

res = {}
Expand Down Expand Up @@ -1793,9 +1784,8 @@ def _name_label_mapper_iterator(
# node is the json object containing the entire json-list
if "nodes" in node:
# "nodes" is the json sub-object containing the entries of the json-list
for value in _name_label_mapper_iterator(node["nodes"], language_label):
yield value
# "value" is a (label, name) pair of a single list entry
yield from _name_label_mapper_iterator(node["nodes"], language_label)
# each yielded value is a (label, name) pair of a single list entry
if "name" in node:
yield (node["labels"][language_label], node["name"])
# the actual values of the name and the label
Expand Down

0 comments on commit 96110a0

Please sign in to comment.