Skip to content

Commit

Permalink
chore: remove dead code with vulture (#790)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann committed Feb 1, 2024
1 parent 51d4265 commit aba9aef
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 89 deletions.
46 changes: 0 additions & 46 deletions src/dsp_tools/commands/excel2json/utils.py
@@ -1,7 +1,6 @@
from __future__ import annotations

from pathlib import Path
from typing import Any
from unittest import mock

import numpy as np
Expand All @@ -18,33 +17,6 @@
languages = ["en", "de", "fr", "it", "rm"]


def read_and_clean_excel_file(excelfile: str, sheetname: str | int = 0) -> pd.DataFrame:
    """
    Read one sheet of an Excel file into a DataFrame, clean it, and return it.

    Some XLSX files carry formatting properties that openpyxl (the parser
    pandas relies on) cannot handle, which surfaces as a ValueError. In that
    case the read is retried with openpyxl's font-family limit patched.
    Credits: https://stackoverflow.com/a/70537454/14414188

    Args:
        excelfile: The name of the Excel file
        sheetname: The name or index (zero-based) of the Excel sheet, by default it reads the first

    Returns:
        A pd.DataFrame
    """
    try:
        df: pd.DataFrame = pd.read_excel(excelfile, sheet_name=sheetname)
    except ValueError:
        # Retry with the openpyxl workaround for unsupported formatting properties.
        with mock.patch("openpyxl.styles.fonts.Font.family.max", new=100):
            df = pd.read_excel(excelfile, sheet_name=sheetname)
    return clean_data_frame(df=df)


def read_and_clean_all_sheets(excelfile: str | Path) -> dict[str, pd.DataFrame]:
"""
This function reads an Excel file with all its sheets.
Expand Down Expand Up @@ -204,24 +176,6 @@ def get_wrong_row_numbers(wrong_row_dict: dict[str, pd.Series], true_remains: bo
return {k: [x + 2 for x in v] for k, v in wrong_row_dict.items()}


def update_dict_if_not_value_none(additional_dict: dict[Any, Any], to_update_dict: dict[Any, Any]) -> dict[Any, Any]:
    """
    Merge the entries of "additional_dict" into "to_update_dict",
    skipping every entry whose value is None or pd.NA.
    "to_update_dict" is mutated in place.

    Args:
        additional_dict: The dictionary which contains information that may be transferred
        to_update_dict: The dictionary to which the new information should be transferred

    Returns:
        The "to_update_dict" with the additional information
    """
    for key, value in additional_dict.items():
        if value is not None and value is not pd.NA:
            to_update_dict[key] = value
    return to_update_dict


def get_labels(df_row: pd.Series) -> dict[str, str]:
"""
This function takes a pd.Series which has "label_[language tag]" in the index.
Expand Down
34 changes: 0 additions & 34 deletions src/dsp_tools/commands/fast_xmlupload/process_files.py
Expand Up @@ -766,40 +766,6 @@ def handle_interruption(
sys.exit(1)


def double_check_unprocessed_files(
    all_files: list[Path],
    processed_files: list[Path],
    unprocessed_files: list[Path],
) -> None:
    """
    Checks if the files in 'unprocessed_files.txt' are consistent with the files in 'processed_files.txt'.

    Args:
        all_files: list of all paths in the <bitstream> tags of the XML file
        processed_files: the paths from 'processed_files.txt'
        unprocessed_files: the paths from 'unprocessed_files.txt' (or all_files if there is no such file)

    Raises:
        UserError: if there is a file 'unprocessed_files.txt', but no file 'processed_files.txt'
        UserError: if the files 'unprocessed_files.txt' and 'processed_files.txt' are inconsistent
    """
    # sort once up front; the comparisons below are purely order-insensitive
    all_sorted = sorted(all_files)
    unprocessed_sorted = sorted(unprocessed_files)
    # if the unprocessed list differs from the full list, an 'unprocessed_files.txt' must have existed
    has_unprocessed_txt = unprocessed_sorted != all_sorted

    if has_unprocessed_txt and not processed_files:
        msg = "There is a file 'unprocessed_files.txt', but no file 'processed_files.txt'"
        logger.error(msg)
        raise UserError(msg)

    if processed_files and unprocessed_sorted == all_sorted:
        msg = "There is a file 'processed_files.txt', but no file 'unprocessed_files.txt'"
        logger.error(msg)
        raise UserError(msg)

    if has_unprocessed_txt:
        # there is a 'unprocessed_files.txt' file: it must contain exactly the
        # files of all_files that are not listed in 'processed_files.txt'
        expected_unprocessed = [f for f in all_files if f not in processed_files]
        if sorted(expected_unprocessed) != unprocessed_sorted:
            msg = "The files 'unprocessed_files.txt' and 'processed_files.txt' are inconsistent"
            logger.error(msg)
            raise UserError(msg)


def process_files(
input_dir: str,
output_dir: str,
Expand Down
9 changes: 0 additions & 9 deletions test/unittests/commands/excel2json/test_utils.py
Expand Up @@ -85,15 +85,6 @@ def test_get_wrong_row_numbers(self) -> None:
returned_dict = utl.get_wrong_row_numbers(wrong_row_dict=original_dict, true_remains=True)
self.assertDictEqual(expected_dict, returned_dict)

def test_update_dict_if_not_value_none(self) -> None:
    # entries whose value is None or pd.NA must be dropped during the merge
    target = {0: 0}
    additions = {1: 1, 2: 2, 3: None, 4: pd.NA, 5: "5"}
    result = utl.update_dict_if_not_value_none(additional_dict=additions, to_update_dict=target)
    self.assertDictEqual({0: 0, 1: 1, 2: 2, 5: "5"}, result)

def test_find_one_full_cell_in_cols(self) -> None:
required_cols = ["label_en", "label_de", "label_fr", "label_it", "label_rm"]
original_df = pd.DataFrame(
Expand Down

0 comments on commit aba9aef

Please sign in to comment.