From aba9aefa8af8bf9086a102118b2f69d5856a84f2 Mon Sep 17 00:00:00 2001
From: Nora-Olivia-Ammann
 <103038637+Nora-Olivia-Ammann@users.noreply.github.com>
Date: Thu, 1 Feb 2024 12:16:30 +0100
Subject: [PATCH] chore: remove dead code with vulture (#790)

---
 src/dsp_tools/commands/excel2json/utils.py    | 46 -------------------
 .../commands/fast_xmlupload/process_files.py  | 34 --------------
 .../commands/excel2json/test_utils.py         |  9 ----
 3 files changed, 89 deletions(-)

diff --git a/src/dsp_tools/commands/excel2json/utils.py b/src/dsp_tools/commands/excel2json/utils.py
index 82e09e76a..7e27441d7 100644
--- a/src/dsp_tools/commands/excel2json/utils.py
+++ b/src/dsp_tools/commands/excel2json/utils.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any
 from unittest import mock
 
 import numpy as np
@@ -18,33 +17,6 @@
 languages = ["en", "de", "fr", "it", "rm"]
 
 
-def read_and_clean_excel_file(excelfile: str, sheetname: str | int = 0) -> pd.DataFrame:
-    """
-    This function reads an Excel file, if there is a ValueError then it patches the openpyxl part that creates the
-    error and opens it with that patch.
-    It cleans and then returns the pd.DataFrame.
-
-    Args:
-        excelfile: The name of the Excel file
-        sheetname: The name or index (zero-based) of the Excel sheet, by default it reads the first
-
-    Returns:
-        A pd.DataFrame
-    """
-    try:
-        read_df: pd.DataFrame = pd.read_excel(excelfile, sheet_name=sheetname)
-    except ValueError:
-        # Pandas relies on openpyxl to parse XLSX files.
-        # A strange behavior of openpyxl prevents pandas from opening files with some formatting properties
-        # (unclear which formatting properties exactly).
-        # Apparently, the excel2json test files have one of the unsupported formatting properties.
-        # Credits: https://stackoverflow.com/a/70537454/14414188
-        with mock.patch("openpyxl.styles.fonts.Font.family.max", new=100):
-            read_df = pd.read_excel(excelfile, sheet_name=sheetname)
-    read_df = clean_data_frame(df=read_df)
-    return read_df
-
-
 def read_and_clean_all_sheets(excelfile: str | Path) -> dict[str, pd.DataFrame]:
     """
     This function reads an Excel file with all its sheets.
@@ -204,24 +176,6 @@ def get_wrong_row_numbers(wrong_row_dict: dict[str, pd.Series], true_remains: bo
     return {k: [x + 2 for x in v] for k, v in wrong_row_dict.items()}
 
 
-def update_dict_if_not_value_none(additional_dict: dict[Any, Any], to_update_dict: dict[Any, Any]) -> dict[Any, Any]:
-    """
-    This function takes two dictionaries.
-    The "to_update_dict" should be updated with the information from the "additional_dict"
-    only if the value of a particular key is not None or pd.NA.
-
-    Args:
-        additional_dict: The dictionary which contains information that may be transferred
-        to_update_dict: The dictionary to which the new information should be transferred
-
-    Returns:
-        The "to_update_dict" which the additional information
-    """
-    additional_dict = {k: v for k, v in additional_dict.items() if v is not None and v is not pd.NA}
-    to_update_dict.update(additional_dict)
-    return to_update_dict
-
-
 def get_labels(df_row: pd.Series) -> dict[str, str]:
     """
     This function takes a pd.Series which has "label_[language tag]" in the index.
diff --git a/src/dsp_tools/commands/fast_xmlupload/process_files.py b/src/dsp_tools/commands/fast_xmlupload/process_files.py
index a45edb366..59d7bb1c0 100644
--- a/src/dsp_tools/commands/fast_xmlupload/process_files.py
+++ b/src/dsp_tools/commands/fast_xmlupload/process_files.py
@@ -766,40 +766,6 @@ def handle_interruption(
     sys.exit(1)
 
 
-def double_check_unprocessed_files(
-    all_files: list[Path],
-    processed_files: list[Path],
-    unprocessed_files: list[Path],
-) -> None:
-    """
-    Checks if the files in 'unprocessed_files.txt' are consistent with the files in 'processed_files.txt'.
-
-    Args:
-        all_files: list of all paths in the <bitstream> tags of the XML file
-        processed_files: the paths from 'processed_files.txt'
-        unprocessed_files: the paths from 'unprocessed_files.txt' (or all_files if there is no such file)
-
-    Raises:
-        UserError: if there is a file 'unprocessed_files.txt', but no file 'processed_files.txt'
-        UserError: if the files 'unprocessed_files.txt' and 'processed_files.txt' are inconsistent
-    """
-    unprocessed_files_txt_exists = sorted(unprocessed_files) != sorted(all_files)
-    if unprocessed_files_txt_exists and not processed_files:
-        logger.error("There is a file 'unprocessed_files.txt', but no file 'processed_files.txt'")
-        raise UserError("There is a file 'unprocessed_files.txt', but no file 'processed_files.txt'")
-
-    if processed_files and sorted(unprocessed_files) == sorted(all_files):
-        logger.error("There is a file 'processed_files.txt', but no file 'unprocessed_files.txt'")
-        raise UserError("There is a file 'processed_files.txt', but no file 'unprocessed_files.txt'")
-
-    if unprocessed_files_txt_exists:
-        # there is a 'unprocessed_files.txt' file. check it for consistency
-        unprocessed_files_from_processed_files = [x for x in all_files if x not in processed_files]
-        if sorted(unprocessed_files_from_processed_files) != sorted(unprocessed_files):
-            logger.error("The files 'unprocessed_files.txt' and 'processed_files.txt' are inconsistent")
-            raise UserError("The files 'unprocessed_files.txt' and 'processed_files.txt' are inconsistent")
-
-
 def process_files(
     input_dir: str,
     output_dir: str,
diff --git a/test/unittests/commands/excel2json/test_utils.py b/test/unittests/commands/excel2json/test_utils.py
index e39746ef3..21f34cacb 100644
--- a/test/unittests/commands/excel2json/test_utils.py
+++ b/test/unittests/commands/excel2json/test_utils.py
@@ -85,15 +85,6 @@ def test_get_wrong_row_numbers(self) -> None:
         returned_dict = utl.get_wrong_row_numbers(wrong_row_dict=original_dict, true_remains=True)
         self.assertDictEqual(expected_dict, returned_dict)
 
-    def test_update_dict_if_not_value_none(self) -> None:
-        original_dict = {0: 0}
-        original_update_dict = {1: 1, 2: 2, 3: None, 4: pd.NA, 5: "5"}
-        expected_dict = {0: 0, 1: 1, 2: 2, 5: "5"}
-        returned_dict = utl.update_dict_if_not_value_none(
-            additional_dict=original_update_dict, to_update_dict=original_dict
-        )
-        self.assertDictEqual(expected_dict, returned_dict)
-
     def test_find_one_full_cell_in_cols(self) -> None:
         required_cols = ["label_en", "label_de", "label_fr", "label_it", "label_rm"]
         original_df = pd.DataFrame(