Commit

chore: remove unused code (DEV-3152) (#712)
jnussbaum committed Jan 4, 2024
1 parent ff0e4fe commit 43cba62
Showing 6 changed files with 10 additions and 90 deletions.
51 changes: 4 additions & 47 deletions src/dsp_tools/commands/fast_xmlupload/process_files.py
@@ -9,8 +9,9 @@
 import uuid
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+from itertools import batched
 from pathlib import Path, PurePath
-from typing import Any, Literal, Optional, Union
+from typing import Any, Optional, Union

 import docker
 import requests
@@ -19,7 +20,6 @@

 from dsp_tools.models.exceptions import UserError
 from dsp_tools.utils.create_logger import get_logger
-from dsp_tools.utils.shared import make_chunks

 logger = get_logger(__name__)
 sipi_container: Optional[Container] = None
@@ -43,49 +43,6 @@ def _get_export_moving_image_frames_script() -> None:
         f.write(script_text)


-def _determine_exit_code(
-    files_to_process: list[Path],
-    processed_files: list[tuple[Path, Optional[Path]]],
-    is_last_batch: bool,
-) -> Literal[0, 1, 2]:
-    """
-    Based on the result of the file processing,
-    this function determines the exit code.
-    If some files of the current batch could not be processed, the exit code is 1.
-    If all files of the current batch were processed,
-    the exit code is 0 if this is the last batch,
-    and 2 if there are more batches to process.
-
-    Args:
-        files_to_process: list of all paths that should have been processed (current batch)
-        processed_files: list of tuples of Paths. If the second Path is None, the file could not be processed.
-        is_last_batch: true if this is the last batch of files to process
-
-    Returns:
-        exit code
-    """
-    processed_paths = [x[1] for x in processed_files if x and x[1]]
-    if len(processed_paths) == len(files_to_process):
-        print(f"{datetime.now()}: All files ({len(files_to_process)}) of this batch were processed: Okay")
-        logger.info(f"All files ({len(files_to_process)}) of this batch were processed: Okay")
-        if is_last_batch:
-            print(f"{datetime.now()}: All multimedia files referenced in the XML are processed. No more batches.")
-            logger.info("All multimedia files referenced in the XML are processed. No more batches.")
-            return 0
-        else:
-            return 2
-    else:
-        ratio = f"{len(processed_paths)}/{len(files_to_process)}"
-        msg = f"Some files of this batch could not be processed: Only {ratio} were processed. The failed ones are:"
-        print(f"{datetime.now()}: ERROR: {msg}")
-        logger.error(msg)
-        for input_file, output_file in processed_files:
-            if not output_file:
-                print(f" - {input_file}")
-                logger.error(f" - {input_file}")
-        return 1
-
-
 def _process_files_in_parallel(
     files_to_process: list[Path],
     input_dir: Path,
@@ -114,7 +71,7 @@ def _process_files_in_parallel(
     msg = f"Processing {len(files_to_process)} files, in batches of {batchsize} files each..."
     print(msg)
     orig_filepath_2_uuid: list[tuple[Path, Optional[Path]]] = []
-    for batch in make_chunks(lst=files_to_process, length=batchsize):
+    for batch in batched(files_to_process, batchsize):
         if unprocessed_paths := _launch_thread_pool(nthreads, input_dir, output_dir, batch, orig_filepath_2_uuid):
             return orig_filepath_2_uuid, unprocessed_paths
     print(f"Processed {len(orig_filepath_2_uuid)}/{len(files_to_process)} files")
@@ -125,7 +82,7 @@ def _launch_thread_pool(
     nthreads: int | None,
     input_dir: Path,
     output_dir: Path,
-    files_to_process: list[Path],
+    files_to_process: tuple[Path, ...],
     orig_filepath_2_uuid: list[tuple[Path, Optional[Path]]],
 ) -> list[Path]:
     counter = 0
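
The hunks above swap the custom make_chunks() helper for itertools.batched() from the standard library (new in Python 3.12). Unlike make_chunks(), batched() yields tuples rather than lists, which is why _launch_thread_pool() now annotates its batch parameter as tuple[Path, ...]. A minimal sketch of the behavior (illustrative file names, not from this repo):

    from itertools import batched  # requires Python >= 3.12
    from pathlib import Path

    files = [Path(f"img_{i}.tif") for i in range(7)]

    # batched() yields tuples of at most n items; the last one may be shorter.
    for batch in batched(files, 3):
        print(batch)
    # e.g. (PosixPath('img_0.tif'), PosixPath('img_1.tif'), PosixPath('img_2.tif'))
    #      (PosixPath('img_3.tif'), PosixPath('img_4.tif'), PosixPath('img_5.tif'))
    #      (PosixPath('img_6.tif'),)
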
6 changes: 3 additions & 3 deletions src/dsp_tools/commands/fast_xmlupload/upload_files.py
@@ -2,6 +2,7 @@
 import pickle
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+from itertools import batched
 from pathlib import Path
 from time import sleep
 from typing import Optional
@@ -14,7 +15,6 @@
 from dsp_tools.utils.connection import Connection
 from dsp_tools.utils.connection_live import ConnectionLive
 from dsp_tools.utils.create_logger import get_logger
-from dsp_tools.utils.shared import make_chunks

 logger = get_logger(__name__)

@@ -314,7 +314,7 @@ def _upload_files_in_parallel(
         _description_
     """
     result: list[tuple[Path, bool]] = []
-    for batch in make_chunks(lst=internal_filenames_of_processed_files, length=1000):
+    for batch in batched(internal_filenames_of_processed_files, 1000):
         _launch_thread_pool(nthreads, dir_with_processed_files, sipi_url, con, batch, result)
     return result

@@ -324,7 +324,7 @@ def _launch_thread_pool(
     dir_with_processed_files: Path,
     sipi_url: str,
     con: Connection,
-    batch: list[Path],
+    batch: tuple[Path, ...],
     result: list[tuple[Path, bool]],
 ) -> None:
     with ThreadPoolExecutor(max_workers=nthreads) as pool:
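
_upload_files_in_parallel() uses a batch-then-thread-pool pattern: slice the processed files into batches of 1000, then fan each batch out to a pool of worker threads. A hedged sketch of that pattern (upload_one() and the worker count are stand-ins, not the repo's real helpers):

    from concurrent.futures import ThreadPoolExecutor, as_completed
    from itertools import batched
    from pathlib import Path

    def upload_one(path: Path) -> tuple[Path, bool]:
        """Stand-in for the real upload call; returns (path, success)."""
        return path, True

    def upload_in_batches(paths: list[Path], batchsize: int = 1000) -> list[tuple[Path, bool]]:
        results: list[tuple[Path, bool]] = []
        for batch in batched(paths, batchsize):
            # One pool per batch keeps memory bounded and gives a natural
            # checkpoint between batches.
            with ThreadPoolExecutor(max_workers=4) as pool:
                futures = [pool.submit(upload_one, p) for p in batch]
                results.extend(f.result() for f in as_completed(futures))
        return results
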
3 changes: 1 addition & 2 deletions src/dsp_tools/commands/template.py
@@ -24,8 +24,7 @@ def generate_template_repo() -> bool:
     template_path_of_distribution = importlib.resources.files("dsp_tools").joinpath("resources/0100-template-repo")
     for file in template_path_of_distribution.iterdir():
         with importlib.resources.as_file(file) as f:
-            file_path = Path(f)
-            shutil.copy(file_path, template_path_of_user / file.name)
+            shutil.copy(f, template_path_of_user / file.name)
             print(f"Created {template_path_of_user / file.name}")

     return True
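
The intermediate Path(f) was redundant: importlib.resources.as_file() already yields a concrete pathlib.Path, and shutil.copy() accepts any os.PathLike. A small sketch of the pattern (the destination directory is a placeholder):

    import importlib.resources
    import shutil
    from pathlib import Path

    target_dir = Path("my-template-repo")  # placeholder destination
    target_dir.mkdir(exist_ok=True)

    # files() returns a Traversable; as_file() materializes the resource on
    # disk (extracting it from a zip/wheel if necessary) and yields a real
    # filesystem path, so no Path() wrapping is needed.
    template_dir = importlib.resources.files("dsp_tools").joinpath("resources/0100-template-repo")
    for resource in template_dir.iterdir():
        with importlib.resources.as_file(resource) as f:
            shutil.copy(f, target_dir / resource.name)
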
23 changes: 1 addition & 22 deletions src/dsp_tools/utils/shared.py
@@ -7,7 +7,7 @@
 import unicodedata
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Iterable, Optional, TypeGuard, TypeVar, Union
+from typing import Any, Optional, TypeGuard, Union

 import pandas as pd
 import regex
@@ -20,27 +20,6 @@
 logger = get_logger(__name__)


-T = TypeVar("T")
-
-
-def make_chunks(lst: list[T], length: int) -> Iterable[list[T]]:
-    """
-    Split a list into length-sized chunks.
-    If length is greater than the length of the list,
-    the result will have only 1 chunk.
-
-    Args:
-        lst: list
-        length: length of the chunks
-
-    Yields:
-        chunks
-    """
-    length = min(length, len(lst))
-    for i in range(0, len(lst), length):
-        yield lst[i : i + length]
-
-
 def validate_xml_against_schema(input_file: Union[str, Path, etree._ElementTree[Any]]) -> bool:
     """
     Validates an XML file against the DSP XSD schema.
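
For the inputs exercised by the old unit test, the deleted make_chunks() and itertools.batched() group items identically; only the element type differs (tuples instead of lists). A quick comparison, including the edge cases where the two diverge (my reading of the deleted code, worth double-checking):

    from itertools import batched

    # Same grouping as the deleted make_chunks(), but tuples instead of lists:
    assert list(batched(range(10), 4)) == [(0, 1, 2, 3), (4, 5, 6, 7), (8, 9)]
    assert list(batched(range(10), 11)) == [(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)]

    # Edge cases differ: make_chunks() clamped `length` to len(lst), so an
    # empty list produced range(0, 0, 0) and raised ValueError; batched()
    # simply yields nothing for an empty iterable, and it accepts any
    # iterable, not only lists.
    assert list(batched([], 5)) == []
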
6 changes: 1 addition & 5 deletions test/e2e/commands/xmlupload/test_list_client_live.py
@@ -1,17 +1,13 @@
 import regex

 from dsp_tools.commands.xmlupload.list_client import ListClientLive
-from dsp_tools.commands.xmlupload.project_client import ProjectClientLive
 from dsp_tools.utils.connection_live import ConnectionLive


 def test_list_client_live() -> None:
     con = ConnectionLive("http://localhost:3333")
     con.login("root@example.com", "test")
-    project_client = ProjectClientLive(con, "0001")
-    project_iri = project_client.get_project_iri()
-    assert project_iri == "http://rdfh.ch/projects/0001"
-    list_client = ListClientLive(con, project_iri)
+    list_client = ListClientLive(con, "http://rdfh.ch/projects/0001")
     list_node_id_to_iri_lookup = list_client.get_list_node_id_to_iri_lookup()
     assert len(list_node_id_to_iri_lookup) >= 28
     assert all(regex.search(r"^http://rdfh\.ch/lists/0001/.+$", x) for x in list_node_id_to_iri_lookup.values())
11 changes: 0 additions & 11 deletions test/unittests/utils/test_shared.py
@@ -15,17 +15,6 @@


 class TestShared(unittest.TestCase):
-    def test_make_chunks(self) -> None:
-        testcases = {
-            (range(10), 5): [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]],
-            (range(10), 9): [[0, 1, 2, 3, 4, 5, 6, 7, 8], [9]],
-            (range(10), 10): [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]],
-            (range(10), 11): [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]],
-        }
-        for _input, _output in testcases.items():
-            _output_actual = list(shared.make_chunks(lst=list(_input[0]), length=_input[1]))
-            self.assertListEqual(_output, _output_actual)
-
     def test_validate_xml_against_schema(self) -> None:
         self.assertTrue(shared.validate_xml_against_schema(input_file="testdata/xml-data/test-data-systematic.xml"))
         self.assertTrue(
