From 98f0b976068321dce24b3104744ff7ce5a7c97f1 Mon Sep 17 00:00:00 2001 From: Johannes Nussbaum <39048939+jnussbaum@users.noreply.github.com> Date: Mon, 7 Aug 2023 14:53:35 +0200 Subject: [PATCH] fix(upload-files, fast-xmlupload): handle multiple pickle files (DEV-2500) (#451) --- docs/internal/fast-xmlupload.md | 12 +- src/dsp_tools/dsp_tools.py | 4 - src/dsp_tools/fast_xmlupload/process_files.py | 3 +- src/dsp_tools/fast_xmlupload/upload_files.py | 128 ++++++------ src/dsp_tools/fast_xmlupload/upload_xml.py | 42 ++-- test/e2e/test_fast_xmlupload.py | 59 ++++-- .../xml-data/test-data-fast-xmlupload.xml | 189 +----------------- 7 files changed, 149 insertions(+), 288 deletions(-) diff --git a/docs/internal/fast-xmlupload.md b/docs/internal/fast-xmlupload.md index 49c277759..f1070d4cd 100644 --- a/docs/internal/fast-xmlupload.md +++ b/docs/internal/fast-xmlupload.md @@ -86,6 +86,7 @@ e.g. `multimedia/dog.jpg` -> `tmp/0b/22/0b22570d-515f-4c3d-a6af-e42b458e7b2b.jp2 **In this case, you need to restart the command several times, until the exit code is 0.** **Only then, all files are processed.** **Unexpected errors result in exit code 1.** +**If this batch splitting happens, every run produces a new pickle file.** You can orchestrate this with a shell script, e.g.: @@ -102,18 +103,18 @@ if [ $exit_code -ne 0 ]; then fi ``` + ## 3. `dsp-tools upload-files` After all files are processed, the upload step can be started. 
```bash -dsp-tools upload-files --pkl-file=processing_result_20230414_152810.pkl --processed-dir=tmp +dsp-tools upload-files --processed-dir=tmp ``` The following options are available: -- `-f` | `--pkl-file` (mandatory): path to the pickle file that was written by the processing step - `-d` | `--processed-dir` (mandatory): path to the directory where the processed files are located (same as `--output-dir` in the processing step) - `-n` | `--nthreads` (optional, default 4): number of threads to use for uploading @@ -122,6 +123,9 @@ The following options are available: - `-u` | `--user` (optional, default: `root@example.com`): username (e-mail) used for authentication with the DSP-API - `-p` | `--password` (optional, default: `test`): password used for authentication with the DSP-API +This command will collect all pickle files in the current working directory +that were created by the `process-files` command. + ## 4. `dsp-tools fast-xmlupload` @@ -131,7 +135,9 @@ dsp-tools fast-xmlupload --pkl-file=processing_result_20230414_152810.pkl data.x The following options are available: -- `-f` | `--pkl-file` (mandatory): path to the pickle file that was written by the processing step - `-s` | `--server` (optional, default: `0.0.0.0:3333`): URL of the DSP server - `-u` | `--user` (optional, default: `root@example.com`): username (e-mail) used for authentication with the DSP-API - `-p` | `--password` (optional, default: `test`): password used for authentication with the DSP-API + +This command will collect all pickle files in the current working directory +that were created by the `process-files` command. 
diff --git a/src/dsp_tools/dsp_tools.py b/src/dsp_tools/dsp_tools.py index 28480d33b..9e7535e41 100644 --- a/src/dsp_tools/dsp_tools.py +++ b/src/dsp_tools/dsp_tools.py @@ -144,7 +144,6 @@ def _make_parser( help="For internal use only: upload already processed files", ) parser_upload_files.set_defaults(action="upload-files") - parser_upload_files.add_argument("-f", "--pkl-file", help="path to pickle file written by 'process-files'") parser_upload_files.add_argument("-d", "--processed-dir", help="path to the directory with the processed files") parser_upload_files.add_argument("-n", "--nthreads", type=int, default=4, help="number of threads to use") parser_upload_files.add_argument("-s", "--server", default=default_dsp_api_url, help=dsp_server_text) @@ -157,7 +156,6 @@ def _make_parser( help="For internal use only: create resources with already uploaded files", ) parser_fast_xmlupload_files.set_defaults(action="fast-xmlupload") - parser_fast_xmlupload_files.add_argument("-f", "--pkl-file", help="path to pickle file written by 'process-files'") parser_fast_xmlupload_files.add_argument("-s", "--server", default=default_dsp_api_url, help=dsp_server_text) parser_fast_xmlupload_files.add_argument("-u", "--user", default=root_user_email, help=username_text) parser_fast_xmlupload_files.add_argument("-p", "--password", default=root_user_pw, help=password_text) @@ -476,7 +474,6 @@ def _call_requested_action(args: argparse.Namespace) -> bool: ) elif args.action == "upload-files": success = upload_files( - pkl_file=args.pkl_file, dir_with_processed_files=args.processed_dir, nthreads=args.nthreads, user=args.user, @@ -487,7 +484,6 @@ def _call_requested_action(args: argparse.Namespace) -> bool: elif args.action == "fast-xmlupload": success = fast_xmlupload( xml_file=args.xml_file, - pkl_file=args.pkl_file, user=args.user, password=args.password, dsp_url=args.server, diff --git a/src/dsp_tools/fast_xmlupload/process_files.py b/src/dsp_tools/fast_xmlupload/process_files.py index 
1d7e37a1b..a45c5cf34 100644 --- a/src/dsp_tools/fast_xmlupload/process_files.py +++ b/src/dsp_tools/fast_xmlupload/process_files.py @@ -144,7 +144,8 @@ def _write_result_to_pkl_file(processed_files: list[tuple[Path, Optional[Path]]] Raises: UserError if the file could not be written """ - filename = f"processing_result_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pkl" + filename = Path(f"processing_result_{datetime.now().strftime('%Y-%m-%d_%H.%M.%S.%f')}.pkl") + try: with open(filename, "wb") as pkl_file: pickle.dump(processed_files, pkl_file) diff --git a/src/dsp_tools/fast_xmlupload/upload_files.py b/src/dsp_tools/fast_xmlupload/upload_files.py index fe6f227ba..a6085b263 100644 --- a/src/dsp_tools/fast_xmlupload/upload_files.py +++ b/src/dsp_tools/fast_xmlupload/upload_files.py @@ -9,13 +9,73 @@ from regex import regex from dsp_tools.models.connection import Connection -from dsp_tools.models.exceptions import BaseError +from dsp_tools.models.exceptions import UserError from dsp_tools.utils.logging import get_logger from dsp_tools.utils.shared import login logger = get_logger(__name__) +def _check_processed_dir(dir_with_processed_files: str) -> Path: + """ + Checks the input parameter provided by the user and transforms it into a Path. + + Args: + dir_with_processed_files: the directory where the processed files have been written to + + Raises: + UserError: if the directory does not exist + + Returns: + Path object of the directory + """ + dir_with_processed_files_path = Path(dir_with_processed_files) + if not dir_with_processed_files_path.is_dir(): + raise UserError(f"The folder with the processed files is invalid: {dir_with_processed_files}") + return dir_with_processed_files_path + + +def get_pkl_files() -> list[Path]: + """ + Get all pickle files starting with "processing_result_" in the current working directory. 
+ + Raises: + UserError: If no pickle file was found + + Returns: + list of pickle files + """ + pkl_file_paths = [Path(x) for x in glob.glob("processing_result_*.pkl")] + if len(pkl_file_paths) == 0: + raise UserError("No pickle file found. Please run the processing step first.") + return pkl_file_paths + + +def _get_paths_from_pkl_files(pkl_files: list[Path]) -> list[Path]: + """ + Read the pickle file(s) returned by the processing step. + + Args: + pkl_files: pickle file(s) returned by the processing step + + Returns: + list of file paths of the processed files (uuid filenames) + """ + orig_paths_2_processed_paths: list[tuple[Path, Optional[Path]]] = [] + for pkl_file in pkl_files: + orig_paths_2_processed_paths.extend(pickle.loads(pkl_file.read_bytes())) + + processed_paths: list[Path] = [] + for orig_path, processed_path in orig_paths_2_processed_paths: + if processed_path: + processed_paths.append(processed_path) + else: + print(f"{datetime.now()}: WARNING: There is no processed file for {orig_path}") + logger.warning(f"There is no processed file for {orig_path}") + + return processed_paths + + def _get_upload_candidates( dir_with_processed_files: Path, internal_filename_of_processed_file: Path, @@ -189,57 +249,6 @@ def _upload_file( return internal_filename_of_processed_file, True -def _get_paths_from_pkl_file(pkl_file: Path) -> list[Path]: - """ - Read the pickle file returned by the processing step. 
- - Args: - pkl_file: pickle file returned by the processing step - - Returns: - list of uuid file paths - """ - with open(pkl_file, "rb") as file: - orig_paths_2_processed_paths: list[tuple[Path, Optional[Path]]] = pickle.load(file) - - processed_paths: list[Path] = [] - for orig_processed in orig_paths_2_processed_paths: - if orig_processed[1]: - processed_paths.append(orig_processed[1]) - else: - print(f"{datetime.now()}: WARNING: There is no processed file for {orig_processed[0]}") - logger.warning(f"There is no processed file for {orig_processed[0]}") - - return processed_paths - - -def _check_params( - pkl_file: str, - dir_with_processed_files: str, -) -> Optional[tuple[Path, Path]]: - """ - Checks the input parameters provided by the user and transforms them into the expected types. - - Args: - pkl_file: the XML file the paths are extracted from - processed_dir: the output directory where the created files should be written to - - Returns: - A tuple with the Path objects of the input strings - """ - pkl_file_path = Path(pkl_file) - dir_with_processed_files_path = Path(dir_with_processed_files) - - if not pkl_file_path.is_file(): - print(f"{pkl_file} is not a file") - return None - if not dir_with_processed_files_path.is_dir(): - print(f"{dir_with_processed_files} is not a directory") - return None - - return pkl_file_path, dir_with_processed_files_path - - def _upload_files_in_parallel( dir_with_processed_files: Path, internal_filenames_of_processed_files: list[Path], @@ -313,7 +322,6 @@ def _check_if_all_files_were_uploaded( def upload_files( - pkl_file: str, dir_with_processed_files: str, nthreads: int, user: str, @@ -326,7 +334,6 @@ def upload_files( Before using this method, the files must be processed by the processing step. Args: - pkl_file: pickle file containing the mapping between the original files and the processed files, e.g. Path('multimedia/nested/subfolder/test.tif'), Path('tmp/0b/22/0b22570d-515f-4c3d-a6af-e42b458e7b2b.jp2'). 
dir_with_processed_files: path to the directory where the processed files are located nthreads: number of threads to use for uploading (optimum depends on the number of CPUs on the server) @@ -338,18 +345,11 @@ def upload_files( Returns: success status """ - # check the input parameters - param_check_result = _check_params( - pkl_file=pkl_file, - dir_with_processed_files=dir_with_processed_files, - ) - if param_check_result: - pkl_file_path, dir_with_processed_files_path = param_check_result - else: - raise BaseError("Error reading the input parameters. Please check them.") + dir_with_processed_files_path = _check_processed_dir(dir_with_processed_files) + pkl_file_paths = get_pkl_files() # read paths from pkl file - internal_filenames_of_processed_files = _get_paths_from_pkl_file(pkl_file=pkl_file_path) + internal_filenames_of_processed_files = _get_paths_from_pkl_files(pkl_files=pkl_file_paths) print(f"{datetime.now()}: Found {len(internal_filenames_of_processed_files)} files to upload...") logger.info(f"Found {len(internal_filenames_of_processed_files)} files to upload...") diff --git a/src/dsp_tools/fast_xmlupload/upload_xml.py b/src/dsp_tools/fast_xmlupload/upload_xml.py index ae1e6eeee..b1c023d10 100644 --- a/src/dsp_tools/fast_xmlupload/upload_xml.py +++ b/src/dsp_tools/fast_xmlupload/upload_xml.py @@ -1,36 +1,42 @@ import pickle from datetime import datetime from pathlib import Path -from typing import Optional +from typing import Optional, cast from lxml import etree -from dsp_tools.models.exceptions import BaseError +from dsp_tools.models.exceptions import UserError from dsp_tools.utils.logging import get_logger from dsp_tools.utils.xml_upload import xml_upload +from dsp_tools.fast_xmlupload.upload_files import get_pkl_files + logger = get_logger(__name__) -def _get_paths_from_pkl_file(pkl_file: Path) -> dict[str, str]: +def _get_paths_from_pkl_files(pkl_files: list[Path]) -> dict[str, str]: """ - Read the pickle file returned by the processing step. 
+ Read the pickle file(s) returned by the processing step. Args: - pkl_file: pickle file returned by the processing step + pkl_files: pickle file(s) returned by the processing step + + Raises: + UserError: If for a file, no derivative was found Returns: dict of original paths to uuid filenames """ - with open(pkl_file, "rb") as file: - orig_path_2_processed_path: list[tuple[Path, Optional[Path]]] = pickle.load(file) + orig_path_2_processed_path: list[tuple[Path, Optional[Path]]] = [] + for pkl_file in pkl_files: + orig_path_2_processed_path.extend(pickle.loads(pkl_file.read_bytes())) orig_path_2_uuid_filename: dict[str, str] = {} for orig_path, processed_path in orig_path_2_processed_path: if processed_path: orig_path_2_uuid_filename[str(orig_path)] = str(processed_path.name) else: - raise BaseError( + raise UserError( f"There is no processed file for {orig_path}. The fast xmlupload cannot be started, " "because the resource that uses this file would fail." ) @@ -43,14 +49,14 @@ def replace_bitstream_paths( orig_path_2_uuid_filename: dict[str, str], ) -> "etree._ElementTree[etree._Element]": """ - Replace the original filepaths in the Tags by the uuid filenames of the processed files. + Replace the original filepaths in the tags by the uuid filenames of the processed files. Args: xml_tree: The parsed original XML tree orig_path_2_uuid_filename: Mapping from original filenames to uuid filenames (from the pickle file) Raises: - BaseError: If for a file, no derivative was found + UserError: If for a file, no derivative was found Returns: The XML tree with the replaced filepaths (modified in place) @@ -60,20 +66,16 @@ def replace_bitstream_paths( if elem.text in orig_path_2_uuid_filename: elem.text = orig_path_2_uuid_filename[elem.text] else: - res_id = "" - res = elem.getparent() - if res: - res_id = f"Resource {res.attrib['id']}: " - raise BaseError( - f"{res_id}Cannot find processed derivatives for {elem.text}. 
The fast xmlupload cannot be started, " - "because the resource that uses this file would fail." + res_id = cast("etree._Element", elem.getparent()).attrib.get("id") + raise UserError( + f"Resource {res_id}: Cannot find processed derivatives for {elem.text}. " + "The fast xmlupload cannot be started, because the resource that uses this file would fail." ) return xml_tree def fast_xmlupload( xml_file: str, - pkl_file: str, user: str, password: str, dsp_url: str, @@ -89,7 +91,6 @@ def fast_xmlupload( Args: xml_file: path to XML file containing the resources - pkl_file: pickle file containing the mapping between the original files and the processed files, e.g. Path('multimedia/nested/subfolder/test.tif'), Path('tmp/0b/22/0b22570d-515f-4c3d-a6af-e42b458e7b2b.jp2') user: the user's e-mail for login into DSP password: the user's password for login into DSP @@ -100,7 +101,8 @@ def fast_xmlupload( success status """ xml_tree_orig = etree.parse(xml_file) - orig_path_2_uuid_filename = _get_paths_from_pkl_file(pkl_file=Path(pkl_file)) + pkl_files = get_pkl_files() + orig_path_2_uuid_filename = _get_paths_from_pkl_files(pkl_files) xml_tree_replaced = replace_bitstream_paths( xml_tree=xml_tree_orig, orig_path_2_uuid_filename=orig_path_2_uuid_filename, diff --git a/test/e2e/test_fast_xmlupload.py b/test/e2e/test_fast_xmlupload.py index 6602c87fa..1f5fe3650 100644 --- a/test/e2e/test_fast_xmlupload.py +++ b/test/e2e/test_fast_xmlupload.py @@ -39,32 +39,35 @@ def setUpClass(cls) -> None: verbose=False, dump=False, ) - shutil.copytree(cls.input_dir, cls.input_dir / "nested") - shutil.copytree(cls.input_dir / "nested", cls.input_dir / "nested/subfolder") + Path(cls.input_dir / "nested").mkdir() + Path(cls.input_dir / "nested/subfolder").mkdir() + shutil.copy(cls.input_dir / "test.jpg", cls.input_dir / "nested/test.jpg") + shutil.copy(cls.input_dir / "test.jpg", cls.input_dir / "nested/subfolder/test.jpg") @classmethod def tearDownClass(cls) -> None: """ - Is executed after the 
methods of this class have all run through + Remove the testdata/bitstreams/nested folder and the testdata/preprocessed_files folder. + Is executed after the methods of this class have all run through. """ shutil.rmtree(cls.input_dir / "nested") shutil.rmtree(cls.output_dir) def tearDown(self) -> None: """ - Is executed after each test method + Delete all pickle files, all id2iri files and all "(un)processed_files.txt" files. + Is executed after each test method. """ for pickle_file in list(Path().glob("*.pkl")): pickle_file.unlink() - id2iri_search_results = list(Path().glob("*id2iri_mapping.json")) - if len(id2iri_search_results) == 1: - id2iri_search_results[0].unlink() + for id2iri_file in list(Path().glob("*id2iri_mapping.json")): + id2iri_file.unlink() for txt_file in self.txt_files: Path(txt_file).unlink(missing_ok=True) - def test_fast_xmlupload(self) -> None: + def test_fast_xmlupload_without_batching(self) -> None: """ Test if it is possible to call the 3 steps of the fast XML upload, one after the other. No detailed tests are done here, it is only tested if the 3 steps can be called. 
@@ -79,11 +82,11 @@ def test_fast_xmlupload(self) -> None: ) self.assertTrue(success_process) - pickle_file = str(list(Path().glob("*.pkl"))[0]) + pickle_files = list(Path().glob("*.pkl")) + self.assertEqual(len(pickle_files), 1) - print(f"test_fast_xmlupload: call upload_files() with pickle file {pickle_file}") + print("test_fast_xmlupload: call upload_files()") success_upload = upload_files( - pkl_file=pickle_file, dir_with_processed_files=self.output_dir, nthreads=4, user=self.user, @@ -96,7 +99,6 @@ def test_fast_xmlupload(self) -> None: print("test_fast_xmlupload: call fast_xmlupload()") success_fast_xmlupload = fast_xmlupload( xml_file=self.xml_file, - pkl_file=pickle_file, user=self.user, password=self.password, dsp_url=self.dsp_url, @@ -104,21 +106,24 @@ def test_fast_xmlupload(self) -> None: ) self.assertTrue(success_fast_xmlupload) - def test_batch_size_of_process_files(self) -> None: + def test_fast_xmlupload_batching(self) -> None: """ Test if the "batch_size" parameter of process_files() function works. - The test file contains 92 bitstreams, so a batch size of 40 should result in 3 batches. + The test file contains 34 bitstreams, so a batch size of 15 should result in 3 batches. The first 2 batches should exit with exit code 2 and success=True, the 3rd batch should exit with exit code 0 and success=True. + + Afterwards, the uploading step and the fast xmlupload step should work with the 3 pickle files. 
""" def action() -> bool: + print("test_fast_xmlupload_batching: call process_files() with batch size 15") return process_files( input_dir=str(self.input_dir), output_dir=self.output_dir, xml_file=self.xml_file, nthreads=None, - batch_size=40, + batch_size=15, ) for i in range(2): @@ -130,6 +135,30 @@ def action() -> bool: success = action() self.assertTrue(success) + pickle_files = list(Path().glob("*.pkl")) + self.assertEqual(len(pickle_files), 3) + + print("test_fast_xmlupload_batching: call upload_files()") + success_upload = upload_files( + dir_with_processed_files=self.output_dir, + nthreads=4, + user=self.user, + password=self.password, + dsp_url=self.dsp_url, + sipi_url=self.sipi_url, + ) + self.assertTrue(success_upload) + + print("test_fast_xmlupload_batching: call fast_xmlupload()") + success_fast_xmlupload = fast_xmlupload( + xml_file=self.xml_file, + user=self.user, + password=self.password, + dsp_url=self.dsp_url, + sipi_url=self.sipi_url, + ) + self.assertTrue(success_fast_xmlupload) + if __name__ == "__main__": pytest.main([__file__]) diff --git a/testdata/xml-data/test-data-fast-xmlupload.xml b/testdata/xml-data/test-data-fast-xmlupload.xml index 694177cae..f8346c967 100644 --- a/testdata/xml-data/test-data-fast-xmlupload.xml +++ b/testdata/xml-data/test-data-fast-xmlupload.xml @@ -27,275 +27,102 @@ testdata/bitstreams/test with whitespaces.png - - testdata/bitstreams/README.txt + + + testdata/bitstreams/nested/test.jpg - - testdata/bitstreams/nested/README.txt + + testdata/bitstreams/nested/subfolder/test.jpg - - testdata/bitstreams/nested/subfolder/README.txt + + + testdata/bitstreams/README.txt testdata/bitstreams/test.jpg - - testdata/bitstreams/nested/test.jpg - - - testdata/bitstreams/nested/subfolder/test.jpg - testdata/bitstreams/test.jpeg - - testdata/bitstreams/nested/test.jpeg - - - testdata/bitstreams/nested/subfolder/test.jpeg - testdata/bitstreams/test.tif - - testdata/bitstreams/nested/test.tif - - - 
testdata/bitstreams/nested/subfolder/test.tif - testdata/bitstreams/test.tiff - - testdata/bitstreams/nested/test.tiff - - - testdata/bitstreams/nested/subfolder/test.tiff - testdata/bitstreams/test.jp2 - - testdata/bitstreams/nested/test.jp2 - - - testdata/bitstreams/nested/subfolder/test.jp2 - testdata/bitstreams/test.png - - testdata/bitstreams/nested/test.png - - - testdata/bitstreams/nested/subfolder/test.png - testdata/bitstreams/test.mp4 - - testdata/bitstreams/nested/test.mp4 - - - testdata/bitstreams/nested/subfolder/test.mp4 - testdata/bitstreams/test.7z - - testdata/bitstreams/nested/test.7z - - - testdata/bitstreams/nested/subfolder/test.7z - testdata/bitstreams/test.gz - - testdata/bitstreams/nested/test.gz - - - testdata/bitstreams/nested/subfolder/test.gz - testdata/bitstreams/test.gzip - - testdata/bitstreams/nested/test.gzip - - - testdata/bitstreams/nested/subfolder/test.gzip - testdata/bitstreams/test.tar - - testdata/bitstreams/nested/test.tar - - - testdata/bitstreams/nested/subfolder/test.tar - testdata/bitstreams/test.tar.gz - - testdata/bitstreams/nested/test.tar.gz - - - testdata/bitstreams/nested/subfolder/test.tar.gz - testdata/bitstreams/test.tgz - - testdata/bitstreams/nested/test.tgz - - - testdata/bitstreams/nested/subfolder/test.tgz - testdata/bitstreams/test.z - - testdata/bitstreams/nested/test.z - - - testdata/bitstreams/nested/subfolder/test.z - testdata/bitstreams/test.zip - - testdata/bitstreams/nested/test.zip - - - testdata/bitstreams/nested/subfolder/test.zip - testdata/bitstreams/test.csv - - testdata/bitstreams/nested/test.csv - - - testdata/bitstreams/nested/subfolder/test.csv - testdata/bitstreams/test.txt - - testdata/bitstreams/nested/test.txt - - - testdata/bitstreams/nested/subfolder/test.txt - testdata/bitstreams/test.xml - - testdata/bitstreams/nested/test.xml - - - testdata/bitstreams/nested/subfolder/test.xml - testdata/bitstreams/test.xsd - - testdata/bitstreams/nested/test.xsd - - - 
testdata/bitstreams/nested/subfolder/test.xsd - testdata/bitstreams/test.xsl - - testdata/bitstreams/nested/test.xsl - - - testdata/bitstreams/nested/subfolder/test.xsl - testdata/bitstreams/test.doc - - testdata/bitstreams/nested/test.doc - - - testdata/bitstreams/nested/subfolder/test.doc - testdata/bitstreams/test.docx - - testdata/bitstreams/nested/test.docx - - - testdata/bitstreams/nested/subfolder/test.docx - testdata/bitstreams/test.pdf - - testdata/bitstreams/nested/test.pdf - - - testdata/bitstreams/nested/subfolder/test.pdf - testdata/bitstreams/test.ppt - - testdata/bitstreams/nested/test.ppt - - - testdata/bitstreams/nested/subfolder/test.ppt - testdata/bitstreams/test.pptx - - testdata/bitstreams/nested/test.pptx - - - testdata/bitstreams/nested/subfolder/test.pptx - testdata/bitstreams/test.xls - - testdata/bitstreams/nested/test.xls - - - testdata/bitstreams/nested/subfolder/test.xls - testdata/bitstreams/test.xlsx - - testdata/bitstreams/nested/test.xlsx - - - testdata/bitstreams/nested/subfolder/test.xlsx - testdata/bitstreams/test.mp3 - - testdata/bitstreams/nested/test.mp3 - - - testdata/bitstreams/nested/subfolder/test.mp3 - testdata/bitstreams/test.wav - - testdata/bitstreams/nested/test.wav - - - testdata/bitstreams/nested/subfolder/test.wav - -