This repository has been archived by the owner on Aug 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 138
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
operation: archive: zip and tar file support
- Loading branch information
1 parent
b3a66a9
commit c16f6fa
Showing 3 changed files with 192 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
import tarfile | ||
import zipfile | ||
import pathlib | ||
|
||
from ..df.base import op | ||
from ..df.types import Definition | ||
|
||
|
||
# Definitions shared by the archive operations below. Every value is a
# filesystem path carried through the dataflow as a plain string.

# Path to a directory (archive source or extraction target).
DIRECTORY = Definition(name="directory", primitive="str")
# Path to a zip archive file.
ZIP_FILE = Definition(name="zip_file", primitive="str")
# Path to a tar archive file.
TAR_FILE = Definition(name="tar_file", primitive="str")
|
||
|
||
@op(
    inputs={"input_directory_path": DIRECTORY, "output_file_path": ZIP_FILE},
    outputs={},
)
async def make_zip_archive(
    input_directory_path: str, output_file_path: str,
):
    """
    Creates zip file of a directory.

    Every file and sub-directory found recursively under
    ``input_directory_path`` is stored under its path relative to that
    directory, so the directory layout is preserved inside the archive and
    files sharing a base name in different sub-directories do not collide.

    Parameters
    ----------
    input_directory_path : str
        Path to directory to be archived
    output_file_path : str
        Path where the output archive should be saved (should include file name)
    """
    root = pathlib.Path(input_directory_path)
    with zipfile.ZipFile(output_file_path, "w") as archive:
        for entry in root.rglob("*"):
            # Use the path relative to the archived directory as the member
            # name; using only the base name would flatten the tree.
            archive.write(entry, str(entry.relative_to(root)))
|
||
|
||
@op(
    inputs={"input_file_path": ZIP_FILE, "output_directory_path": DIRECTORY},
    outputs={},
)
async def extract_zip_archive(
    input_file_path: str, output_directory_path: str,
):
    """
    Unpacks every member of a zip file into a directory.

    Parameters
    ----------
    input_file_path : str
        Path to the zip file
    output_directory_path : str
        Path where all the files should be extracted
    """
    with zipfile.ZipFile(input_file_path, mode="r") as archive:
        archive.extractall(path=output_directory_path)
|
||
|
||
@op(
    inputs={"input_directory_path": DIRECTORY, "output_file_path": TAR_FILE},
    outputs={},
)
async def make_tar_archive(
    input_directory_path: str, output_file_path: str,
):
    """
    Creates tar file of a directory.

    Every file and sub-directory found recursively under
    ``input_directory_path`` is added exactly once, under its path relative
    to that directory, so the directory layout is preserved in the archive.

    The archive is opened in exclusive-creation mode (``"x"``), so an
    already existing ``output_file_path`` is never overwritten.

    Parameters
    ----------
    input_directory_path : str
        Path to directory to be archived as a tarfile.
    output_file_path : str
        Path where the output archive should be saved (should include file name)

    Raises
    ------
    FileExistsError
        If ``output_file_path`` already exists.
    """
    root = pathlib.Path(input_directory_path)
    with tarfile.open(output_file_path, mode="x") as archive:
        for entry in root.rglob("*"):
            # rglob already visits every descendant, so directories must not
            # be added recursively or their contents would be stored twice.
            archive.add(
                entry,
                arcname=str(entry.relative_to(root)),
                recursive=False,
            )
|
||
|
||
@op(
    inputs={"input_file_path": TAR_FILE, "output_directory_path": DIRECTORY},
    outputs={},
)
async def extract_tar_archive(
    input_file_path: str, output_directory_path: str,
):
    """
    Extracts a given tar file.

    When the running Python supports tarfile extraction filters, the
    ``"data"`` filter is applied so that members which would be written
    outside ``output_directory_path`` (absolute paths, ``..`` components,
    links pointing outside the destination) are rejected. On older Python
    versions the archive is extracted without filtering.

    Parameters
    ----------
    input_file_path : str
        Path to the tar file
    output_directory_path : str
        Path where all the files should be extracted

    Raises
    ------
    tarfile.TarError
        If the archive cannot be read, or a member is rejected by the
        extraction filter.
    """
    with tarfile.open(input_file_path, "r") as archive:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available: refuse unsafe members and
            # avoid the DeprecationWarning for unfiltered extraction.
            archive.extractall(output_directory_path, filter="data")
        else:
            # NOTE(review): no filtering here; only extract trusted archives
            # on Python versions without tarfile extraction filters.
            archive.extractall(output_directory_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from unittest.mock import patch, mock_open | ||
|
||
from dffml import run | ||
from dffml.df.types import DataFlow, Input | ||
from dffml.util.asynctestcase import AsyncTestCase | ||
from dffml.operation.archive import ( | ||
make_zip_archive, | ||
extract_zip_archive, | ||
make_tar_archive, | ||
extract_tar_archive, | ||
) | ||
|
||
|
||
def create_dataflow(operation, seed):
    """
    Build a DataFlow containing only ``operation``.

    Parameters
    ----------
    operation
        Operation to run; its ``op`` and ``imp`` attributes are used.
    seed : dict
        Maps each input name of the operation to the value to seed it with.

    Returns
    -------
    DataFlow
        Dataflow with the operation, its implementation and the seed inputs.
    """
    op_name = operation.op.name
    # One Input per seed value, bound to the definition of the matching
    # operation input.
    seed_inputs = {
        Input(value=value, definition=operation.op.inputs[key])
        for key, value in seed.items()
    }
    return DataFlow(
        operations={op_name: operation},
        seed=seed_inputs,
        implementations={op_name: operation.imp},
    )
|
||
|
||
class TestZipOperations(AsyncTestCase):
    """Checks the mode in which the zip operations open the archive path."""

    # Paths are never touched on disk; file opening is mocked in each test.
    test_file_pth = "test/path/to/zip_file.zip"
    test_dir_pth = "test/path/to/directory"

    async def test_make_zip_op(self):
        """make_zip_archive opens the output path for binary writing."""
        seed = {
            "input_directory_path": self.test_dir_pth,
            "output_file_path": self.test_file_pth,
        }
        dataflow = create_dataflow(make_zip_archive, seed)
        fake_open = mock_open()
        with patch("io.open", fake_open):
            # Writing the end record is stubbed out since the file is a mock.
            with patch("zipfile.ZipFile._write_end_record"):
                async for _, _ in run(dataflow):
                    fake_open.assert_called_once_with(
                        self.test_file_pth, "w+b"
                    )

    async def test_extract_zip_op(self):
        """extract_zip_archive opens the input path for binary reading."""
        seed = {
            "input_file_path": self.test_file_pth,
            "output_directory_path": self.test_dir_pth,
        }
        dataflow = create_dataflow(extract_zip_archive, seed)
        fake_open = mock_open()
        with patch("io.open", fake_open):
            # Parsing of the (mocked) archive contents is stubbed out.
            with patch("zipfile._EndRecData"):
                with patch("zipfile.ZipFile._RealGetContents"):
                    async for _, _ in run(dataflow):
                        fake_open.assert_called_once_with(
                            self.test_file_pth, "rb"
                        )
|
||
|
||
class TestTarOperations(AsyncTestCase):
    """Checks the mode in which the tar operations open the archive path."""

    # Paths are never touched on disk; file opening is mocked in each test.
    test_file_pth = "test/path/to/tar_file.tar"
    test_dir_pth = "test/path/to/directory"

    async def test_make_tar_archive_op(self):
        """make_tar_archive opens the output path for exclusive creation."""
        seed = {
            "input_directory_path": self.test_dir_pth,
            "output_file_path": self.test_file_pth,
        }
        dataflow = create_dataflow(make_tar_archive, seed)
        fake_open = mock_open()
        with patch("tarfile.bltn_open", fake_open):
            # Closing is stubbed out so nothing is finalized on the mock file.
            with patch("tarfile.TarFile.close"):
                async for _, _ in run(dataflow):
                    fake_open.assert_called_once_with(
                        self.test_file_pth, "xb"
                    )

    async def test_extract_tar_op(self):
        """extract_tar_archive opens the input path for binary reading."""
        seed = {
            "input_file_path": self.test_file_pth,
            "output_directory_path": self.test_dir_pth,
        }
        dataflow = create_dataflow(extract_tar_archive, seed)
        fake_open = mock_open()
        with patch("builtins.open", fake_open):
            with patch("tarfile.TarFile.extractall"):
                # Header parsing is replaced by the same mock, hence the
                # assert_any_call (the mock is called more than once).
                with patch("tarfile.TarInfo.fromtarfile", fake_open):
                    async for _, _ in run(dataflow):
                        fake_open.assert_any_call(
                            "test/path/to/tar_file.tar", "rb"
                        )