Skip to content
This repository has been archived by the owner on Aug 25, 2024. It is now read-only.

Commit

Permalink
operation: archive: zip and tar file support
Browse files Browse the repository at this point in the history
  • Loading branch information
programmer290399 authored and pdxjohnny committed Jul 1, 2021
1 parent b3a66a9 commit c16f6fa
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Documentation and testing support for notebooks
- Example on how to create operations and use data preprocessing source to train models
https://intel.github.io/dffml/examples/ice_cream.html
- Operations for zip and tar file creation and extraction
### Changed
- Calls to hashlib now go through helper functions
- Build docs using `dffml service dev docs`
Expand Down
98 changes: 98 additions & 0 deletions dffml/operation/archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import tarfile
import zipfile
import pathlib

from ..df.base import op
from ..df.types import Definition


# Definitions
# Each Definition pairs a name with the "str" primitive; the operations in
# this module use them to declare their inputs.
# Directory path input (directory to archive, or directory to extract into).
DIRECTORY = Definition(name="directory", primitive="str")
# Zip archive file path input.
ZIP_FILE = Definition(name="zip_file", primitive="str")
# Tar archive file path input.
TAR_FILE = Definition(name="tar_file", primitive="str")


@op(
    inputs={"input_directory_path": DIRECTORY, "output_file_path": ZIP_FILE},
    outputs={},
)
async def make_zip_archive(
    input_directory_path: str, output_file_path: str,
):
    """
    Creates zip file of a directory.

    The directory is walked recursively and every entry is stored under its
    path relative to ``input_directory_path``, so the directory layout is
    preserved inside the archive and files sharing a base name in different
    sub-directories do not overwrite each other.

    Parameters
    ----------
    input_directory_path : str
        Path to directory to be archived
    output_file_path : str
        Path where the output archive should be saved (should include file name)
    """
    root = pathlib.Path(input_directory_path)
    with zipfile.ZipFile(output_file_path, "w") as archive:
        for entry in root.rglob("*"):
            # Use the path relative to the archived directory as the member
            # name; using only the base name would flatten the tree.
            archive.write(entry, entry.relative_to(root).as_posix())


@op(
    inputs={"input_file_path": ZIP_FILE, "output_directory_path": DIRECTORY},
    outputs={},
)
async def extract_zip_archive(
    input_file_path: str, output_directory_path: str,
):
    """
    Unpacks every member of a zip file into a directory.

    Parameters
    ----------
    input_file_path : str
        Location of the zip archive to read
    output_directory_path : str
        Directory that receives the extracted contents
    """
    # Open read-only; the context manager closes the archive when done.
    archive = zipfile.ZipFile(input_file_path, "r")
    with archive:
        archive.extractall(output_directory_path)


@op(
    inputs={"input_directory_path": DIRECTORY, "output_file_path": TAR_FILE},
    outputs={},
)
async def make_tar_archive(
    input_directory_path: str, output_file_path: str,
):
    """
    Creates tar file of a directory.

    The directory is walked recursively and every entry is stored exactly
    once, under its path relative to ``input_directory_path``, so the
    directory layout is preserved inside the archive.

    Parameters
    ----------
    input_directory_path : str
        Path to directory to be archived as a tarfile.
    output_file_path : str
        Path where the output archive should be saved (should include file name)

    Raises
    ------
    FileExistsError
        If ``output_file_path`` already exists (the archive is opened in
        exclusive creation mode ``"x"``).
    """
    root = pathlib.Path(input_directory_path)
    with tarfile.open(output_file_path, mode="x") as archive:
        for entry in root.rglob("*"):
            # rglob already visits nested entries, so disable tar's own
            # recursion; otherwise directory contents are added twice.
            archive.add(
                entry,
                arcname=entry.relative_to(root).as_posix(),
                recursive=False,
            )


@op(
    inputs={"input_file_path": TAR_FILE, "output_directory_path": DIRECTORY},
    outputs={},
)
async def extract_tar_archive(
    input_file_path: str, output_directory_path: str,
):
    """
    Extracts a given tar file.

    When the running Python supports tar extraction filters, the ``"data"``
    filter is used so that members with absolute paths, ``..`` components or
    links pointing outside ``output_directory_path`` are refused.

    Parameters
    ----------
    input_file_path : str
        Path to the tar file
    output_directory_path : str
        Path where all the files should be extracted
    """
    with tarfile.open(input_file_path, "r") as archive:
        # Feature check recommended by the tarfile documentation; older
        # interpreters without extraction filters keep the legacy behaviour.
        if hasattr(tarfile, "data_filter"):
            archive.extractall(output_directory_path, filter="data")
        else:
            archive.extractall(output_directory_path)
93 changes: 93 additions & 0 deletions tests/operation/test_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from unittest.mock import patch, mock_open

from dffml import run
from dffml.df.types import DataFlow, Input
from dffml.util.asynctestcase import AsyncTestCase
from dffml.operation.archive import (
make_zip_archive,
extract_zip_archive,
make_tar_archive,
extract_tar_archive,
)


def create_dataflow(operation, seed):
    """
    Build a DataFlow holding a single operation and its seed inputs.

    Parameters
    ----------
    operation
        Operation to run; its ``op`` and ``imp`` attributes are registered
        in the dataflow under ``operation.op.name``.
    seed : dict
        Maps each input name of the operation to the value to seed it with.

    Returns
    -------
    DataFlow
        Dataflow containing only ``operation``, seeded with ``seed``.
    """
    op_name = operation.op.name
    # One Input per seed value, tied to the definition the operation
    # declares for that input name.
    seed_inputs = set()
    for input_name, val in seed.items():
        seed_inputs.add(
            Input(value=val, definition=operation.op.inputs[input_name])
        )
    return DataFlow(
        operations={op_name: operation},
        seed=seed_inputs,
        implementations={op_name: operation.imp},
    )


class TestZipOperations(AsyncTestCase):
    """
    Tests for the zip operations, run through a dataflow with file access
    mocked out so nothing is read from or written to disk.
    """

    # Placeholder paths; they are only ever passed to mocked open calls.
    test_file_pth = "test/path/to/zip_file.zip"
    test_dir_pth = "test/path/to/directory"

    async def test_make_zip_op(self):
        """make_zip_archive opens the output path for writing."""
        dataflow = create_dataflow(
            make_zip_archive,
            {
                "input_directory_path": self.test_dir_pth,
                "output_file_path": self.test_file_pth,
            },
        )
        m_open = mock_open()
        with patch("io.open", m_open), patch(
            "zipfile.ZipFile._write_end_record"
        ):
            async for _, _ in run(dataflow):
                pass
            # Assert after the loop so the check still runs (and can fail)
            # even if run() yields no results.
            m_open.assert_called_once_with(self.test_file_pth, "w+b")

    async def test_extract_zip_op(self):
        """extract_zip_archive opens the archive path for reading."""
        dataflow = create_dataflow(
            extract_zip_archive,
            {
                "input_file_path": self.test_file_pth,
                "output_directory_path": self.test_dir_pth,
            },
        )
        m_open = mock_open()
        with patch("io.open", m_open), patch("zipfile._EndRecData"), patch(
            "zipfile.ZipFile._RealGetContents"
        ):
            async for _, _ in run(dataflow):
                pass
            # Assert after the loop so the check still runs (and can fail)
            # even if run() yields no results.
            m_open.assert_called_once_with(self.test_file_pth, "rb")


class TestTarOperations(AsyncTestCase):
    """
    Tests for the tar operations, run through a dataflow with file access
    mocked out so nothing is read from or written to disk.
    """

    # Placeholder paths; they are only ever passed to mocked open calls.
    test_file_pth = "test/path/to/tar_file.tar"
    test_dir_pth = "test/path/to/directory"

    async def test_make_tar_archive_op(self):
        """make_tar_archive opens the output path in exclusive-create mode."""
        dataflow = create_dataflow(
            make_tar_archive,
            {
                "input_directory_path": self.test_dir_pth,
                "output_file_path": self.test_file_pth,
            },
        )
        m_open = mock_open()
        with patch("tarfile.bltn_open", m_open), patch(
            "tarfile.TarFile.close"
        ):
            async for _, _ in run(dataflow):
                pass
            # Assert after the loop so the check still runs (and can fail)
            # even if run() yields no results.
            m_open.assert_called_once_with(self.test_file_pth, "xb")

    async def test_extract_tar_op(self):
        """extract_tar_archive opens the archive path for reading."""
        dataflow = create_dataflow(
            extract_tar_archive,
            {
                "input_file_path": self.test_file_pth,
                "output_directory_path": self.test_dir_pth,
            },
        )
        m_open = mock_open()
        with patch("builtins.open", m_open), patch(
            "tarfile.TarFile.extractall"
        ), patch("tarfile.TarInfo.fromtarfile", m_open):
            async for _, _ in run(dataflow):
                pass
            # m_open also stands in for TarInfo.fromtarfile, so it receives
            # other calls too; only require that the archive was opened.
            m_open.assert_any_call(self.test_file_pth, "rb")

0 comments on commit c16f6fa

Please sign in to comment.