Skip to content

Commit

Permalink
Fixes zenml-io#2257 : Unifies materializer temporary directory/file c…
Browse files Browse the repository at this point in the history
…reation onto the tempfile module
  • Loading branch information
akesterson committed Mar 22, 2024
1 parent 8a8085d commit e7c815c
Show file tree
Hide file tree
Showing 15 changed files with 163 additions and 194 deletions.
3 changes: 3 additions & 0 deletions src/zenml/artifact_stores/local_artifact_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def ensure_path_local(cls, path: str) -> str:
Raises:
ArtifactStoreInterfaceError: If the given path is not a local path.
"""
# TODO : This would be unnecessary if we prefixed local files with file://
# and this is not going to catch all possibilities anyway so local files should
# refactor to file://
remote_prefixes = ["gs://", "hdfs://", "s3://", "az://", "abfs://"]
if any(path.startswith(prefix) for prefix in remote_prefixes):
raise ArtifactStoreInterfaceError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,22 @@ def load(self, data_type: Type[bento.Bento]) -> bento.Bento:
An bento.Bento object.
"""
# Create a temporary directory to store the model
temp_dir = tempfile.TemporaryDirectory()
with tempfile.TemporaryDirectory() as temp_dir:

# Copy from artifact store to temporary directory
io_utils.copy_dir(self.uri, temp_dir.name)
# Copy from artifact store to temporary directory
io_utils.copy_dir(self.uri, temp_dir.name)

# Load the Bento from the temporary directory
imported_bento = Bento.import_from(
os.path.join(temp_dir.name, DEFAULT_BENTO_FILENAME)
)
# Load the Bento from the temporary directory
imported_bento = Bento.import_from(
os.path.join(temp_dir.name, DEFAULT_BENTO_FILENAME)
)

# Try save the Bento to the local BentoML store
try:
_ = bentoml.get(imported_bento.tag)
except BentoMLException:
imported_bento.save()
return imported_bento
# Try save the Bento to the local BentoML store
try:
_ = bentoml.get(imported_bento.tag)
except BentoMLException:
imported_bento.save()
return imported_bento

def save(self, bento: bento.Bento) -> None:
"""Write to artifact store.
Expand All @@ -74,17 +74,14 @@ def save(self, bento: bento.Bento) -> None:
bento: An bento.Bento object.
"""
# Create a temporary directory to store the model
temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-")
temp_bento_path = os.path.join(temp_dir.name, DEFAULT_BENTO_FILENAME)
with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
temp_bento_path = os.path.join(temp_dir.name, DEFAULT_BENTO_FILENAME)

# save the image in a temporary directory
bentoml.export_bento(bento.tag, temp_bento_path)
# save the image in a temporary directory
bentoml.export_bento(bento.tag, temp_bento_path)

# copy the saved image to the artifact store
io_utils.copy_dir(temp_dir.name, self.uri)

# Remove the temporary directory
fileio.rmtree(temp_dir.name)
# copy the saved image to the artifact store
io_utils.copy_dir(temp_dir.name, self.uri)

def extract_metadata(
self, bento: bento.Bento
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,13 @@ def save(self, ds: Union[Dataset, DatasetDict]) -> None:
Args:
ds: The Dataset to write.
"""
temp_dir = TemporaryDirectory()
path = os.path.join(temp_dir.name, DEFAULT_DATASET_DIR)
try:
with TemporaryDirectory() as temp_dir:
path = os.path.join(temp_dir.name, DEFAULT_DATASET_DIR)
ds.save_to_disk(path)
io_utils.copy_dir(
path,
os.path.join(self.uri, DEFAULT_DATASET_DIR),
)
finally:
fileio.rmtree(temp_dir.name)

def extract_metadata(
self, ds: Union[Dataset, DatasetDict]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,30 @@ def load(self, data_type: Type[PreTrainedModel]) -> PreTrainedModel:
Returns:
The model read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir.name
)

config = AutoConfig.from_pretrained(temp_dir.name)
architecture = config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)
with TemporaryDirectory() as temp_dir:
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir.name
)

config = AutoConfig.from_pretrained(temp_dir.name)
architecture = config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)

def save(self, model: PreTrainedModel) -> None:
"""Writes a Model to the specified dir.
Args:
model: The Torch Model to write.
"""
temp_dir = TemporaryDirectory()
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR),
)
with TemporaryDirectory() as temp_dir:
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_PT_MODEL_DIR),
)

def extract_metadata(
self, model: PreTrainedModel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,30 @@ def load(self, data_type: Type[TFPreTrainedModel]) -> TFPreTrainedModel:
Returns:
The model read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir.name
)

config = AutoConfig.from_pretrained(temp_dir.name)
architecture = "TF" + config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)
with TemporaryDirectory() as temp_dir:
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir.name
)

config = AutoConfig.from_pretrained(temp_dir.name)
architecture = "TF" + config.architectures[0]
model_cls = getattr(
importlib.import_module("transformers"), architecture
)
return model_cls.from_pretrained(temp_dir.name)

def save(self, model: TFPreTrainedModel) -> None:
"""Writes a Model to the specified dir.
Args:
model: The TF Model to write.
"""
temp_dir = TemporaryDirectory()
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR),
)
with TemporaryDirectory() as temp_dir:
model.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_TF_MODEL_DIR),
)

def extract_metadata(
self, model: TFPreTrainedModel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,22 @@ def load(self, data_type: Type[Any]) -> PreTrainedTokenizerBase:
Returns:
The tokenizer read from the specified dir.
"""
temp_dir = TemporaryDirectory()
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir.name
)
with TemporaryDirectory() as temp_dir:
io_utils.copy_dir(
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir.name
)

return AutoTokenizer.from_pretrained(temp_dir.name)
return AutoTokenizer.from_pretrained(temp_dir.name)

def save(self, tokenizer: Type[Any]) -> None:
"""Writes a Tokenizer to the specified dir.
Args:
tokenizer: The HFTokenizer to write.
"""
temp_dir = TemporaryDirectory()
tokenizer.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR),
)
with TemporaryDirectory() as temp_dir:
tokenizer.save_pretrained(temp_dir.name)
io_utils.copy_dir(
temp_dir.name,
os.path.join(self.uri, DEFAULT_TOKENIZER_DIR),
)
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,14 @@ def load(self, data_type: Type[Any]) -> lgb.Booster:
filepath = os.path.join(self.uri, DEFAULT_FILENAME)

# Create a temporary folder
temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

# Copy from artifact store to temporary file
fileio.copy(filepath, temp_file)
booster = lgb.Booster(model_file=temp_file)
# Copy from artifact store to temporary file
fileio.copy(filepath, temp_file)
booster = lgb.Booster(model_file=temp_file)

# Cleanup and return
fileio.rmtree(temp_dir)
return booster
return booster

def save(self, booster: lgb.Booster) -> None:
"""Creates a JSON serialization for a lightgbm Booster model.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,15 @@ def load(self, data_type: Type[Any]) -> lgb.Dataset:
filepath = os.path.join(self.uri, DEFAULT_FILENAME)

# Create a temporary folder
temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)

# Copy from artifact store to temporary file
fileio.copy(filepath, temp_file)
matrix = lgb.Dataset(temp_file, free_raw_data=False)
# Copy from artifact store to temporary file
fileio.copy(filepath, temp_file)
matrix = lgb.Dataset(temp_file, free_raw_data=False)

# No clean up this time because matrix is lazy loaded
return matrix
# No clean up this time because matrix is lazy loaded
return matrix

def save(self, matrix: lgb.Dataset) -> None:
"""Creates a binary serialization for a lightgbm.Dataset object.
Expand All @@ -66,13 +66,12 @@ def save(self, matrix: lgb.Dataset) -> None:
filepath = os.path.join(self.uri, DEFAULT_FILENAME)

# Make a temporary phantom artifact
temp_dir = tempfile.mkdtemp(prefix="zenml-temp-")
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
matrix.save_binary(temp_file)
with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME)
matrix.save_binary(temp_file)

# Copy it into artifact store
fileio.copy(temp_file, filepath)
fileio.rmtree(temp_dir)
# Copy it into artifact store
fileio.copy(temp_file, filepath)

def extract_metadata(
self, matrix: lgb.Dataset
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,34 +58,33 @@ def load(self, data_type: Type[Image.Image]) -> Image.Image:
filepath = [file for file in files if not fileio.isdir(file)][0]

# create a temporary folder
temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-")
temp_file = os.path.join(
temp_dir.name,
f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}",
)
with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
temp_file = os.path.join(
temp_dir.name,
f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}",
)

# copy from artifact store to temporary file
fileio.copy(filepath, temp_file)
return Image.open(temp_file)
# copy from artifact store to temporary file
fileio.copy(filepath, temp_file)
return Image.open(temp_file)

def save(self, image: Image.Image) -> None:
"""Write to artifact store.
Args:
image: An Image.Image object.
"""
temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-")
file_extension = image.format or DEFAULT_IMAGE_EXTENSION
full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}"
temp_image_path = os.path.join(temp_dir.name, full_filename)
with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir:
file_extension = image.format or DEFAULT_IMAGE_EXTENSION
full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}"
temp_image_path = os.path.join(temp_dir.name, full_filename)

# save the image in a temporary directory
image.save(temp_image_path)
# save the image in a temporary directory
image.save(temp_image_path)

# copy the saved image to the artifact store
artifact_store_path = os.path.join(self.uri, full_filename)
io_utils.copy(temp_image_path, artifact_store_path, overwrite=True) # type: ignore[attr-defined]
temp_dir.cleanup()
# copy the saved image to the artifact store
artifact_store_path = os.path.join(self.uri, full_filename)
io_utils.copy(temp_image_path, artifact_store_path, overwrite=True) # type: ignore[attr-defined]

def save_visualizations(
self, image: Image.Image
Expand Down
Loading

0 comments on commit e7c815c

Please sign in to comment.