diff --git a/docs/book/user-guide/advanced-guide/data-management/handle-custom-data-types.md b/docs/book/user-guide/advanced-guide/data-management/handle-custom-data-types.md index c925359c55a..111f5a7e1ce 100644 --- a/docs/book/user-guide/advanced-guide/data-management/handle-custom-data-types.md +++ b/docs/book/user-guide/advanced-guide/data-management/handle-custom-data-types.md @@ -250,6 +250,22 @@ The `load()` and `save()` methods define the serialization and deserialization o You will need to override these methods according to how you plan to serialize your objects. E.g., if you have custom PyTorch classes as `ASSOCIATED_TYPES`, then you might want to use `torch.save()` and `torch.load()` here. +It is a very common practice to use temporary files and directories as an intermediate step in a materializer's `load()` or `save()` method. Materializers using this pattern must take care to clean up after themselves even in the case of unexpected exceptions. The established pattern for this is to use the [`tempfile`](https://docs.python.org/3/library/tempfile.html) module's context managers. These are a simple and efficient way to create and clean up temporary files and directories. `tempfile` is part of Python's cross-platform standard library. For example: + +```python + def save(self, model: TFPreTrainedModel) -> None: + """Writes a Model to the specified dir. + + Args: + model: The TF Model to write. + """ + with tempfile.TemporaryDirectory() as temp_dir: + # Do something with the model in the temporary directory + # Save it to the artifact store + # When your code reaches this point, whether through normal flow or through + # an unhandled exception, your entire temporary directory is cleaned up. +``` + #### (Optional) How to Visualize the Artifact Optionally, you can override the `save_visualizations()` method to automatically save visualizations for all artifacts saved by your materializer. 
These visualizations are then shown next to your artifacts in the dashboard: diff --git a/src/zenml/artifact_stores/local_artifact_store.py b/src/zenml/artifact_stores/local_artifact_store.py index 690e4c6c326..b1e4f2f6591 100644 --- a/src/zenml/artifact_stores/local_artifact_store.py +++ b/src/zenml/artifact_stores/local_artifact_store.py @@ -63,6 +63,9 @@ def ensure_path_local(cls, path: str) -> str: Raises: ArtifactStoreInterfaceError: If the given path is not a local path. """ + # TODO : This would be unnecessary if we prefixed local files with file:// + # and this is not going to catch all possibilities anyway so local files should + # refactor to file:// remote_prefixes = ["gs://", "hdfs://", "s3://", "az://", "abfs://"] if any(path.startswith(prefix) for prefix in remote_prefixes): raise ArtifactStoreInterfaceError( diff --git a/src/zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py b/src/zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py index 49cd3bb3716..f7dc560d676 100644 --- a/src/zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py +++ b/src/zenml/integrations/bentoml/materializers/bentoml_bento_materializer.py @@ -23,7 +23,6 @@ from zenml.enums import ArtifactType from zenml.integrations.bentoml.constants import DEFAULT_BENTO_FILENAME -from zenml.io import fileio from zenml.logger import get_logger from zenml.materializers.base_materializer import BaseMaterializer from zenml.utils import io_utils @@ -50,22 +49,21 @@ def load(self, data_type: Type[bento.Bento]) -> bento.Bento: An bento.Bento object. 
""" # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() - - # Copy from artifact store to temporary directory - io_utils.copy_dir(self.uri, temp_dir.name) - - # Load the Bento from the temporary directory - imported_bento = Bento.import_from( - os.path.join(temp_dir.name, DEFAULT_BENTO_FILENAME) - ) - - # Try save the Bento to the local BentoML store - try: - _ = bentoml.get(imported_bento.tag) - except BentoMLException: - imported_bento.save() - return imported_bento + with tempfile.TemporaryDirectory() as temp_dir: + # Copy from artifact store to temporary directory + io_utils.copy_dir(self.uri, temp_dir) + + # Load the Bento from the temporary directory + imported_bento = Bento.import_from( + os.path.join(temp_dir, DEFAULT_BENTO_FILENAME) + ) + + # Try save the Bento to the local BentoML store + try: + _ = bentoml.get(imported_bento.tag) + except BentoMLException: + imported_bento.save() + return imported_bento def save(self, bento: bento.Bento) -> None: """Write to artifact store. @@ -74,17 +72,14 @@ def save(self, bento: bento.Bento) -> None: bento: An bento.Bento object. 
""" # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-") - temp_bento_path = os.path.join(temp_dir.name, DEFAULT_BENTO_FILENAME) - - # save the image in a temporary directory - bentoml.export_bento(bento.tag, temp_bento_path) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_bento_path = os.path.join(temp_dir, DEFAULT_BENTO_FILENAME) - # copy the saved image to the artifact store - io_utils.copy_dir(temp_dir.name, self.uri) + # save the image in a temporary directory + bentoml.export_bento(bento.tag, temp_bento_path) - # Remove the temporary directory - fileio.rmtree(temp_dir.name) + # copy the saved image to the artifact store + io_utils.copy_dir(temp_dir, self.uri) def extract_metadata( self, bento: bento.Bento diff --git a/src/zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py b/src/zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py index 7070e3d3b9e..a08cb2a46f7 100644 --- a/src/zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py +++ b/src/zenml/integrations/huggingface/materializers/huggingface_datasets_materializer.py @@ -22,7 +22,6 @@ from datasets.dataset_dict import DatasetDict from zenml.enums import ArtifactType -from zenml.io import fileio from zenml.materializers.base_materializer import BaseMaterializer from zenml.materializers.pandas_materializer import PandasMaterializer from zenml.utils import io_utils @@ -65,16 +64,13 @@ def save(self, ds: Union[Dataset, DatasetDict]) -> None: Args: ds: The Dataset to write. 
""" - temp_dir = TemporaryDirectory() - path = os.path.join(temp_dir.name, DEFAULT_DATASET_DIR) - try: + with TemporaryDirectory() as temp_dir: + path = os.path.join(temp_dir, DEFAULT_DATASET_DIR) ds.save_to_disk(path) io_utils.copy_dir( path, os.path.join(self.uri, DEFAULT_DATASET_DIR), ) - finally: - fileio.rmtree(temp_dir.name) def extract_metadata( self, ds: Union[Dataset, DatasetDict] diff --git a/src/zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py b/src/zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py index 6c8579f521c..3dadd95c864 100644 --- a/src/zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py +++ b/src/zenml/integrations/huggingface/materializers/huggingface_pt_model_materializer.py @@ -46,17 +46,17 @@ def load(self, data_type: Type[PreTrainedModel]) -> PreTrainedModel: Returns: The model read from the specified dir. """ - temp_dir = TemporaryDirectory() - io_utils.copy_dir( - os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir.name - ) - - config = AutoConfig.from_pretrained(temp_dir.name) - architecture = config.architectures[0] - model_cls = getattr( - importlib.import_module("transformers"), architecture - ) - return model_cls.from_pretrained(temp_dir.name) + with TemporaryDirectory() as temp_dir: + io_utils.copy_dir( + os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), temp_dir + ) + + config = AutoConfig.from_pretrained(temp_dir) + architecture = config.architectures[0] + model_cls = getattr( + importlib.import_module("transformers"), architecture + ) + return model_cls.from_pretrained(temp_dir) def save(self, model: PreTrainedModel) -> None: """Writes a Model to the specified dir. @@ -64,12 +64,12 @@ def save(self, model: PreTrainedModel) -> None: Args: model: The Torch Model to write. 
""" - temp_dir = TemporaryDirectory() - model.save_pretrained(temp_dir.name) - io_utils.copy_dir( - temp_dir.name, - os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), - ) + with TemporaryDirectory() as temp_dir: + model.save_pretrained(temp_dir) + io_utils.copy_dir( + temp_dir, + os.path.join(self.uri, DEFAULT_PT_MODEL_DIR), + ) def extract_metadata( self, model: PreTrainedModel diff --git a/src/zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py b/src/zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py index 054524a3a69..5886c30890f 100644 --- a/src/zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py +++ b/src/zenml/integrations/huggingface/materializers/huggingface_tf_model_materializer.py @@ -46,17 +46,17 @@ def load(self, data_type: Type[TFPreTrainedModel]) -> TFPreTrainedModel: Returns: The model read from the specified dir. """ - temp_dir = TemporaryDirectory() - io_utils.copy_dir( - os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir.name - ) - - config = AutoConfig.from_pretrained(temp_dir.name) - architecture = "TF" + config.architectures[0] - model_cls = getattr( - importlib.import_module("transformers"), architecture - ) - return model_cls.from_pretrained(temp_dir.name) + with TemporaryDirectory() as temp_dir: + io_utils.copy_dir( + os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), temp_dir + ) + + config = AutoConfig.from_pretrained(temp_dir) + architecture = "TF" + config.architectures[0] + model_cls = getattr( + importlib.import_module("transformers"), architecture + ) + return model_cls.from_pretrained(temp_dir) def save(self, model: TFPreTrainedModel) -> None: """Writes a Model to the specified dir. @@ -64,12 +64,12 @@ def save(self, model: TFPreTrainedModel) -> None: Args: model: The TF Model to write. 
""" - temp_dir = TemporaryDirectory() - model.save_pretrained(temp_dir.name) - io_utils.copy_dir( - temp_dir.name, - os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), - ) + with TemporaryDirectory() as temp_dir: + model.save_pretrained(temp_dir) + io_utils.copy_dir( + temp_dir, + os.path.join(self.uri, DEFAULT_TF_MODEL_DIR), + ) def extract_metadata( self, model: TFPreTrainedModel diff --git a/src/zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py b/src/zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py index ae369cdc584..f0843cd58bb 100644 --- a/src/zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py +++ b/src/zenml/integrations/huggingface/materializers/huggingface_tokenizer_materializer.py @@ -46,12 +46,12 @@ def load(self, data_type: Type[Any]) -> PreTrainedTokenizerBase: Returns: The tokenizer read from the specified dir. """ - temp_dir = TemporaryDirectory() - io_utils.copy_dir( - os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir.name - ) + with TemporaryDirectory() as temp_dir: + io_utils.copy_dir( + os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), temp_dir + ) - return AutoTokenizer.from_pretrained(temp_dir.name) + return AutoTokenizer.from_pretrained(temp_dir) def save(self, tokenizer: Type[Any]) -> None: """Writes a Tokenizer to the specified dir. @@ -59,9 +59,9 @@ def save(self, tokenizer: Type[Any]) -> None: Args: tokenizer: The HFTokenizer to write. 
""" - temp_dir = TemporaryDirectory() - tokenizer.save_pretrained(temp_dir.name) - io_utils.copy_dir( - temp_dir.name, - os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), - ) + with TemporaryDirectory() as temp_dir: + tokenizer.save_pretrained(temp_dir) + io_utils.copy_dir( + temp_dir, + os.path.join(self.uri, DEFAULT_TOKENIZER_DIR), + ) diff --git a/src/zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py b/src/zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py index faea9921705..e983e2a6959 100644 --- a/src/zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py +++ b/src/zenml/integrations/lightgbm/materializers/lightgbm_booster_materializer.py @@ -44,16 +44,14 @@ def load(self, data_type: Type[Any]) -> lgb.Booster: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - booster = lgb.Booster(model_file=temp_file) + # Copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + booster = lgb.Booster(model_file=temp_file) - # Cleanup and return - fileio.rmtree(temp_dir) - return booster + return booster def save(self, booster: lgb.Booster) -> None: """Creates a JSON serialization for a lightgbm Booster model. 
diff --git a/src/zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py b/src/zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py index fc8512c9308..512441adf1c 100644 --- a/src/zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py +++ b/src/zenml/integrations/lightgbm/materializers/lightgbm_dataset_materializer.py @@ -47,15 +47,15 @@ def load(self, data_type: Type[Any]) -> lgb.Dataset: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - matrix = lgb.Dataset(temp_file, free_raw_data=False) + # Copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + matrix = lgb.Dataset(temp_file, free_raw_data=False) - # No clean up this time because matrix is lazy loaded - return matrix + # The Dataset is lazy loaded, so construct it before the temp dir is removed + return matrix.construct() def save(self, matrix: lgb.Dataset) -> None: """Creates a binary serialization for a lightgbm.Dataset object. 
@@ -66,13 +66,12 @@ def save(self, matrix: lgb.Dataset) -> None: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Make a temporary phantom artifact - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - matrix.save_binary(temp_file) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + matrix.save_binary(temp_file) - # Copy it into artifact store - fileio.copy(temp_file, filepath) - fileio.rmtree(temp_dir) + # Copy it into artifact store + fileio.copy(temp_file, filepath) def extract_metadata( self, matrix: lgb.Dataset diff --git a/src/zenml/integrations/pillow/materializers/pillow_image_materializer.py b/src/zenml/integrations/pillow/materializers/pillow_image_materializer.py index a44e2066386..5a4a7dadd04 100644 --- a/src/zenml/integrations/pillow/materializers/pillow_image_materializer.py +++ b/src/zenml/integrations/pillow/materializers/pillow_image_materializer.py @@ -58,15 +58,18 @@ def load(self, data_type: Type[Image.Image]) -> Image.Image: filepath = [file for file in files if not fileio.isdir(file)][0] # create a temporary folder - temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-") - temp_file = os.path.join( - temp_dir.name, - f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}", - ) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join( + temp_dir, + f"{DEFAULT_IMAGE_FILENAME}{os.path.splitext(filepath)[1]}", + ) - # copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - return Image.open(temp_file) + # copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + image = Image.open(temp_file) + # Image.open() is lazy: read the pixel data before the temp dir is removed + image.load() + return image def save(self, image: Image.Image) -> None: """Write to artifact store. @@ -74,18 +77,17 @@ def save(self, image: Image.Image) -> None: Args: image: An Image.Image object. 
""" - temp_dir = tempfile.TemporaryDirectory(prefix="zenml-temp-") - file_extension = image.format or DEFAULT_IMAGE_EXTENSION - full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}" - temp_image_path = os.path.join(temp_dir.name, full_filename) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + file_extension = image.format or DEFAULT_IMAGE_EXTENSION + full_filename = f"{DEFAULT_IMAGE_FILENAME}.{file_extension}" + temp_image_path = os.path.join(temp_dir, full_filename) - # save the image in a temporary directory - image.save(temp_image_path) + # save the image in a temporary directory + image.save(temp_image_path) - # copy the saved image to the artifact store - artifact_store_path = os.path.join(self.uri, full_filename) - io_utils.copy(temp_image_path, artifact_store_path, overwrite=True) # type: ignore[attr-defined] - temp_dir.cleanup() + # copy the saved image to the artifact store + artifact_store_path = os.path.join(self.uri, full_filename) + io_utils.copy(temp_image_path, artifact_store_path, overwrite=True) # type: ignore[attr-defined] def save_visualizations( self, image: Image.Image diff --git a/src/zenml/integrations/polars/materializers/dataframe_materializer.py b/src/zenml/integrations/polars/materializers/dataframe_materializer.py index 27925cbf195..a458c52db9e 100644 --- a/src/zenml/integrations/polars/materializers/dataframe_materializer.py +++ b/src/zenml/integrations/polars/materializers/dataframe_materializer.py @@ -22,7 +22,6 @@ import pyarrow.parquet as pq # type: ignore from zenml.enums import ArtifactType -from zenml.io import fileio from zenml.materializers.base_materializer import BaseMaterializer from zenml.utils import io_utils @@ -46,34 +45,30 @@ def load(self, data_type: Type[Any]) -> Any: A Polars data frame or series. 
""" # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() + with tempfile.TemporaryDirectory() as temp_dir: + # Copy from artifact store to temporary directory + io_utils.copy_dir(self.uri, temp_dir) - # Copy from artifact store to temporary directory - io_utils.copy_dir(self.uri, temp_dir.name) - - # Load the data from the temporary directory - table = pq.read_table( - os.path.join(temp_dir.name, "dataframe.parquet").replace("\\", "/") - ) - - # If the data is of type pl.Series, convert it back to a pyarrow array - # instead of a table. - if ( - table.schema.metadata - and b"zenml_is_pl_series" in table.schema.metadata - ): - isinstance_bytes = table.schema.metadata[b"zenml_is_pl_series"] - isinstance_series = bool.from_bytes(isinstance_bytes, "big") - if isinstance_series: - table = table.column(0) + # Load the data from the temporary directory + table = pq.read_table( + os.path.join(temp_dir, "dataframe.parquet").replace("\\", "/") + ) - # Convert the table to a Polars data frame or series - data = pl.from_arrow(table) + # If the data is of type pl.Series, convert it back to a pyarrow array + # instead of a table. + if ( + table.schema.metadata + and b"zenml_is_pl_series" in table.schema.metadata + ): + isinstance_bytes = table.schema.metadata[b"zenml_is_pl_series"] + isinstance_series = bool.from_bytes(isinstance_bytes, "big") + if isinstance_series: + table = table.column(0) - # Cleanup and return - fileio.rmtree(temp_dir.name) + # Convert the table to a Polars data frame or series + data = pl.from_arrow(table) - return data + return data def save(self, data: Union[pl.DataFrame, pl.Series]) -> None: """Writes Polars data to the artifact store. 
@@ -108,14 +103,10 @@ def save(self, data: Union[pl.DataFrame, pl.Series]) -> None: ) # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() - - # Write the table to a Parquet file - path = os.path.join(temp_dir.name, "dataframe.parquet").replace( - "\\", "/" - ) - pq.write_table(table, path) # Uses lz4 compression by default - io_utils.copy_dir(temp_dir.name, self.uri) - - # Remove the temporary directory - fileio.rmtree(temp_dir.name) + with tempfile.TemporaryDirectory() as temp_dir: + # Write the table to a Parquet file + path = os.path.join(temp_dir, "dataframe.parquet").replace( + "\\", "/" + ) + pq.write_table(table, path) # Uses lz4 compression by default + io_utils.copy_dir(temp_dir, self.uri) diff --git a/src/zenml/integrations/pycaret/materializers/model_materializer.py b/src/zenml/integrations/pycaret/materializers/model_materializer.py index a935de2e521..df34fd26eed 100644 --- a/src/zenml/integrations/pycaret/materializers/model_materializer.py +++ b/src/zenml/integrations/pycaret/materializers/model_materializer.py @@ -65,7 +65,6 @@ from xgboost import XGBClassifier, XGBRegressor from zenml.enums import ArtifactType -from zenml.io import fileio from zenml.materializers.base_materializer import BaseMaterializer from zenml.utils import io_utils @@ -134,18 +133,14 @@ def load(self, data_type: Type[Any]) -> Any: A PyCaret model. 
""" # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() + with tempfile.TemporaryDirectory() as temp_dir: + # Copy from artifact store to temporary directory + io_utils.copy_dir(self.uri, temp_dir) - # Copy from artifact store to temporary directory - io_utils.copy_dir(self.uri, temp_dir.name) + # Load the model from the temporary directory + model = load_model(temp_dir) - # Load the model from the temporary directory - model = load_model(temp_dir.name) - - # Cleanup and return - fileio.rmtree(temp_dir.name) - - return model + return model def save(self, model: Any) -> None: """Writes a PyCaret model to the artifact store. @@ -154,9 +149,6 @@ def save(self, model: Any) -> None: model: Any of the supported models. """ # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() - save_model(model, temp_dir.name) - io_utils.copy_dir(temp_dir.name, self.uri) - - # Remove the temporary directory - fileio.rmtree(temp_dir.name) + with tempfile.TemporaryDirectory() as temp_dir: + save_model(model, temp_dir) + io_utils.copy_dir(temp_dir, self.uri) diff --git a/src/zenml/integrations/tensorflow/materializers/keras_materializer.py b/src/zenml/integrations/tensorflow/materializers/keras_materializer.py index cae61dce8bd..809927a9c1c 100644 --- a/src/zenml/integrations/tensorflow/materializers/keras_materializer.py +++ b/src/zenml/integrations/tensorflow/materializers/keras_materializer.py @@ -20,7 +20,6 @@ from tensorflow.python.keras.utils.layer_utils import count_params from zenml.enums import ArtifactType -from zenml.io import fileio from zenml.materializers.base_materializer import BaseMaterializer from zenml.utils import io_utils @@ -44,18 +43,14 @@ def load(self, data_type: Type[Any]) -> keras.Model: A tf.keras.Model model. 
""" # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() + with tempfile.TemporaryDirectory() as temp_dir: + # Copy from artifact store to temporary directory + io_utils.copy_dir(self.uri, temp_dir) - # Copy from artifact store to temporary directory - io_utils.copy_dir(self.uri, temp_dir.name) + # Load the model from the temporary directory + model = keras.models.load_model(temp_dir) - # Load the model from the temporary directory - model = keras.models.load_model(temp_dir.name) - - # Cleanup and return - fileio.rmtree(temp_dir.name) - - return model + return model def save(self, model: keras.Model) -> None: """Writes a keras model to the artifact store. @@ -64,12 +59,9 @@ def save(self, model: keras.Model) -> None: model: A tf.keras.Model model. """ # Create a temporary directory to store the model - temp_dir = tempfile.TemporaryDirectory() - model.save(temp_dir.name) - io_utils.copy_dir(temp_dir.name, self.uri) - - # Remove the temporary directory - fileio.rmtree(temp_dir.name) + with tempfile.TemporaryDirectory() as temp_dir: + model.save(temp_dir) + io_utils.copy_dir(temp_dir, self.uri) def extract_metadata( self, model: keras.Model diff --git a/src/zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py b/src/zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py index 47e5eb62a73..76b465e22fa 100644 --- a/src/zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py +++ b/src/zenml/integrations/tensorflow/materializers/tf_dataset_materializer.py @@ -20,7 +20,6 @@ import tensorflow as tf from zenml.enums import ArtifactType -from zenml.io import fileio from zenml.materializers.base_materializer import BaseMaterializer from zenml.utils import io_utils @@ -59,15 +58,12 @@ def save(self, dataset: tf.data.Dataset) -> None: Args: dataset: The dataset to persist. 
""" - temp_dir = tempfile.TemporaryDirectory() - path = os.path.join(temp_dir.name, DEFAULT_FILENAME) - try: + with tempfile.TemporaryDirectory() as temp_dir: + path = os.path.join(temp_dir, DEFAULT_FILENAME) tf.data.experimental.save( dataset, path, compression=None, shard_func=None ) - io_utils.copy_dir(temp_dir.name, self.uri) - finally: - fileio.rmtree(temp_dir.name) + io_utils.copy_dir(temp_dir, self.uri) def extract_metadata( self, dataset: tf.data.Dataset diff --git a/src/zenml/integrations/whylogs/materializers/whylogs_materializer.py b/src/zenml/integrations/whylogs/materializers/whylogs_materializer.py index 7f93bdbb692..f3780306dfb 100644 --- a/src/zenml/integrations/whylogs/materializers/whylogs_materializer.py +++ b/src/zenml/integrations/whylogs/materializers/whylogs_materializer.py @@ -52,17 +52,14 @@ def load(self, data_type: Type[Any]) -> DatasetProfileView: filepath = os.path.join(self.uri, PROFILE_FILENAME) # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(temp_dir, PROFILE_FILENAME) - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - profile_view = DatasetProfileView.read(temp_file) + # Copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + profile_view = DatasetProfileView.read(temp_file) - # Cleanup and return - fileio.rmtree(temp_dir) - - return profile_view + return profile_view def save(self, profile_view: DatasetProfileView) -> None: """Writes a whylogs dataset profile view. 
@@ -73,21 +70,20 @@ def save(self, profile_view: DatasetProfileView) -> None: filepath = os.path.join(self.uri, PROFILE_FILENAME) # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), PROFILE_FILENAME) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(temp_dir, PROFILE_FILENAME) - profile_view.write(temp_file) + profile_view.write(temp_file) - # Copy it into artifact store - fileio.copy(temp_file, filepath) - fileio.rmtree(temp_dir) + # Copy it into artifact store + fileio.copy(temp_file, filepath) - try: - self._upload_to_whylabs(profile_view) - except Exception as e: - logger.error( - "Failed to upload whylogs profile view to Whylabs: %s", e - ) + try: + self._upload_to_whylabs(profile_view) + except Exception as e: + logger.error( + "Failed to upload whylogs profile view to Whylabs: %s", e + ) def save_visualizations( self, diff --git a/src/zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py b/src/zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py index a102d2fdfc1..6541c5b727f 100644 --- a/src/zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py +++ b/src/zenml/integrations/xgboost/materializers/xgboost_booster_materializer.py @@ -44,17 +44,15 @@ def load(self, data_type: Type[Any]) -> xgb.Booster: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - booster = xgb.Booster() - booster.load_model(temp_file) + # Copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + booster = xgb.Booster() + 
booster.load_model(temp_file) - # Cleanup and return - fileio.rmtree(temp_dir) - return booster + return booster def save(self, booster: xgb.Booster) -> None: """Creates a JSON serialization for a xgboost Booster model. @@ -65,13 +63,7 @@ def save(self, booster: xgb.Booster) -> None: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Make a temporary phantom artifact - with tempfile.NamedTemporaryFile( - mode="w", suffix=".json", delete=False - ) as f: + with tempfile.NamedTemporaryFile(mode="w", suffix=".json") as f: booster.save_model(f.name) # Copy it into artifact store fileio.copy(f.name, filepath) - - # Close and remove the temporary file - f.close() - fileio.remove(f.name) diff --git a/src/zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py b/src/zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py index 08e33bbfa8b..89e49693c89 100644 --- a/src/zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py +++ b/src/zenml/integrations/xgboost/materializers/xgboost_dmatrix_materializer.py @@ -47,16 +47,14 @@ def load(self, data_type: Type[Any]) -> xgb.DMatrix: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Create a temporary folder - temp_dir = tempfile.mkdtemp(prefix="zenml-temp-") - temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) + with tempfile.TemporaryDirectory(prefix="zenml-temp-") as temp_dir: + temp_file = os.path.join(str(temp_dir), DEFAULT_FILENAME) - # Copy from artifact store to temporary file - fileio.copy(filepath, temp_file) - matrix = xgb.DMatrix(temp_file) + # Copy from artifact store to temporary file + fileio.copy(filepath, temp_file) + matrix = xgb.DMatrix(temp_file) - # Cleanup and return - fileio.rmtree(temp_dir) - return matrix + return matrix def save(self, matrix: xgb.DMatrix) -> None: """Creates a binary serialization for a xgboost.DMatrix object. 
@@ -67,15 +65,11 @@ def save(self, matrix: xgb.DMatrix) -> None: filepath = os.path.join(self.uri, DEFAULT_FILENAME) # Make a temporary phantom artifact - with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f: + with tempfile.NamedTemporaryFile(mode="wb") as f: matrix.save_binary(f.name) # Copy it into artifact store fileio.copy(f.name, filepath) - # Close and remove the temporary file - f.close() - fileio.remove(f.name) - def extract_metadata( self, dataset: xgb.DMatrix ) -> Dict[str, "MetadataType"]: