From 4ceab10602432915964cabc4daa16e2843302329 Mon Sep 17 00:00:00 2001 From: Sandy Ryza Date: Sun, 16 Nov 2025 20:52:02 -0800 Subject: [PATCH] [SPARK-54435][SDP] spark-pipelines init should avoid overwriting existing directory --- python/pyspark/pipelines/init_cli.py | 5 +++++ python/pyspark/pipelines/tests/test_init_cli.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/python/pyspark/pipelines/init_cli.py b/python/pyspark/pipelines/init_cli.py index f8149b19263f..bd18c976e6f6 100644 --- a/python/pyspark/pipelines/init_cli.py +++ b/python/pyspark/pipelines/init_cli.py @@ -44,6 +44,11 @@ def example_python_materialized_view() -> DataFrame: def init(name: str) -> None: """Generates a simple pipeline project.""" project_dir = Path.cwd() / name + if project_dir.exists(): + raise FileExistsError( + f"Directory '{name}' already exists. " + "Please choose a different name or remove the existing directory." + ) project_dir.mkdir(parents=True, exist_ok=False) # Create the storage directory diff --git a/python/pyspark/pipelines/tests/test_init_cli.py b/python/pyspark/pipelines/tests/test_init_cli.py index e51bab6a4a69..f88956b647ac 100644 --- a/python/pyspark/pipelines/tests/test_init_cli.py +++ b/python/pyspark/pipelines/tests/test_init_cli.py @@ -72,6 +72,21 @@ def test_init(self): Path("transformations") / "example_sql_materialized_view.sql", ) + def test_init_existing_directory(self): + with tempfile.TemporaryDirectory() as temp_dir: + project_name = "test_project" + with change_dir(Path(temp_dir)): + init(project_name) + + with self.assertRaises(FileExistsError) as context: + init(project_name) + + expected_message = ( + f"Directory '{project_name}' already exists. " + "Please choose a different name or remove the existing directory." + ) + self.assertEqual(str(context.exception), expected_message) + if __name__ == "__main__": try: