diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkPipelinesSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkPipelinesSuite.scala
index a97aabfd5a37..60e279ba2ddc 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkPipelinesSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkPipelinesSuite.scala
@@ -58,7 +58,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
     val args = Array(
       "run",
       "--spec",
-      "pipeline.yml"
+      "spark-pipeline.yml"
     )
     assert(
       SparkPipelines.constructSparkSubmitArgs(
@@ -71,7 +71,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
         "abc/python/pyspark/pipelines/cli.py",
         "run",
         "--spec",
-        "pipeline.yml"
+        "spark-pipeline.yml"
       )
     )
   }
@@ -83,7 +83,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
       "run",
       "--supervise",
       "--spec",
-      "pipeline.yml",
+      "spark-pipeline.yml",
       "--conf",
       "spark.conf2=3"
     )
@@ -101,7 +101,7 @@ class SparkPipelinesSuite extends SparkSubmitTestUtils with BeforeAndAfterEach {
         "abc/python/pyspark/pipelines/cli.py",
         "run",
         "--spec",
-        "pipeline.yml"
+        "spark-pipeline.yml"
       )
     )
   }
diff --git a/docs/declarative-pipelines-programming-guide.md b/docs/declarative-pipelines-programming-guide.md
index 3932d472cf65..0ca04c644f1b 100644
--- a/docs/declarative-pipelines-programming-guide.md
+++ b/docs/declarative-pipelines-programming-guide.md
@@ -96,7 +96,7 @@ configuration:
   spark.sql.shuffle.partitions: "1000"
 ```

-It's conventional to name pipeline spec files `pipeline.yml`.
+It's conventional to name pipeline spec files `spark-pipeline.yml`.

 The `spark-pipelines init` command, described below, makes it easy to generate a pipeline project with default configuration and directory structure.
@@ -113,7 +113,7 @@ The `spark-pipelines` command line interface (CLI) is the primary way to execute

 ### `spark-pipelines run`

-`spark-pipelines run` launches an execution of a pipeline and monitors its progress until it completes. The `--spec` parameter allows selecting the pipeline spec file. If not provided, the CLI will look in the current directory and parent directories for a file named `pipeline.yml` or `pipeline.yaml`.
+`spark-pipelines run` launches an execution of a pipeline and monitors its progress until it completes. The `--spec` parameter allows selecting the pipeline spec file. If not provided, the CLI will look in the current directory and parent directories for a file named `spark-pipeline.yml` or `spark-pipeline.yaml`.

 ### `spark-pipelines dry-run`
diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json
index 51bbdd862516..295b372cade5 100644
--- a/python/pyspark/errors/error-conditions.json
+++ b/python/pyspark/errors/error-conditions.json
@@ -908,7 +908,7 @@
   },
   "PIPELINE_SPEC_FILE_NOT_FOUND": {
     "message": [
-      "No pipeline.yaml or pipeline.yml file provided in arguments or found in directory `<dir_path>` or readable ancestor directories."
+      "No spark-pipeline.yaml or spark-pipeline.yml file provided in arguments or found in directory `<dir_path>` or readable ancestor directories."
     ]
   },
   "PIPELINE_SPEC_INVALID_GLOB_PATTERN": {
diff --git a/python/pyspark/pipelines/cli.py b/python/pyspark/pipelines/cli.py
index 3ba0bb58fe94..6c9d733c272e 100644
--- a/python/pyspark/pipelines/cli.py
+++ b/python/pyspark/pipelines/cli.py
@@ -51,7 +51,7 @@
 from pyspark.pipelines.add_pipeline_analysis_context import add_pipeline_analysis_context


-PIPELINE_SPEC_FILE_NAMES = ["pipeline.yaml", "pipeline.yml"]
+PIPELINE_SPEC_FILE_NAMES = ["spark-pipeline.yaml", "spark-pipeline.yml"]


 @dataclass(frozen=True)
diff --git a/python/pyspark/pipelines/init_cli.py b/python/pyspark/pipelines/init_cli.py
index f8149b19263f..581b86875ff4 100644
--- a/python/pyspark/pipelines/init_cli.py
+++ b/python/pyspark/pipelines/init_cli.py
@@ -54,7 +54,7 @@ def init(name: str) -> None:
     storage_path = f"file://{storage_dir.resolve()}"

     # Write the spec file to the project directory
-    spec_file = project_dir / "pipeline.yml"
+    spec_file = project_dir / "spark-pipeline.yml"
     with open(spec_file, "w") as f:
         spec_content = SPEC.replace("{{ name }}", name).replace("{{ storage_root }}", storage_path)
         f.write(spec_content)
diff --git a/python/pyspark/pipelines/tests/test_cli.py b/python/pyspark/pipelines/tests/test_cli.py
index ff3022fa2966..f810ab099b7b 100644
--- a/python/pyspark/pipelines/tests/test_cli.py
+++ b/python/pyspark/pipelines/tests/test_cli.py
@@ -191,7 +191,7 @@ def test_unpack_pipeline_spec_bad_configuration(self):

     def test_find_pipeline_spec_in_current_directory(self):
         with tempfile.TemporaryDirectory() as temp_dir:
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write(
                     """
@@ -208,7 +208,7 @@ def test_find_pipeline_spec_in_current_directory(self):

     def test_find_pipeline_spec_in_current_directory_yml(self):
         with tempfile.TemporaryDirectory() as temp_dir:
-            spec_path = Path(temp_dir) / "pipeline.yml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yml"
             with spec_path.open("w") as f:
                 f.write(
                     """
@@ -225,10 +225,10 @@ def test_find_pipeline_spec_in_current_directory_yml(self):

     def test_find_pipeline_spec_in_current_directory_yml_and_yaml(self):
         with tempfile.TemporaryDirectory() as temp_dir:
-            with (Path(temp_dir) / "pipeline.yml").open("w") as f:
+            with (Path(temp_dir) / "spark-pipeline.yml").open("w") as f:
                 f.write("")

-            with (Path(temp_dir) / "pipeline.yaml").open("w") as f:
+            with (Path(temp_dir) / "spark-pipeline.yaml").open("w") as f:
                 f.write("")

             with self.assertRaises(PySparkException) as context:
@@ -241,7 +241,7 @@ def test_find_pipeline_spec_in_parent_directory(self):
             parent_dir = Path(temp_dir)
             child_dir = Path(temp_dir) / "child"
             child_dir.mkdir()
-            spec_path = parent_dir / "pipeline.yaml"
+            spec_path = parent_dir / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write(
                     """
@@ -296,7 +296,7 @@ def mv2():

         registry = LocalGraphElementRegistry()
         register_definitions(
-            outer_dir / "pipeline.yaml", registry, spec, self.spark, "test_graph_id"
+            outer_dir / "spark-pipeline.yaml", registry, spec, self.spark, "test_graph_id"
         )
         self.assertEqual(len(registry.outputs), 1)
         self.assertEqual(registry.outputs[0].name, "mv1")
@@ -319,7 +319,7 @@ def test_register_definitions_file_raises_error(self):
         registry = LocalGraphElementRegistry()
         with self.assertRaises(RuntimeError) as context:
             register_definitions(
-                outer_dir / "pipeline.yml", registry, spec, self.spark, "test_graph_id"
+                outer_dir / "spark-pipeline.yml", registry, spec, self.spark, "test_graph_id"
             )
         self.assertIn("This is a test exception", str(context.exception))
@@ -377,7 +377,7 @@ def test_python_import_current_directory(self):
         registry = LocalGraphElementRegistry()
         with change_dir(inner_dir2):
             register_definitions(
-                inner_dir1 / "pipeline.yaml",
+                inner_dir1 / "spark-pipeline.yaml",
                 registry,
                 PipelineSpec(
                     name="test_pipeline",
@@ -394,7 +394,7 @@ def test_python_import_current_directory(self):
     def test_full_refresh_all_conflicts_with_full_refresh(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a minimal pipeline spec
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write('{"name": "test_pipeline"}')
@@ -418,7 +418,7 @@ def test_full_refresh_all_conflicts_with_full_refresh(self):
     def test_full_refresh_all_conflicts_with_refresh(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a minimal pipeline spec
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write('{"name": "test_pipeline"}')
@@ -443,7 +443,7 @@ def test_full_refresh_all_conflicts_with_refresh(self):
     def test_full_refresh_all_conflicts_with_both(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             # Create a minimal pipeline spec
-            spec_path = Path(temp_dir) / "pipeline.yaml"
+            spec_path = Path(temp_dir) / "spark-pipeline.yaml"
             with spec_path.open("w") as f:
                 f.write('{"name": "test_pipeline"}')
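Note: the hunks above rename the spec file everywhere its name is spelled out (the CLI constant, the init template, the docs, the error message, and the tests) but leave the discovery behavior alone: the CLI still searches the working directory and then its ancestors, and still fails if a directory contains both a `.yaml` and a `.yml` spec. The sketch below is a simplified illustration of that lookup, not the code in `python/pyspark/pipelines/cli.py`; the helper name `locate_spec` is invented for this note, and the real CLI reports failures through `PySparkException` error conditions such as `PIPELINE_SPEC_FILE_NOT_FOUND` rather than the plain exceptions used here.

```python
from pathlib import Path
from typing import Optional

# Mirrors PIPELINE_SPEC_FILE_NAMES in pyspark.pipelines.cli after this change.
SPEC_FILE_NAMES = ["spark-pipeline.yaml", "spark-pipeline.yml"]


def locate_spec(start_dir: Path) -> Optional[Path]:
    """Return the nearest spec file, walking from start_dir up through its ancestors."""
    for directory in (start_dir, *start_dir.parents):
        found = [directory / name for name in SPEC_FILE_NAMES if (directory / name).is_file()]
        if len(found) > 1:
            # Conflict case exercised by test_find_pipeline_spec_in_current_directory_yml_and_yaml.
            raise RuntimeError(f"Multiple pipeline spec files found in {directory}: {found}")
        if found:
            return found[0]
    # Corresponds to the PIPELINE_SPEC_FILE_NOT_FOUND error condition in the real CLI.
    return None
```

For example, `locate_spec(Path.cwd())` returns the spec file that `spark-pipelines run` would pick up when no `--spec` argument is passed.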