From 99a5c3313a98071581b598235241ecc171ae5f46 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Tue, 24 Jun 2025 13:41:50 -0700 Subject: [PATCH 01/13] pipelines init command --- .../pipelines/init/error-cases/.ruff.toml | 2 + .../pipelines/init/error-cases/output.txt | 56 ++++++++ .../output/pipelines_project/.gitignore | 8 ++ .../.vscode/__builtins__.pyi | 3 + .../pipelines_project/.vscode/extensions.json | 7 + .../pipelines_project/.vscode/settings.json | 21 +++ .../output/pipelines_project/README.md | 41 ++++++ .../output/pipelines_project/databricks.yml | 48 +++++++ .../pipelines_project_pipeline/README.md | 22 +++ .../pipelines_project.pipeline.yml | 12 ++ .../sample_trips_pipelines_project.py | 16 +++ .../sample_zones_pipelines_project.py | 19 +++ .../utilities/utils.py | 8 ++ acceptance/pipelines/init/error-cases/script | 21 +++ acceptance/pipelines/init/python/.ruff.toml | 2 + acceptance/pipelines/init/python/input.json | 7 + acceptance/pipelines/init/python/output.txt | 14 ++ .../output/my_pipelines_project/.gitignore | 8 ++ .../.vscode/__builtins__.pyi | 3 + .../.vscode/extensions.json | 7 + .../.vscode/settings.json | 21 +++ .../output/my_pipelines_project/README.md | 41 ++++++ .../my_pipelines_project/databricks.yml | 48 +++++++ .../my_pipelines_project_pipeline/README.md | 22 +++ .../my_pipelines_project.pipeline.yml | 12 ++ .../sample_trips_my_pipelines_project.py | 16 +++ .../sample_zones_my_pipelines_project.py | 19 +++ .../utilities/utils.py | 8 ++ acceptance/pipelines/init/python/script | 11 ++ acceptance/pipelines/init/python/test.toml | 2 + acceptance/pipelines/init/sql/.ruff.toml | 2 + acceptance/pipelines/init/sql/input.json | 7 + acceptance/pipelines/init/sql/output.txt | 14 ++ .../my_sql_pipelines_project/.gitignore | 8 ++ .../.vscode/__builtins__.pyi | 3 + .../.vscode/extensions.json | 7 + .../.vscode/settings.json | 21 +++ .../output/my_sql_pipelines_project/README.md | 41 ++++++ .../my_sql_pipelines_project/databricks.yml | 48 +++++++ 
.../README.md | 21 +++ .../my_sql_pipelines_project.pipeline.yml | 12 ++ .../sample_trips_my_sql_pipelines_project.sql | 9 ++ .../sample_zones_my_sql_pipelines_project.sql | 10 ++ acceptance/pipelines/init/sql/script | 11 ++ acceptance/pipelines/init/sql/test.toml | 2 + .../install-pipelines-cli/output.txt | 28 +++- cmd/pipelines/init.go | 42 ++++++ cmd/pipelines/pipelines.go | 7 +- cmd/root/root.go | 8 +- libs/template/template.go | 8 ++ libs/template/templates/pipelines/README.md | 3 + .../pipelines/databricks_template_schema.json | 57 ++++++++ .../pipelines/library/variables.tmpl | 33 +++++ .../pipelines/template/__preamble.tmpl | 16 +++ .../{{.project_name}}/.gitignore.tmpl | 8 ++ .../.vscode/__builtins__.pyi | 3 + .../{{.project_name}}/.vscode/extensions.json | 7 + .../.vscode/settings.json.tmpl | 22 +++ .../template/{{.project_name}}/README.md.tmpl | 41 ++++++ .../{{.project_name}}/databricks.yml.tmpl | 48 +++++++ .../{{.project_name}}_pipeline/README.md.tmpl | 48 +++++++ .../sample_exploration.ipynb.tmpl | 130 ++++++++++++++++++ .../sample_trips_{{.project_name}}.py.tmpl | 16 +++ .../sample_trips_{{.project_name}}.sql.tmpl | 9 ++ .../sample_zones_{{.project_name}}.py.tmpl | 19 +++ .../sample_zones_{{.project_name}}.sql.tmpl | 10 ++ .../utilities/utils.py | 8 ++ .../{{.project_name}}.pipeline.yml.tmpl | 12 ++ main.go | 2 +- ruff.toml | 3 +- 70 files changed, 1322 insertions(+), 7 deletions(-) create mode 100644 acceptance/pipelines/init/error-cases/.ruff.toml create mode 100644 acceptance/pipelines/init/error-cases/output.txt create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/.gitignore create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/__builtins__.pyi create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/extensions.json create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json create mode 100644 
acceptance/pipelines/init/error-cases/output/pipelines_project/README.md create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py create mode 100644 acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/utilities/utils.py create mode 100644 acceptance/pipelines/init/error-cases/script create mode 100644 acceptance/pipelines/init/python/.ruff.toml create mode 100644 acceptance/pipelines/init/python/input.json create mode 100644 acceptance/pipelines/init/python/output.txt create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/.gitignore create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/__builtins__.pyi create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/extensions.json create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/README.md create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md create mode 100644 
acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py create mode 100644 acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/utilities/utils.py create mode 100644 acceptance/pipelines/init/python/script create mode 100644 acceptance/pipelines/init/python/test.toml create mode 100644 acceptance/pipelines/init/sql/.ruff.toml create mode 100644 acceptance/pipelines/init/sql/input.json create mode 100644 acceptance/pipelines/init/sql/output.txt create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.gitignore create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/__builtins__.pyi create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/extensions.json create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql 
create mode 100644 acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql create mode 100644 acceptance/pipelines/init/sql/script create mode 100644 acceptance/pipelines/init/sql/test.toml create mode 100644 cmd/pipelines/init.go create mode 100644 libs/template/templates/pipelines/README.md create mode 100644 libs/template/templates/pipelines/databricks_template_schema.json create mode 100644 libs/template/templates/pipelines/library/variables.tmpl create mode 100644 libs/template/templates/pipelines/template/__preamble.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/.gitignore.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/.vscode/extensions.json create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl create mode 100644 
libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py create mode 100644 libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl diff --git a/acceptance/pipelines/init/error-cases/.ruff.toml b/acceptance/pipelines/init/error-cases/.ruff.toml new file mode 100644 index 0000000000..1ab316576b --- /dev/null +++ b/acceptance/pipelines/init/error-cases/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.py"] diff --git a/acceptance/pipelines/init/error-cases/output.txt b/acceptance/pipelines/init/error-cases/output.txt new file mode 100644 index 0000000000..9a93cff052 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output.txt @@ -0,0 +1,56 @@ + +=== Install pipelines CLI +>>> errcode [CLI] install-pipelines-cli -d ./subdir +pipelines successfully installed in directory "./subdir" + +=== Test with invalid project name (contains uppercase letters) +>>> errcode ./subdir/pipelines init --config-file ./invalid_input.json --output-dir output +Error: failed to load config from file ./invalid_input.json: invalid value for project_name: "InvalidProjectName". Name must consist of lower case letters, numbers, and underscores. 
+Usage: + pipelines init [flags] + +Flags: + --config-file string JSON file containing key value pairs of input parameters required for template initialization + -h, --help help for init + --output-dir string Directory to write the initialized template to + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Error: failed to load config from file ./invalid_input.json: invalid value for project_name: "InvalidProjectName". Name must consist of lower case letters, numbers, and underscores. + +Exit code: 1 + +=== Test with missing config file +>>> errcode ./subdir/pipelines init --output-dir output + +Welcome to the template for pipelines! + + +Your new project has been created in the 'pipelines_project' directory! + +Refer to the README.md file for "getting started" instructions! + +=== Test with non-existent config file +>>> errcode ./subdir/pipelines init --config-file ./nonexistent.json --output-dir output +Error: failed to load config from file ./nonexistent.json: open ./nonexistent.json: no such file or directory +Usage: + pipelines init [flags] + +Flags: + --config-file string JSON file containing key value pairs of input parameters required for template initialization + -h, --help help for init + --output-dir string Directory to write the initialized template to + +Global Flags: + --debug enable debug logging + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Error: failed to load config from file ./nonexistent.json: open ./nonexistent.json: no such file or directory + +Exit code: 1 diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.gitignore b/acceptance/pipelines/init/error-cases/output/pipelines_project/.gitignore new file mode 100644 index 
0000000000..f6a3b5ff93 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +**/!explorations/README.md diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/extensions.json b/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json b/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json new file mode 100644 index 0000000000..bf3645b890 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." 
+ ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["resources/pipelines_project_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/README.md b/acceptance/pipelines/init/error-cases/output/pipelines_project/README.md new file mode 100644 index 0000000000..ffe836dd36 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/README.md @@ -0,0 +1,41 @@ +# pipelines_project + +The 'pipelines_project' project was generated by using the Pipelines template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. To deploy a development copy of this project, type: + ``` + $ pipelines deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ pipelines deploy --target prod + ``` + +3. Use the "summary" comand to review everything that was deployed: + ``` + $ pipelines summary + ``` + +4. 
To run a job or pipeline, use the "run" command: + ``` + $ pipelines run + ``` diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml b/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml new file mode 100644 index 0000000000..73e251daaa --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks pipelines definition for pipelines_project. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +project: + name: pipelines_project + +include: + - resources/*.yml + - resources/*/*.yml + - ./*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + deploy_on_run: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: hive_metastore + schema: ${workspace.current_user.short_name} + notifications: [] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE + variables: + catalog: hive_metastore + schema: default + notifications: [[USERNAME]] diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md new file mode 100644 index 0000000000..6476007896 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md @@ -0,0 +1,22 @@ +# pipelines_project_pipeline + +This folder defines all source code for the pipelines_project_pipeline pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `utilities` (optional): Utility functions and Python modules used in this pipeline. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_pipelines_project.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml new file mode 100644 index 0000000000..84191e234d --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml @@ -0,0 +1,12 @@ +resources: + pipelines: + pipelines_project_pipeline: + name: pipelines_project_pipeline + serverless: true + channel: "PREVIEW" + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." + libraries: + - glob: + include: transformations/** diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py new file mode 100644 index 0000000000..3462df33df --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py @@ -0,0 +1,16 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_trips_pipelines_project(): + return ( + spark.read.table("samples.nyctaxi.trips") + .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + ) diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py new file mode 100644 index 0000000000..51c8c87769 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py @@ -0,0 +1,19 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_zones_pipelines_project(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table("sample_trips_pipelines_project") + .groupBy(col("pickup_zip")) + .agg( + sum("fare_amount").alias("total_fare") + ) + ) diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/utilities/utils.py b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff --git a/acceptance/pipelines/init/error-cases/script 
b/acceptance/pipelines/init/error-cases/script new file mode 100644 index 0000000000..afb083187c --- /dev/null +++ b/acceptance/pipelines/init/error-cases/script @@ -0,0 +1,21 @@ +tmpdir="./subdir" +pipelines="$tmpdir/pipelines" +mkdir -p $tmpdir + +title "Install pipelines CLI" +trace errcode $CLI install-pipelines-cli -d $tmpdir + +title "Test with invalid project name (contains uppercase letters)" +echo '{"project_name": "InvalidProjectName"}' > invalid_input.json +trace errcode $pipelines init --config-file ./invalid_input.json --output-dir output + +title "Test with missing config file" +trace errcode $pipelines init --output-dir output + +title "Test with non-existent config file" +trace errcode $pipelines init --config-file ./nonexistent.json --output-dir output + +# Clean up +rm -f invalid_input.json +rm -f $pipelines +rm -rf $tmpdir diff --git a/acceptance/pipelines/init/python/.ruff.toml b/acceptance/pipelines/init/python/.ruff.toml new file mode 100644 index 0000000000..1ab316576b --- /dev/null +++ b/acceptance/pipelines/init/python/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.py"] diff --git a/acceptance/pipelines/init/python/input.json b/acceptance/pipelines/init/python/input.json new file mode 100644 index 0000000000..f71e40e506 --- /dev/null +++ b/acceptance/pipelines/init/python/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_pipelines_project", + "default_catalog": "main", + "personal_schemas": "yes", + "shared_schema": "default", + "language": "python" +} diff --git a/acceptance/pipelines/init/python/output.txt b/acceptance/pipelines/init/python/output.txt new file mode 100644 index 0000000000..4b010b9a75 --- /dev/null +++ b/acceptance/pipelines/init/python/output.txt @@ -0,0 +1,14 @@ + +=== Install pipelines CLI +>>> errcode [CLI] install-pipelines-cli -d ./subdir +pipelines successfully installed in directory "./subdir" + +=== Test basic pipelines init with configuration file +>>> ./subdir/pipelines init --config-file ./input.json 
--output-dir output + +Welcome to the template for pipelines! + + +Your new project has been created in the 'my_pipelines_project' directory! + +Refer to the README.md file for "getting started" instructions! diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.gitignore b/acceptance/pipelines/init/python/output/my_pipelines_project/.gitignore new file mode 100644 index 0000000000..f6a3b5ff93 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +**/!explorations/README.md diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/extensions.json b/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json b/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json new file mode 100644 index 0000000000..9b1344f30c --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + 
"python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["resources/my_pipelines_project_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/README.md b/acceptance/pipelines/init/python/output/my_pipelines_project/README.md new file mode 100644 index 0000000000..54485ec4f2 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/README.md @@ -0,0 +1,41 @@ +# my_pipelines_project + +The 'my_pipelines_project' project was generated by using the Pipelines template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. To deploy a development copy of this project, type: + ``` + $ pipelines deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ pipelines deploy --target prod + ``` + +3. 
Use the "summary" comand to review everything that was deployed: + ``` + $ pipelines summary + ``` + +4. To run a job or pipeline, use the "run" command: + ``` + $ pipelines run + ``` diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml b/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml new file mode 100644 index 0000000000..260e9a4004 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks pipelines definition for my_pipelines_project. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +project: + name: my_pipelines_project + +include: + - resources/*.yml + - resources/*/*.yml + - ./*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + deploy_on_run: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: main + schema: ${workspace.current_user.short_name} + notifications: [] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. 
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE + variables: + catalog: main + schema: default + notifications: [[USERNAME]] diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md new file mode 100644 index 0000000000..2fdc1443ee --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md @@ -0,0 +1,22 @@ +# my_pipelines_project_pipeline + +This folder defines all source code for the my_pipelines_project_pipeline pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `utilities` (optional): Utility functions and Python modules used in this pipeline. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_my_pipelines_project.py" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml new file mode 100644 index 0000000000..d618c4a518 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml @@ -0,0 +1,12 @@ +resources: + pipelines: + my_pipelines_project_pipeline: + name: my_pipelines_project_pipeline + serverless: true + channel: "PREVIEW" + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." + libraries: + - glob: + include: transformations/** diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py new file mode 100644 index 0000000000..d1efafa766 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py @@ -0,0 +1,16 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_trips_my_pipelines_project(): + return ( + spark.read.table("samples.nyctaxi.trips") + .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + ) diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py new file mode 100644 index 0000000000..957821b904 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py @@ -0,0 +1,19 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. + + +@dlt.table +def sample_zones_my_pipelines_project(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table("sample_trips_my_pipelines_project") + .groupBy(col("pickup_zip")) + .agg( + sum("fare_amount").alias("total_fare") + ) + ) diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/utilities/utils.py b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff --git a/acceptance/pipelines/init/python/script 
b/acceptance/pipelines/init/python/script new file mode 100644 index 0000000000..0a865a44b7 --- /dev/null +++ b/acceptance/pipelines/init/python/script @@ -0,0 +1,11 @@ +tmpdir="./subdir" +pipelines="$tmpdir/pipelines" +mkdir -p $tmpdir + +title "Install pipelines CLI" +trace errcode $CLI install-pipelines-cli -d $tmpdir + +title "Test basic pipelines init with configuration file" +trace $pipelines init --config-file ./input.json --output-dir output +rm -f $pipelines +rm -rf $tmpdir diff --git a/acceptance/pipelines/init/python/test.toml b/acceptance/pipelines/init/python/test.toml new file mode 100644 index 0000000000..0e8c8a3840 --- /dev/null +++ b/acceptance/pipelines/init/python/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = true diff --git a/acceptance/pipelines/init/sql/.ruff.toml b/acceptance/pipelines/init/sql/.ruff.toml new file mode 100644 index 0000000000..1ab316576b --- /dev/null +++ b/acceptance/pipelines/init/sql/.ruff.toml @@ -0,0 +1,2 @@ +[format] +exclude = ["*.py"] diff --git a/acceptance/pipelines/init/sql/input.json b/acceptance/pipelines/init/sql/input.json new file mode 100644 index 0000000000..41480c730b --- /dev/null +++ b/acceptance/pipelines/init/sql/input.json @@ -0,0 +1,7 @@ +{ + "project_name": "my_sql_pipelines_project", + "default_catalog": "main", + "personal_schemas": "no", + "shared_schema": "shared_dev", + "language": "sql" +} diff --git a/acceptance/pipelines/init/sql/output.txt b/acceptance/pipelines/init/sql/output.txt new file mode 100644 index 0000000000..ad3f464890 --- /dev/null +++ b/acceptance/pipelines/init/sql/output.txt @@ -0,0 +1,14 @@ + +=== Install pipelines CLI +>>> errcode [CLI] install-pipelines-cli -d ./subdir +pipelines successfully installed in directory "./subdir" + +=== Test pipelines init with SQL configuration +>>> ./subdir/pipelines init --config-file ./input.json --output-dir output + +Welcome to the template for pipelines! 
+ + +Your new project has been created in the 'my_sql_pipelines_project' directory! + +Refer to the README.md file for "getting started" instructions! diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.gitignore b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.gitignore new file mode 100644 index 0000000000..f6a3b5ff93 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.gitignore @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +!**/explorations/README.md diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/extensions.json b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json new file mode 100644 index 0000000000..aaa772a7fe --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json @@ -0,0 +1,21 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": 
"${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.analysis.extraPaths": ["resources/my_sql_pipelines_project_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md new file mode 100644 index 0000000000..f9ac7b56c5 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md @@ -0,0 +1,41 @@ +# my_sql_pipelines_project + +The 'my_sql_pipelines_project' project was generated by using the Pipelines template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. To deploy a development copy of this project, type: + ``` + $ pipelines deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ pipelines deploy --target prod + ``` + +3. 
Use the "summary" command to review everything that was deployed: + ``` + $ pipelines summary + ``` + +4. To run a job or pipeline, use the "run" command: + ``` + $ pipelines run + ``` diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml new file mode 100644 index 0000000000..81621b8848 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml @@ -0,0 +1,48 @@ +# This is a Databricks pipelines definition for my_sql_pipelines_project. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +project: + name: my_sql_pipelines_project + +include: + - resources/*.yml + - resources/*/*.yml + - ./*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + mode: development + default: true + deploy_on_run: true + workspace: + host: [DATABRICKS_URL] + variables: + catalog: main + schema: shared_dev + notifications: [] + + prod: + mode: production + workspace: + host: [DATABRICKS_URL] + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
+ root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} + permissions: + - user_name: [USERNAME] + level: CAN_MANAGE + variables: + catalog: main + schema: shared_dev + notifications: [[USERNAME]] diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md new file mode 100644 index 0000000000..94ec0b8a4f --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md @@ -0,0 +1,21 @@ +# my_sql_pipelines_project_pipeline + +This folder defines all source code for the 'my_sql_pipelines_project_pipeline' pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_my_sql_pipelines_project.sql" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml new file mode 100644 index 0000000000..1bb72eacc9 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml @@ -0,0 +1,12 @@ +resources: + pipelines: + my_sql_pipelines_project_pipeline: + name: my_sql_pipelines_project_pipeline + serverless: true + channel: "PREVIEW" + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." + libraries: + - glob: + include: transformations/** diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql new file mode 100644 index 0000000000..5792d69972 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. 
+ +CREATE MATERIALIZED VIEW sample_trips_my_sql_pipelines_project AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql new file mode 100644 index 0000000000..1275615d57 --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_zones_my_sql_pipelines_project AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_my_sql_pipelines_project +GROUP BY pickup_zip diff --git a/acceptance/pipelines/init/sql/script b/acceptance/pipelines/init/sql/script new file mode 100644 index 0000000000..ba801454f0 --- /dev/null +++ b/acceptance/pipelines/init/sql/script @@ -0,0 +1,11 @@ +tmpdir="./subdir" +pipelines="$tmpdir/pipelines" +mkdir -p $tmpdir + +title "Install pipelines CLI" +trace errcode $CLI install-pipelines-cli -d $tmpdir + +title "Test pipelines init with SQL configuration" +trace $pipelines init --config-file ./input.json --output-dir output +rm -f $pipelines +rm -rf $tmpdir diff --git a/acceptance/pipelines/init/sql/test.toml b/acceptance/pipelines/init/sql/test.toml new file mode 100644 index 0000000000..0e8c8a3840 --- /dev/null +++ b/acceptance/pipelines/init/sql/test.toml @@ -0,0 +1,2 @@ +Local = true +Cloud = true diff --git a/acceptance/pipelines/install-pipelines-cli/output.txt b/acceptance/pipelines/install-pipelines-cli/output.txt index c4ed9fb91e..b2abcce147 100644 --- 
a/acceptance/pipelines/install-pipelines-cli/output.txt +++ b/acceptance/pipelines/install-pipelines-cli/output.txt @@ -8,9 +8,21 @@ Pipelines CLI (stub, to be filled in) Usage: pipelines [flags] + pipelines [command] + +Available Commands: + completion Generate the autocompletion script for the specified shell + help Help about any command + init Initialize a new pipelines project Flags: - -h, --help help for pipelines + --debug enable debug logging + -h, --help help for pipelines + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "pipelines [command] --help" for more information about a command. === pipelines already installed >>> errcode [CLI] install-pipelines-cli -d ./subdir @@ -31,6 +43,18 @@ Pipelines CLI (stub, to be filled in) Usage: pipelines [flags] + pipelines [command] + +Available Commands: + completion Generate the autocompletion script for the specified shell + help Help about any command + init Initialize a new pipelines project Flags: - -h, --help help for pipelines + --debug enable debug logging + -h, --help help for pipelines + -o, --output type output type: text or json (default text) + -p, --profile string ~/.databrickscfg profile + -t, --target string bundle target to use (if applicable) + +Use "pipelines [command] --help" for more information about a command. 
diff --git a/cmd/pipelines/init.go b/cmd/pipelines/init.go new file mode 100644 index 0000000000..4f324e7c08 --- /dev/null +++ b/cmd/pipelines/init.go @@ -0,0 +1,42 @@ +package pipelines + +import ( + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/template" + "github.com/spf13/cobra" +) + +func initCommand() *cobra.Command { + var outputDir string + var configFile string + cmd := &cobra.Command{ + Use: "init", + Short: "Initialize a new pipelines project", + PreRunE: root.MustWorkspaceClient, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + r := template.Resolver{ + TemplatePathOrUrl: "pipelines", + ConfigFile: configFile, + OutputDir: outputDir, + } + + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) + + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + return nil + }, + } + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") + cmd.Flags().StringVar(&configFile, "config-file", "", "JSON file containing key value pairs of input parameters required for template initialization") + return cmd +} diff --git a/cmd/pipelines/pipelines.go b/cmd/pipelines/pipelines.go index 31188f3448..511fd0ee2a 100644 --- a/cmd/pipelines/pipelines.go +++ b/cmd/pipelines/pipelines.go @@ -1,10 +1,13 @@ package pipelines import ( + "context" + + "github.com/databricks/cli/cmd/root" "github.com/spf13/cobra" ) -func New() *cobra.Command { +func New(ctx context.Context) *cobra.Command { cmd := &cobra.Command{ Use: "pipelines", Short: "Pipelines CLI", @@ -13,6 +16,8 @@ func New() *cobra.Command { _ = cmd.Help() }, } + root.SetupRootCommand(ctx, cmd) + cmd.AddCommand(initCommand()) return cmd } diff --git a/cmd/root/root.go b/cmd/root/root.go index 9815d0288d..3c7c3c70e4 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -37,6 +37,12 @@ func New(ctx context.Context) 
*cobra.Command { SilenceErrors: true, } + SetupRootCommand(ctx, cmd) + cmd.SetVersionTemplate("Databricks CLI v{{.Version}}\n") + return cmd +} + +func SetupRootCommand(ctx context.Context, cmd *cobra.Command) { // Pass the context along through the command during initialization. // It will be overwritten when the command is executed. cmd.SetContext(ctx) @@ -88,8 +94,6 @@ func New(ctx context.Context) *cobra.Command { } cmd.SetFlagErrorFunc(flagErrorFunc) - cmd.SetVersionTemplate("Databricks CLI v{{.Version}}\n") - return cmd } // Wrap flag errors to include the usage string. diff --git a/libs/template/template.go b/libs/template/template.go index 43569050e2..18993bbf94 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -27,6 +27,7 @@ const ( DefaultPython TemplateName = "default-python" DefaultSql TemplateName = "default-sql" LakeflowPipelines TemplateName = "lakeflow-pipelines" + Pipelines TemplateName = "pipelines" DbtSql TemplateName = "dbt-sql" MlopsStacks TemplateName = "mlops-stacks" DefaultPydabs TemplateName = "default-pydabs" @@ -54,6 +55,13 @@ var databricksTemplates = []Template{ Reader: &builtinReader{name: string(LakeflowPipelines)}, Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: LakeflowPipelines}}, }, + { + name: Pipelines, + hidden: true, + description: "The default template for pipelines CLI", + Reader: &builtinReader{name: string(Pipelines)}, + Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: Pipelines}}, + }, { name: DbtSql, description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", diff --git a/libs/template/templates/pipelines/README.md b/libs/template/templates/pipelines/README.md new file mode 100644 index 0000000000..1eec4c7f53 --- /dev/null +++ b/libs/template/templates/pipelines/README.md @@ -0,0 +1,3 @@ +# Pipelines + +Default template for pipelines diff --git 
a/libs/template/templates/pipelines/databricks_template_schema.json b/libs/template/templates/pipelines/databricks_template_schema.json new file mode 100644 index 0000000000..ac9d9deb2c --- /dev/null +++ b/libs/template/templates/pipelines/databricks_template_schema.json @@ -0,0 +1,57 @@ +{ + "welcome_message": "\nWelcome to the template for pipelines!", + "properties": { + "project_name": { + "type": "string", + "default": "pipelines_project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", + "order": 1, + "pattern": "^[a-z0-9_]+$", + "pattern_match_failure_message": "Name must consist of lower case letters, numbers, and underscores." + }, + "default_catalog": { + "type": "string", + "default": "{{default_catalog}}", + "pattern": "^\\w*$", + "pattern_match_failure_message": "Invalid catalog name.", + "description": "\nInitial catalog:\ndefault_catalog", + "order": 3 + }, + "personal_schemas": { + "type": "string", + "description": "\nUse a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas", + "default": "yes", + "enum": [ + "yes", + "no" + ], + "order": 4 + }, + "shared_schema": { + "skip_prompt_if": { + "properties": { + "personal_schemas": { + "const": "yes" + } + } + }, + "type": "string", + "default": "default", + "pattern": "^\\w+$", + "pattern_match_failure_message": "Invalid schema name.", + "description": "\nInitial schema during development:\ndefault_schema", + "order": 5 + }, + "language": { + "type": "string", + "default": "python", + "description": "\nInitial language for this project:\nlanguage", + "enum": [ + "python", + "sql" + ], + "order": 6 + } + }, + "success_message": "\n\nYour new project has been created in the '{{.project_name}}' directory!\n\nRefer to the README.md file for \"getting started\" instructions!" 
+} diff --git a/libs/template/templates/pipelines/library/variables.tmpl b/libs/template/templates/pipelines/library/variables.tmpl new file mode 100644 index 0000000000..9c5c36b449 --- /dev/null +++ b/libs/template/templates/pipelines/library/variables.tmpl @@ -0,0 +1,33 @@ +{{- define `pipeline_name` -}} + {{ .project_name }}_pipeline +{{- end }} + +{{- define `job_name` -}} + {{ .project_name }}_job +{{- end }} + +{{- define `static_dev_schema` -}} + {{- if (regexp "^yes").MatchString .personal_schemas -}} + {{ short_name }} + {{- else -}} + {{ .shared_schema }} + {{- end}} +{{- end }} + + +{{- define `dev_schema` -}} + {{- if (regexp "^yes").MatchString .personal_schemas -}} + ${workspace.current_user.short_name} + {{- else -}} + {{ .shared_schema }} + {{- end}} +{{- end }} + + +{{- define `prod_schema` -}} + {{- if (regexp "^yes").MatchString .personal_schemas -}} + default + {{- else -}} + {{ .shared_schema }} + {{- end}} +{{- end }} diff --git a/libs/template/templates/pipelines/template/__preamble.tmpl b/libs/template/templates/pipelines/template/__preamble.tmpl new file mode 100644 index 0000000000..c6c0c2321f --- /dev/null +++ b/libs/template/templates/pipelines/template/__preamble.tmpl @@ -0,0 +1,16 @@ +# Preamble + +This file only contains template directives; it is skipped for the actual output. 
+ +{{skip "__preamble"}} + +{{$isSQL := eq .language "sql"}} + +{{if $isSQL}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py"}} +{{else}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql"}} + {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql"}} +{{end}} diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.gitignore.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/.gitignore.tmpl new file mode 100644 index 0000000000..f6a3b5ff93 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/.gitignore.tmpl @@ -0,0 +1,8 @@ +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +**/explorations/** +!**/explorations/README.md diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi new file mode 100644 index 0000000000..0edd5181bc --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/extensions.json new file mode 100644 index 0000000000..5d15eba363 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + 
"databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl new file mode 100644 index 0000000000..6a87715ae2 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl @@ -0,0 +1,22 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + {{- /* Unfortunately extraPaths doesn't support globs!! See: https://github.com/microsoft/pylance-release/issues/973 */}} + "python.analysis.extraPaths": ["resources/{{.project_name}}_pipeline"], + "files.exclude": { + "**/*.egg-info": true, + "**/__pycache__": true, + ".pytest_cache": true, + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + }, +} diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 0000000000..d8166ba389 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,41 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the Pipelines template. + +## Setup + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace, if you have not done so already: + ``` + $ databricks auth login + ``` + +3. 
Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from + https://www.databricks.com/blog/announcing-pycharm-integration-databricks. + + +## Deploying resources + +1. To deploy a development copy of this project, type: + ``` + $ pipelines deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + +2. Similarly, to deploy a production copy, type: + ``` + $ pipelines deploy --target prod + ``` + +3. Use the "summary" command to review everything that was deployed: + ``` + $ pipelines summary + ``` + +4. To run a job or pipeline, use the "run" command: + ``` + $ pipelines run + ``` diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 0000000000..5b5aa190c6 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,48 @@ +# This is a Databricks pipelines definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +project: + name: {{.project_name}} + +include: + - resources/*.yml + - resources/*/*.yml + - ./*.yml + +# Variable declarations. These variables are assigned in the dev/prod targets below. +variables: + catalog: + description: The catalog to use + schema: + description: The schema to use + notifications: + description: The email addresses to use for failure notifications + +targets: + dev: + # The default target uses 'mode: development' to create a development copy. + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default. + # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
+ mode: development + default: true + deploy_on_run: true + workspace: + host: {{workspace_host}} + variables: + catalog: {{.default_catalog}} + schema: {{template `dev_schema` .}} + notifications: [] + + prod: + mode: production + workspace: + host: {{workspace_host}} + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. + root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} + permissions: + - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} + level: CAN_MANAGE + variables: + catalog: {{.default_catalog}} + schema: {{template `prod_schema` .}} + notifications: [{{user_name}}] diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl new file mode 100644 index 0000000000..b085a301a6 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl @@ -0,0 +1,48 @@ +{{- if (eq .language "python") -}} + +# {{template `pipeline_name` .}} + +This folder defines all source code for the {{template `pipeline_name` .}} pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `utilities` (optional): Utility functions and Python modules used in this pipeline. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_{{ .project_name }}.py" to get familiar with the syntax. 
+ Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. +{{ else -}} + +# {{template `pipeline_name` .}} + +This folder defines all source code for the '{{template `pipeline_name` .}}' pipeline: + +- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. +- `transformations`: All dataset definitions and transformations. +- `data_sources` (optional): View definitions describing the source data for this pipeline. + +## Getting Started + +To get started, go to the `transformations` folder -- most of the relevant source code lives there: + +* By convention, every dataset under `transformations` is in a separate file. +* Take a look at the sample under "sample_trips_{{ .project_name }}.sql" to get familiar with the syntax. + Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. +* Use `Run file` to run and preview a single transformation. +* Use `Run pipeline` to run _all_ transformations in the entire pipeline. +* Use `+ Add` in the file browser to add a new data set definition. +* Use `Schedule` to run the pipeline on a schedule! + +For more tutorials and reference material, see https://docs.databricks.com/dlt. 
+{{ end -}} diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl new file mode 100644 index 0000000000..967e663fae --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl @@ -0,0 +1,130 @@ +{{- if (eq .language "python") -}} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "19a992e9-55e0-49e4-abc7-8c92c420dd5b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "1b0a82fa-3c6a-4f29-bb43-ded1c4fd77c6", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM {{ .default_catalog}}.{{template `static_dev_schema` .}}.{{ .project_name }}\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} +{{ else -}} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "3bd3cbb1-1518-4d0a-a8d1-f08da3f8840b", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "d30a8e05-bf7a-47e1-982e-b37e64cd6d43", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "-- !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "USE CATALOG `{{.default_catalog}}`;\n", + "USE SCHEMA `{{template `static_dev_schema` .}}`;\n", + "\n", + "SELECT * from {{ .project_name }};" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "sql", + "notebookMetadata": {}, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} +{{ end -}} diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl new file mode 100644 index 0000000000..a191f88b9f --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl @@ -0,0 +1,16 @@ +import dlt +from pyspark.sql.functions import col +from utilities import utils + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_trips_{{ .project_name }}(): + return ( + spark.read.table("samples.nyctaxi.trips") + .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) + ) diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl new file mode 100644 index 0000000000..b95a95da4d --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl @@ -0,0 +1,9 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_trips_{{ .project_name }} AS +SELECT + pickup_zip, + fare_amount +FROM samples.nyctaxi.trips diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl new file mode 100644 index 0000000000..64e40036d0 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl @@ -0,0 +1,19 @@ +import dlt +from pyspark.sql.functions import col, sum + + +# This file defines a sample transformation. +# Edit the sample below or add new transformations +# using "+ Add" in the file browser. 
+ + +@dlt.table +def sample_zones_{{ .project_name }}(): + # Read from the "sample_trips" table, then sum all the fares + return ( + spark.read.table("sample_trips_{{ .project_name }}") + .groupBy(col("pickup_zip")) + .agg( + sum("fare_amount").alias("total_fare") + ) + ) diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl new file mode 100644 index 0000000000..ab84f4066a --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl @@ -0,0 +1,10 @@ +-- This file defines a sample transformation. +-- Edit the sample below or add new transformations +-- using "+ Add" in the file browser. + +CREATE MATERIALIZED VIEW sample_zones_{{ .project_name }} AS +SELECT + pickup_zip, + SUM(fare_amount) AS total_fare +FROM sample_trips_{{ .project_name }} +GROUP BY pickup_zip diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py new file mode 100644 index 0000000000..ff039898f0 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py @@ -0,0 +1,8 @@ +from pyspark.sql.functions import udf +from pyspark.sql.types import FloatType + + +@udf(returnType=FloatType()) +def distance_km(distance_miles): + """Convert distance from miles to kilometers (1 mile = 1.60934 km).""" + return distance_miles * 1.60934 diff --git 
a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl new file mode 100644 index 0000000000..23df081f00 --- /dev/null +++ b/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl @@ -0,0 +1,12 @@ +resources: + pipelines: + {{template `pipeline_name` .}}: + name: {{template `pipeline_name` .}} + serverless: true + channel: "PREVIEW" + catalog: ${var.catalog} + schema: ${var.schema} + root_path: "." + libraries: + - glob: + include: transformations/** diff --git a/main.go b/main.go index 7f390d9edd..1a351cf457 100644 --- a/main.go +++ b/main.go @@ -18,7 +18,7 @@ import ( func getCommand(ctx context.Context) *cobra.Command { invokedAs := filepath.Base(os.Args[0]) if strings.HasPrefix(invokedAs, "pipelines") { - return pipelines.New() + return pipelines.New(ctx) } return cmd.New(ctx) } diff --git a/ruff.toml b/ruff.toml index 5838db95ee..20b36e203a 100644 --- a/ruff.toml +++ b/ruff.toml @@ -3,5 +3,6 @@ line-length = 150 exclude = [ "tagging.py", # tagging.py is synced from universe in the `openapi/tagging` directory and follows different format rules. 
- "acceptance/bundle/templates/lakeflow-pipelines/**/*.py" # files are manually formatted + "acceptance/bundle/templates/lakeflow-pipelines/**/*.py", # files are manually formatted + "acceptance/bundle/templates/pipelines/**/*.py" # files are manually formatted ] From 893c34d7d2951d69f48224cb0ca57da027886066 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Tue, 24 Jun 2025 15:58:55 -0700 Subject: [PATCH 02/13] no args allowed in pipelines project --- cmd/pipelines/init.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/pipelines/init.go b/cmd/pipelines/init.go index 4f324e7c08..51fb98569e 100644 --- a/cmd/pipelines/init.go +++ b/cmd/pipelines/init.go @@ -13,6 +13,7 @@ func initCommand() *cobra.Command { Use: "init", Short: "Initialize a new pipelines project", PreRunE: root.MustWorkspaceClient, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() From a473e826252bfbabe210f6ef77f689989fadd06b Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Wed, 25 Jun 2025 02:04:24 -0700 Subject: [PATCH 03/13] changed to bundle terminology --- .../init/error-cases/output/pipelines_project/databricks.yml | 3 ++- .../init/python/output/my_pipelines_project/databricks.yml | 3 ++- .../init/sql/output/my_sql_pipelines_project/databricks.yml | 3 ++- .../pipelines/template/{{.project_name}}/databricks.yml.tmpl | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml b/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml index 73e251daaa..e4cf682274 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml +++ b/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml @@ -1,7 +1,8 @@ # This is a Databricks pipelines definition for pipelines_project. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. 
-project: +bundle: name: pipelines_project + uuid: [UUID] include: - resources/*.yml diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml b/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml index 260e9a4004..62d321c5e8 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml +++ b/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml @@ -1,7 +1,8 @@ # This is a Databricks pipelines definition for my_pipelines_project. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -project: +bundle: name: my_pipelines_project + uuid: [UUID] include: - resources/*.yml diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml index 81621b8848..7bc84ba717 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml +++ b/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml @@ -1,7 +1,8 @@ # This is a Databricks pipelines definition for my_sql_pipelines_project. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. -project: +bundle: name: my_sql_pipelines_project + uuid: [UUID] include: - resources/*.yml diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl index 5b5aa190c6..09e2f9fc97 100644 --- a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl @@ -1,7 +1,8 @@ # This is a Databricks pipelines definition for {{.project_name}}. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. 
-project: +bundle: name: {{.project_name}} + uuid: {{bundle_uuid}} include: - resources/*.yml From 62adf4d0eb3bd08b3f2cac81cd72064c570b4f0c Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Wed, 25 Jun 2025 11:21:01 -0700 Subject: [PATCH 04/13] removed resources directory --- .../pipelines/init/error-cases/.ruff.toml | 2 +- .../pipelines/init/error-cases/output.txt | 24 +++---- .../.vscode/__builtins__.pyi | 0 .../.vscode/extensions.json | 0 .../output/my_project}/.vscode/settings.json | 2 +- .../output/my_project}/README.md | 11 +--- .../databricks.yml | 5 +- .../my_project/my_project_pipeline}/README.md | 6 +- .../explorations/sample_exploration.ipynb | 63 ++++++++++++++++++ .../my_project.pipeline.yml} | 4 +- .../sample_trips_my_project.py} | 2 +- .../sample_zones_my_project.py} | 4 +- .../my_project_pipeline}/utilities/utils.py | 0 .../.gitignore => my_project/out.gitignore} | 0 acceptance/pipelines/init/error-cases/script | 13 ++-- acceptance/pipelines/init/python/.ruff.toml | 2 +- acceptance/pipelines/init/python/input.json | 2 +- acceptance/pipelines/init/python/output.txt | 2 +- .../.vscode/__builtins__.pyi | 0 .../.vscode/extensions.json | 0 .../my_python_project}/.vscode/settings.json | 2 +- .../output/my_python_project}/README.md | 11 +--- .../databricks.yml | 5 +- .../my_python_project_pipeline}/README.md | 6 +- .../explorations/sample_exploration.ipynb | 63 ++++++++++++++++++ .../my_python_project.pipeline.yml} | 4 +- .../sample_trips_my_python_project.py} | 2 +- .../sample_zones_my_python_project.py} | 4 +- .../utilities/utils.py | 0 .../out.gitignore} | 0 acceptance/pipelines/init/python/script | 3 + acceptance/pipelines/init/sql/.ruff.toml | 2 +- acceptance/pipelines/init/sql/input.json | 2 +- acceptance/pipelines/init/sql/output.txt | 2 +- .../.vscode/__builtins__.pyi | 0 .../.vscode/extensions.json | 0 .../.vscode/settings.json | 2 +- .../README.md | 11 +--- .../databricks.yml | 5 +- .../my_sql_project_pipeline}/README.md | 6 +- 
.../explorations/sample_exploration.ipynb | 64 +++++++++++++++++++ .../my_sql_project.pipeline.yml} | 4 +- .../sample_trips_my_sql_project.sql} | 2 +- .../sample_zones_my_sql_project.sql} | 4 +- .../out.gitignore} | 0 acceptance/pipelines/init/sql/script | 4 ++ .../pipelines/databricks_template_schema.json | 2 +- .../pipelines/template/__preamble.tmpl | 10 +-- .../template/{{.project_name}}/README.md.tmpl | 7 +- .../{{.project_name}}/databricks.yml.tmpl | 1 - .../{{.project_name}}_pipeline/README.md.tmpl | 0 .../sample_exploration.ipynb.tmpl | 0 .../sample_trips_{{.project_name}}.py.tmpl | 0 .../sample_trips_{{.project_name}}.sql.tmpl | 0 .../sample_zones_{{.project_name}}.py.tmpl | 0 .../sample_zones_{{.project_name}}.sql.tmpl | 0 .../utilities/utils.py | 0 .../{{.project_name}}.pipeline.yml.tmpl | 0 58 files changed, 273 insertions(+), 97 deletions(-) rename acceptance/pipelines/init/error-cases/output/{pipelines_project => my_project}/.vscode/__builtins__.pyi (100%) rename acceptance/pipelines/init/error-cases/output/{pipelines_project => my_project}/.vscode/extensions.json (100%) rename acceptance/pipelines/init/{python/output/my_pipelines_project => error-cases/output/my_project}/.vscode/settings.json (90%) rename acceptance/pipelines/init/{python/output/my_pipelines_project => error-cases/output/my_project}/README.md (76%) rename acceptance/pipelines/init/error-cases/output/{pipelines_project => my_project}/databricks.yml (92%) rename acceptance/pipelines/init/{python/output/my_pipelines_project/resources/my_pipelines_project_pipeline => error-cases/output/my_project/my_project_pipeline}/README.md (81%) create mode 100644 acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/explorations/sample_exploration.ipynb rename acceptance/pipelines/init/error-cases/output/{pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml => my_project/my_project_pipeline/my_project.pipeline.yml} (75%) rename 
acceptance/pipelines/init/error-cases/output/{pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py => my_project/my_project_pipeline/transformations/sample_trips_my_project.py} (90%) rename acceptance/pipelines/init/error-cases/output/{pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py => my_project/my_project_pipeline/transformations/sample_zones_my_project.py} (80%) rename acceptance/pipelines/init/error-cases/output/{pipelines_project/resources/pipelines_project_pipeline => my_project/my_project_pipeline}/utilities/utils.py (100%) rename acceptance/pipelines/init/error-cases/output/{pipelines_project/.gitignore => my_project/out.gitignore} (100%) rename acceptance/pipelines/init/python/output/{my_pipelines_project => my_python_project}/.vscode/__builtins__.pyi (100%) rename acceptance/pipelines/init/python/output/{my_pipelines_project => my_python_project}/.vscode/extensions.json (100%) rename acceptance/pipelines/init/{error-cases/output/pipelines_project => python/output/my_python_project}/.vscode/settings.json (92%) rename acceptance/pipelines/init/{error-cases/output/pipelines_project => python/output/my_python_project}/README.md (78%) rename acceptance/pipelines/init/python/output/{my_pipelines_project => my_python_project}/databricks.yml (91%) rename acceptance/pipelines/init/{error-cases/output/pipelines_project/resources/pipelines_project_pipeline => python/output/my_python_project/my_python_project_pipeline}/README.md (86%) create mode 100644 acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/explorations/sample_exploration.ipynb rename acceptance/pipelines/init/python/output/{my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml => my_python_project/my_python_project_pipeline/my_python_project.pipeline.yml} (74%) rename 
acceptance/pipelines/init/python/output/{my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py => my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py} (90%) rename acceptance/pipelines/init/python/output/{my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py => my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py} (79%) rename acceptance/pipelines/init/python/output/{my_pipelines_project/resources/my_pipelines_project_pipeline => my_python_project/my_python_project_pipeline}/utilities/utils.py (100%) rename acceptance/pipelines/init/python/output/{my_pipelines_project/.gitignore => my_python_project/out.gitignore} (100%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project => my_sql_project}/.vscode/__builtins__.pyi (100%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project => my_sql_project}/.vscode/extensions.json (100%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project => my_sql_project}/.vscode/settings.json (90%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project => my_sql_project}/README.md (76%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project => my_sql_project}/databricks.yml (91%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline => my_sql_project/my_sql_project_pipeline}/README.md (78%) create mode 100644 acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/explorations/sample_exploration.ipynb rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml => my_sql_project/my_sql_project_pipeline/my_sql_project.pipeline.yml} (72%) rename 
acceptance/pipelines/init/sql/output/{my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql => my_sql_project/my_sql_project_pipeline/transformations/sample_trips_my_sql_project.sql} (75%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql => my_sql_project/my_sql_project_pipeline/transformations/sample_zones_my_sql_project.sql} (66%) rename acceptance/pipelines/init/sql/output/{my_sql_pipelines_project/.gitignore => my_sql_project/out.gitignore} (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/README.md.tmpl (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/utilities/utils.py (100%) rename libs/template/templates/pipelines/template/{{.project_name}}/{resources => }/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl (100%) diff --git a/acceptance/pipelines/init/error-cases/.ruff.toml 
b/acceptance/pipelines/init/error-cases/.ruff.toml index 1ab316576b..bb9a1ba8bb 100644 --- a/acceptance/pipelines/init/error-cases/.ruff.toml +++ b/acceptance/pipelines/init/error-cases/.ruff.toml @@ -1,2 +1,2 @@ [format] -exclude = ["*.py"] +exclude = ["*.py", ".ipynb"] diff --git a/acceptance/pipelines/init/error-cases/output.txt b/acceptance/pipelines/init/error-cases/output.txt index 9a93cff052..cff257dea5 100644 --- a/acceptance/pipelines/init/error-cases/output.txt +++ b/acceptance/pipelines/init/error-cases/output.txt @@ -3,8 +3,18 @@ >>> errcode [CLI] install-pipelines-cli -d ./subdir pipelines successfully installed in directory "./subdir" +=== Test with missing config file +>>> errcode ./subdir/pipelines init --output-dir output + +Welcome to the template for pipelines! + + +Your new project has been created in the 'my_project' directory! + +Refer to the README.md file for "getting started" instructions! + === Test with invalid project name (contains uppercase letters) ->>> errcode ./subdir/pipelines init --config-file ./invalid_input.json --output-dir output +>>> errcode ./subdir/pipelines init --config-file ./invalid_input.json --output-dir invalid-output Error: failed to load config from file ./invalid_input.json: invalid value for project_name: "InvalidProjectName". Name must consist of lower case letters, numbers, and underscores. Usage: pipelines init [flags] @@ -24,18 +34,8 @@ Error: failed to load config from file ./invalid_input.json: invalid value for p Exit code: 1 -=== Test with missing config file ->>> errcode ./subdir/pipelines init --output-dir output - -Welcome to the template for pipelines! - - -Your new project has been created in the 'pipelines_project' directory! - -Refer to the README.md file for "getting started" instructions! 
- === Test with non-existent config file ->>> errcode ./subdir/pipelines init --config-file ./nonexistent.json --output-dir output +>>> errcode ./subdir/pipelines init --config-file ./nonexistent.json --output-dir invalid-output-2 Error: failed to load config from file ./nonexistent.json: open ./nonexistent.json: no such file or directory Usage: pipelines init [flags] diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/__builtins__.pyi similarity index 100% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/__builtins__.pyi rename to acceptance/pipelines/init/error-cases/output/my_project/.vscode/__builtins__.pyi diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/extensions.json b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/extensions.json similarity index 100% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/extensions.json rename to acceptance/pipelines/init/error-cases/output/my_project/.vscode/extensions.json diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/settings.json similarity index 90% rename from acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json rename to acceptance/pipelines/init/error-cases/output/my_project/.vscode/settings.json index 9b1344f30c..09a01b181c 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/settings.json +++ b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/settings.json @@ -8,7 +8,7 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["resources/my_pipelines_project_pipeline"], + "python.analysis.extraPaths": ["resources/my_project_pipeline"], 
"files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/README.md b/acceptance/pipelines/init/error-cases/output/my_project/README.md similarity index 76% rename from acceptance/pipelines/init/python/output/my_pipelines_project/README.md rename to acceptance/pipelines/init/error-cases/output/my_project/README.md index 54485ec4f2..8514ebd053 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/README.md +++ b/acceptance/pipelines/init/error-cases/output/my_project/README.md @@ -1,6 +1,6 @@ -# my_pipelines_project +# my_project -The 'my_pipelines_project' project was generated by using the Pipelines template. +The 'my_project' project was generated by using the Pipelines template. ## Setup @@ -30,12 +30,7 @@ The 'my_pipelines_project' project was generated by using the Pipelines template $ pipelines deploy --target prod ``` -3. Use the "summary" comand to review everything that was deployed: - ``` - $ pipelines summary - ``` - -4. To run a job or pipeline, use the "run" command: +3. To run a job or pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml similarity index 92% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml rename to acceptance/pipelines/init/error-cases/output/my_project/databricks.yml index e4cf682274..048cf90305 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/databricks.yml +++ b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml @@ -1,7 +1,7 @@ -# This is a Databricks pipelines definition for pipelines_project. +# This is a Databricks pipelines definition for my_project. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. 
bundle: - name: pipelines_project + name: my_project uuid: [UUID] include: @@ -26,7 +26,6 @@ targets: # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true - deploy_on_run: true workspace: host: [DATABRICKS_URL] variables: diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/README.md similarity index 81% rename from acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md rename to acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/README.md index 2fdc1443ee..858fc7db0d 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/README.md +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/README.md @@ -1,6 +1,6 @@ -# my_pipelines_project_pipeline +# my_project_pipeline -This folder defines all source code for the my_pipelines_project_pipeline pipeline: +This folder defines all source code for the my_project_pipeline pipeline: - `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. - `transformations`: All dataset definitions and transformations. @@ -12,7 +12,7 @@ This folder defines all source code for the my_pipelines_project_pipeline pipeli To get started, go to the `transformations` folder -- most of the relevant source code lives there: * By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_my_pipelines_project.py" to get familiar with the syntax. +* Take a look at the sample under "sample_trips_my_project.py" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. * Use `Run file` to run and preview a single transformation. 
* Use `Run pipeline` to run _all_ transformations in the entire pipeline. diff --git a/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/explorations/sample_exploration.ipynb b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..7368a6c4c0 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/explorations/sample_exploration.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM hive_metastore.[USERNAME].my_project\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/my_project.pipeline.yml similarity index 75% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml rename to acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/my_project.pipeline.yml index 84191e234d..e3b1e3ae74 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/pipelines_project.pipeline.yml +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/my_project.pipeline.yml @@ -1,7 +1,7 @@ resources: pipelines: - pipelines_project_pipeline: - name: pipelines_project_pipeline + my_project_pipeline: + name: my_project_pipeline serverless: true channel: "PREVIEW" catalog: ${var.catalog} diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py similarity index 90% rename from 
acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py rename to acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py index 3462df33df..8b9196403e 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_trips_pipelines_project.py +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py @@ -9,7 +9,7 @@ @dlt.table -def sample_trips_pipelines_project(): +def sample_trips_my_project(): return ( spark.read.table("samples.nyctaxi.trips") .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py similarity index 80% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py rename to acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py index 51c8c87769..ca0eda976e 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/transformations/sample_zones_pipelines_project.py +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py @@ -8,10 +8,10 @@ @dlt.table -def sample_zones_pipelines_project(): +def sample_zones_my_project(): # Read from the "sample_trips" table, then sum all the fares return ( - spark.read.table("sample_trips_pipelines_project") + spark.read.table("sample_trips_my_project") 
.groupBy(col("pickup_zip")) .agg( sum("fare_amount").alias("total_fare") diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/utilities/utils.py b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/utilities/utils.py similarity index 100% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/utilities/utils.py rename to acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/utilities/utils.py diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.gitignore b/acceptance/pipelines/init/error-cases/output/my_project/out.gitignore similarity index 100% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/.gitignore rename to acceptance/pipelines/init/error-cases/output/my_project/out.gitignore diff --git a/acceptance/pipelines/init/error-cases/script b/acceptance/pipelines/init/error-cases/script index afb083187c..7f0a247bb2 100644 --- a/acceptance/pipelines/init/error-cases/script +++ b/acceptance/pipelines/init/error-cases/script @@ -5,15 +5,18 @@ mkdir -p $tmpdir title "Install pipelines CLI" trace errcode $CLI install-pipelines-cli -d $tmpdir -title "Test with invalid project name (contains uppercase letters)" -echo '{"project_name": "InvalidProjectName"}' > invalid_input.json -trace errcode $pipelines init --config-file ./invalid_input.json --output-dir output - title "Test with missing config file" trace errcode $pipelines init --output-dir output +title "Test with invalid project name (contains uppercase letters)" +echo '{"project_name": "InvalidProjectName"}' > invalid_input.json +trace errcode $pipelines init --config-file ./invalid_input.json --output-dir invalid-output + title "Test with non-existent config file" -trace errcode $pipelines init --config-file ./nonexistent.json --output-dir output +trace errcode $pipelines init --config-file 
./nonexistent.json --output-dir invalid-output-2 + +# Do not affect this repository's git behaviour +mv output/my_project/.gitignore output/my_project/out.gitignore # Clean up rm -f invalid_input.json diff --git a/acceptance/pipelines/init/python/.ruff.toml b/acceptance/pipelines/init/python/.ruff.toml index 1ab316576b..bb9a1ba8bb 100644 --- a/acceptance/pipelines/init/python/.ruff.toml +++ b/acceptance/pipelines/init/python/.ruff.toml @@ -1,2 +1,2 @@ [format] -exclude = ["*.py"] +exclude = ["*.py", ".ipynb"] diff --git a/acceptance/pipelines/init/python/input.json b/acceptance/pipelines/init/python/input.json index f71e40e506..5a1211b99e 100644 --- a/acceptance/pipelines/init/python/input.json +++ b/acceptance/pipelines/init/python/input.json @@ -1,5 +1,5 @@ { - "project_name": "my_pipelines_project", + "project_name": "my_python_project", "default_catalog": "main", "personal_schemas": "yes", "shared_schema": "default", diff --git a/acceptance/pipelines/init/python/output.txt b/acceptance/pipelines/init/python/output.txt index 4b010b9a75..e682f6b135 100644 --- a/acceptance/pipelines/init/python/output.txt +++ b/acceptance/pipelines/init/python/output.txt @@ -9,6 +9,6 @@ pipelines successfully installed in directory "./subdir" Welcome to the template for pipelines! -Your new project has been created in the 'my_pipelines_project' directory! +Your new project has been created in the 'my_python_project' directory! Refer to the README.md file for "getting started" instructions! 
diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/python/output/my_python_project/.vscode/__builtins__.pyi similarity index 100% rename from acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/__builtins__.pyi rename to acceptance/pipelines/init/python/output/my_python_project/.vscode/__builtins__.pyi diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/extensions.json b/acceptance/pipelines/init/python/output/my_python_project/.vscode/extensions.json similarity index 100% rename from acceptance/pipelines/init/python/output/my_pipelines_project/.vscode/extensions.json rename to acceptance/pipelines/init/python/output/my_python_project/.vscode/extensions.json diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json b/acceptance/pipelines/init/python/output/my_python_project/.vscode/settings.json similarity index 92% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json rename to acceptance/pipelines/init/python/output/my_python_project/.vscode/settings.json index bf3645b890..674e2be3f3 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/.vscode/settings.json +++ b/acceptance/pipelines/init/python/output/my_python_project/.vscode/settings.json @@ -8,7 +8,7 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["resources/pipelines_project_pipeline"], + "python.analysis.extraPaths": ["resources/my_python_project_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/README.md b/acceptance/pipelines/init/python/output/my_python_project/README.md similarity index 78% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/README.md rename to 
acceptance/pipelines/init/python/output/my_python_project/README.md index ffe836dd36..80e6007a62 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/README.md +++ b/acceptance/pipelines/init/python/output/my_python_project/README.md @@ -1,6 +1,6 @@ -# pipelines_project +# my_python_project -The 'pipelines_project' project was generated by using the Pipelines template. +The 'my_python_project' project was generated by using the Pipelines template. ## Setup @@ -30,12 +30,7 @@ The 'pipelines_project' project was generated by using the Pipelines template. $ pipelines deploy --target prod ``` -3. Use the "summary" comand to review everything that was deployed: - ``` - $ pipelines summary - ``` - -4. To run a job or pipeline, use the "run" command: +3. To run a job or pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml b/acceptance/pipelines/init/python/output/my_python_project/databricks.yml similarity index 91% rename from acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml rename to acceptance/pipelines/init/python/output/my_python_project/databricks.yml index 62d321c5e8..d19a9a225a 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/databricks.yml +++ b/acceptance/pipelines/init/python/output/my_python_project/databricks.yml @@ -1,7 +1,7 @@ -# This is a Databricks pipelines definition for my_pipelines_project. +# This is a Databricks pipelines definition for my_python_project. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. bundle: - name: my_pipelines_project + name: my_python_project uuid: [UUID] include: @@ -26,7 +26,6 @@ targets: # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
mode: development default: true - deploy_on_run: true workspace: host: [DATABRICKS_URL] variables: diff --git a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/README.md similarity index 86% rename from acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md rename to acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/README.md index 6476007896..f59ec06626 100644 --- a/acceptance/pipelines/init/error-cases/output/pipelines_project/resources/pipelines_project_pipeline/README.md +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/README.md @@ -1,6 +1,6 @@ -# pipelines_project_pipeline +# my_python_project_pipeline -This folder defines all source code for the pipelines_project_pipeline pipeline: +This folder defines all source code for the my_python_project_pipeline pipeline: - `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. - `transformations`: All dataset definitions and transformations. @@ -12,7 +12,7 @@ This folder defines all source code for the pipelines_project_pipeline pipeline: To get started, go to the `transformations` folder -- most of the relevant source code lives there: * By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_pipelines_project.py" to get familiar with the syntax. +* Take a look at the sample under "sample_trips_my_python_project.py" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html. * Use `Run file` to run and preview a single transformation. * Use `Run pipeline` to run _all_ transformations in the entire pipeline. 
diff --git a/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/explorations/sample_exploration.ipynb b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..dd456692cc --- /dev/null +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/explorations/sample_exploration.ipynb @@ -0,0 +1,63 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "display(spark.sql(\"SELECT * FROM main.[USERNAME].my_python_project\"))" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/my_python_project.pipeline.yml similarity index 74% rename from acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml rename to acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/my_python_project.pipeline.yml index d618c4a518..615e0dc1f2 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/my_pipelines_project.pipeline.yml +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/my_python_project.pipeline.yml @@ -1,7 +1,7 @@ resources: pipelines: - my_pipelines_project_pipeline: - name: my_pipelines_project_pipeline + my_python_project_pipeline: + name: my_python_project_pipeline serverless: true channel: "PREVIEW" catalog: ${var.catalog} diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py similarity index 90% rename from 
acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py rename to acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py index d1efafa766..7b4584cdf5 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_trips_my_pipelines_project.py +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py @@ -9,7 +9,7 @@ @dlt.table -def sample_trips_my_pipelines_project(): +def sample_trips_my_python_project(): return ( spark.read.table("samples.nyctaxi.trips") .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py similarity index 79% rename from acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py rename to acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py index 957821b904..94b3556414 100644 --- a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/transformations/sample_zones_my_pipelines_project.py +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py @@ -8,10 +8,10 @@ @dlt.table -def sample_zones_my_pipelines_project(): +def sample_zones_my_python_project(): # Read from the "sample_trips" table, then sum 
all the fares return ( - spark.read.table("sample_trips_my_pipelines_project") + spark.read.table("sample_trips_my_python_project") .groupBy(col("pickup_zip")) .agg( sum("fare_amount").alias("total_fare") diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/utilities/utils.py b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/utilities/utils.py similarity index 100% rename from acceptance/pipelines/init/python/output/my_pipelines_project/resources/my_pipelines_project_pipeline/utilities/utils.py rename to acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/utilities/utils.py diff --git a/acceptance/pipelines/init/python/output/my_pipelines_project/.gitignore b/acceptance/pipelines/init/python/output/my_python_project/out.gitignore similarity index 100% rename from acceptance/pipelines/init/python/output/my_pipelines_project/.gitignore rename to acceptance/pipelines/init/python/output/my_python_project/out.gitignore diff --git a/acceptance/pipelines/init/python/script b/acceptance/pipelines/init/python/script index 0a865a44b7..dcda3a48ac 100644 --- a/acceptance/pipelines/init/python/script +++ b/acceptance/pipelines/init/python/script @@ -7,5 +7,8 @@ trace errcode $CLI install-pipelines-cli -d $tmpdir title "Test basic pipelines init with configuration file" trace $pipelines init --config-file ./input.json --output-dir output + +mv output/my_python_project/.gitignore output/my_python_project/out.gitignore + rm -f $pipelines rm -rf $tmpdir diff --git a/acceptance/pipelines/init/sql/.ruff.toml b/acceptance/pipelines/init/sql/.ruff.toml index 1ab316576b..bb9a1ba8bb 100644 --- a/acceptance/pipelines/init/sql/.ruff.toml +++ b/acceptance/pipelines/init/sql/.ruff.toml @@ -1,2 +1,2 @@ [format] -exclude = ["*.py"] +exclude = ["*.py", ".ipynb"] diff --git a/acceptance/pipelines/init/sql/input.json b/acceptance/pipelines/init/sql/input.json index 
41480c730b..ff1edf7336 100644 --- a/acceptance/pipelines/init/sql/input.json +++ b/acceptance/pipelines/init/sql/input.json @@ -1,5 +1,5 @@ { - "project_name": "my_sql_pipelines_project", + "project_name": "my_sql_project", "default_catalog": "main", "personal_schemas": "no", "shared_schema": "shared_dev", diff --git a/acceptance/pipelines/init/sql/output.txt b/acceptance/pipelines/init/sql/output.txt index ad3f464890..2a111901ac 100644 --- a/acceptance/pipelines/init/sql/output.txt +++ b/acceptance/pipelines/init/sql/output.txt @@ -9,6 +9,6 @@ pipelines successfully installed in directory "./subdir" Welcome to the template for pipelines! -Your new project has been created in the 'my_sql_pipelines_project' directory! +Your new project has been created in the 'my_sql_project' directory! Refer to the README.md file for "getting started" instructions! diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/sql/output/my_sql_project/.vscode/__builtins__.pyi similarity index 100% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/__builtins__.pyi rename to acceptance/pipelines/init/sql/output/my_sql_project/.vscode/__builtins__.pyi diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/extensions.json b/acceptance/pipelines/init/sql/output/my_sql_project/.vscode/extensions.json similarity index 100% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/extensions.json rename to acceptance/pipelines/init/sql/output/my_sql_project/.vscode/extensions.json diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json b/acceptance/pipelines/init/sql/output/my_sql_project/.vscode/settings.json similarity index 90% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json rename to 
acceptance/pipelines/init/sql/output/my_sql_project/.vscode/settings.json index aaa772a7fe..f38f52b03f 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.vscode/settings.json +++ b/acceptance/pipelines/init/sql/output/my_sql_project/.vscode/settings.json @@ -8,7 +8,7 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.analysis.extraPaths": ["resources/my_sql_pipelines_project_pipeline"], + "python.analysis.extraPaths": ["resources/my_sql_project_pipeline"], "files.exclude": { "**/*.egg-info": true, "**/__pycache__": true, diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md b/acceptance/pipelines/init/sql/output/my_sql_project/README.md similarity index 76% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md rename to acceptance/pipelines/init/sql/output/my_sql_project/README.md index f9ac7b56c5..c7d77a966b 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/README.md +++ b/acceptance/pipelines/init/sql/output/my_sql_project/README.md @@ -1,6 +1,6 @@ -# my_sql_pipelines_project +# my_sql_project -The 'my_sql_pipelines_project' project was generated by using the Pipelines template. +The 'my_sql_project' project was generated by using the Pipelines template. ## Setup @@ -30,12 +30,7 @@ The 'my_sql_pipelines_project' project was generated by using the Pipelines temp $ pipelines deploy --target prod ``` -3. Use the "summary" comand to review everything that was deployed: - ``` - $ pipelines summary - ``` - -4. To run a job or pipeline, use the "run" command: +3. 
To run a job or pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml b/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml similarity index 91% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml rename to acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml index 7bc84ba717..88f5319d2a 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/databricks.yml +++ b/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml @@ -1,7 +1,7 @@ -# This is a Databricks pipelines definition for my_sql_pipelines_project. +# This is a Databricks pipelines definition for my_sql_project. # See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. bundle: - name: my_sql_pipelines_project + name: my_sql_project uuid: [UUID] include: @@ -26,7 +26,6 @@ targets: # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
mode: development default: true - deploy_on_run: true workspace: host: [DATABRICKS_URL] variables: diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/README.md similarity index 78% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md rename to acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/README.md index 94ec0b8a4f..7df92d59ff 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/README.md +++ b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/README.md @@ -1,6 +1,6 @@ -# my_sql_pipelines_project_pipeline +# my_sql_project_pipeline -This folder defines all source code for the 'my_sql_pipelines_project_pipeline' pipeline: +This folder defines all source code for the 'my_sql_project_pipeline' pipeline: - `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline. - `transformations`: All dataset definitions and transformations. @@ -11,7 +11,7 @@ This folder defines all source code for the 'my_sql_pipelines_project_pipeline' To get started, go to the `transformations` folder -- most of the relevant source code lives there: * By convention, every dataset under `transformations` is in a separate file. -* Take a look at the sample under "sample_trips_my_sql_pipelines_project.sql" to get familiar with the syntax. +* Take a look at the sample under "sample_trips_my_sql_project.sql" to get familiar with the syntax. Read more about the syntax at https://docs.databricks.com/dlt/sql-ref.html. * Use `Run file` to run and preview a single transformation. * Use `Run pipeline` to run _all_ transformations in the entire pipeline. 
diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/explorations/sample_exploration.ipynb b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/explorations/sample_exploration.ipynb new file mode 100644 index 0000000000..deee8395ea --- /dev/null +++ b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/explorations/sample_exploration.ipynb @@ -0,0 +1,64 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "source": [ + "### Example Exploratory Notebook\n", + "\n", + "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n", + "\n", + "**Note**: This notebook is not executed as part of the pipeline." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "-- !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. 
The tables referenced in this notebook depend on that step.\n", + "\n", + "USE CATALOG `main`;\n", + "USE SCHEMA `shared_dev`;\n", + "\n", + "SELECT * from my_sql_project;" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": null, + "dashboards": [], + "environmentMetadata": null, + "inputWidgetPreferences": null, + "language": "sql", + "notebookMetadata": {}, + "notebookName": "sample_exploration", + "widgets": {} + }, + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/my_sql_project.pipeline.yml similarity index 72% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml rename to acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/my_sql_project.pipeline.yml index 1bb72eacc9..1d11b3f81c 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/my_sql_pipelines_project.pipeline.yml +++ b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/my_sql_project.pipeline.yml @@ -1,7 +1,7 @@ resources: pipelines: - my_sql_pipelines_project_pipeline: - name: my_sql_pipelines_project_pipeline + my_sql_project_pipeline: + name: my_sql_project_pipeline serverless: true channel: "PREVIEW" catalog: ${var.catalog} diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/transformations/sample_trips_my_sql_project.sql similarity index 75% rename from 
acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql rename to acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/transformations/sample_trips_my_sql_project.sql index 5792d69972..41971fa014 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_trips_my_sql_pipelines_project.sql +++ b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/transformations/sample_trips_my_sql_project.sql @@ -2,7 +2,7 @@ -- Edit the sample below or add new transformations -- using "+ Add" in the file browser. -CREATE MATERIALIZED VIEW sample_trips_my_sql_pipelines_project AS +CREATE MATERIALIZED VIEW sample_trips_my_sql_project AS SELECT pickup_zip, fare_amount diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/transformations/sample_zones_my_sql_project.sql similarity index 66% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql rename to acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/transformations/sample_zones_my_sql_project.sql index 1275615d57..eae8c8aa41 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/resources/my_sql_pipelines_project_pipeline/transformations/sample_zones_my_sql_pipelines_project.sql +++ b/acceptance/pipelines/init/sql/output/my_sql_project/my_sql_project_pipeline/transformations/sample_zones_my_sql_project.sql @@ -2,9 +2,9 @@ -- Edit the sample below or add new transformations -- using "+ Add" in the file browser. 
-CREATE MATERIALIZED VIEW sample_zones_my_sql_pipelines_project AS +CREATE MATERIALIZED VIEW sample_zones_my_sql_project AS SELECT pickup_zip, SUM(fare_amount) AS total_fare -FROM sample_trips_my_sql_pipelines_project +FROM sample_trips_my_sql_project GROUP BY pickup_zip diff --git a/acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.gitignore b/acceptance/pipelines/init/sql/output/my_sql_project/out.gitignore similarity index 100% rename from acceptance/pipelines/init/sql/output/my_sql_pipelines_project/.gitignore rename to acceptance/pipelines/init/sql/output/my_sql_project/out.gitignore diff --git a/acceptance/pipelines/init/sql/script b/acceptance/pipelines/init/sql/script index ba801454f0..91e21dccdc 100644 --- a/acceptance/pipelines/init/sql/script +++ b/acceptance/pipelines/init/sql/script @@ -7,5 +7,9 @@ trace errcode $CLI install-pipelines-cli -d $tmpdir title "Test pipelines init with SQL configuration" trace $pipelines init --config-file ./input.json --output-dir output + +# Do not affect this repository's git behaviour +mv output/my_sql_project/.gitignore output/my_sql_project/out.gitignore + rm -f $pipelines rm -rf $tmpdir diff --git a/libs/template/templates/pipelines/databricks_template_schema.json b/libs/template/templates/pipelines/databricks_template_schema.json index ac9d9deb2c..adc580c044 100644 --- a/libs/template/templates/pipelines/databricks_template_schema.json +++ b/libs/template/templates/pipelines/databricks_template_schema.json @@ -3,7 +3,7 @@ "properties": { "project_name": { "type": "string", - "default": "pipelines_project", + "default": "my_project", "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project\nproject_name", "order": 1, "pattern": "^[a-z0-9_]+$", diff --git a/libs/template/templates/pipelines/template/__preamble.tmpl b/libs/template/templates/pipelines/template/__preamble.tmpl index c6c0c2321f..199ad088a6 100644 --- 
a/libs/template/templates/pipelines/template/__preamble.tmpl +++ b/libs/template/templates/pipelines/template/__preamble.tmpl @@ -7,10 +7,10 @@ This file only contains template directives; it is skipped for the actual output {{$isSQL := eq .language "sql"}} {{if $isSQL}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py"}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py"}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py"}} {{else}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql"}} - {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql"}} + {{skip "{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql"}} {{end}} diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl index d8166ba389..2abb9b9ed5 100644 --- a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl @@ -30,12 +30,7 @@ The '{{.project_name}}' project was generated by using the Pipelines template. $ pipelines deploy --target prod ``` -3. Use the "summary" comand to review everything that was deployed: - ``` - $ pipelines summary - ``` - -4. 
To run a job or pipeline, use the "run" command: +3. To run a job or pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl index 09e2f9fc97..ba42045858 100644 --- a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl @@ -26,7 +26,6 @@ targets: # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development default: true - deploy_on_run: true workspace: host: {{workspace_host}} variables: diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/README.md.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl 
b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl 
b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/utilities/utils.py rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/resources/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl rename to libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl From be73133024f330f91fb3396bc6525b48eef8d666 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Wed, 25 Jun 2025 12:58:52 -0700 Subject: [PATCH 05/13] windows test fix --- .../pipelines/init/error-cases/output.txt | 8 ++-- .../pipelines/init/error-cases/test.toml | 4 ++ cmd/pipelines/init.go | 47 ++++++++++--------- 3 files changed, 32 insertions(+), 27 deletions(-) create mode 100644 
acceptance/pipelines/init/error-cases/test.toml diff --git a/acceptance/pipelines/init/error-cases/output.txt b/acceptance/pipelines/init/error-cases/output.txt index cff257dea5..803eba5f8a 100644 --- a/acceptance/pipelines/init/error-cases/output.txt +++ b/acceptance/pipelines/init/error-cases/output.txt @@ -20,9 +20,9 @@ Usage: pipelines init [flags] Flags: - --config-file string JSON file containing key value pairs of input parameters required for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. -h, --help help for init - --output-dir string Directory to write the initialized template to + --output-dir string Directory to write the initialized template to. Global Flags: --debug enable debug logging @@ -41,9 +41,9 @@ Usage: pipelines init [flags] Flags: - --config-file string JSON file containing key value pairs of input parameters required for template initialization + --config-file string JSON file containing key value pairs of input parameters required for template initialization. -h, --help help for init - --output-dir string Directory to write the initialized template to + --output-dir string Directory to write the initialized template to. Global Flags: --debug enable debug logging diff --git a/acceptance/pipelines/init/error-cases/test.toml b/acceptance/pipelines/init/error-cases/test.toml new file mode 100644 index 0000000000..71ad00b072 --- /dev/null +++ b/acceptance/pipelines/init/error-cases/test.toml @@ -0,0 +1,4 @@ +[[Repls]] +# Windows: +Old = 'The system cannot find the file specified.' 
+New = 'no such file or directory' diff --git a/cmd/pipelines/init.go b/cmd/pipelines/init.go index 51fb98569e..1577f63e48 100644 --- a/cmd/pipelines/init.go +++ b/cmd/pipelines/init.go @@ -7,37 +7,38 @@ import ( ) func initCommand() *cobra.Command { - var outputDir string - var configFile string cmd := &cobra.Command{ Use: "init", Short: "Initialize a new pipelines project", PreRunE: root.MustWorkspaceClient, Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - ctx := cmd.Context() + } + var configFile string + var outputDir string + cmd.Flags().StringVar(&configFile, "config-file", "", "JSON file containing key value pairs of input parameters required for template initialization.") + cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to.") + + cmd.RunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() - r := template.Resolver{ - TemplatePathOrUrl: "pipelines", - ConfigFile: configFile, - OutputDir: outputDir, - } + r := template.Resolver{ + TemplatePathOrUrl: "pipelines", + ConfigFile: configFile, + OutputDir: outputDir, + } - tmpl, err := r.Resolve(ctx) - if err != nil { - return err - } - defer tmpl.Reader.Cleanup(ctx) + tmpl, err := r.Resolve(ctx) + if err != nil { + return err + } + defer tmpl.Reader.Cleanup(ctx) - err = tmpl.Writer.Materialize(ctx, tmpl.Reader) - if err != nil { - return err - } - tmpl.Writer.LogTelemetry(ctx) - return nil - }, + err = tmpl.Writer.Materialize(ctx, tmpl.Reader) + if err != nil { + return err + } + tmpl.Writer.LogTelemetry(ctx) + return nil } - cmd.Flags().StringVar(&outputDir, "output-dir", "", "Directory to write the initialized template to") - cmd.Flags().StringVar(&configFile, "config-file", "", "JSON file containing key value pairs of input parameters required for template initialization") return cmd } From 737e9f727cfc1a814b4f8b834242c93386b78ba1 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Thu, 26 Jun 2025 13:57:48 
-0700 Subject: [PATCH 06/13] new root implementation for pipelines --- .../pipelines/init/error-cases/output.txt | 30 ------- .../install-pipelines-cli/output.txt | 8 +- cmd/pipelines/pipelines.go | 16 +--- cmd/pipelines/root.go | 84 +++++++++++++++++++ cmd/root/auth.go | 2 +- cmd/root/bundle.go | 2 +- cmd/root/bundle_test.go | 8 +- cmd/root/io.go | 8 +- cmd/root/logger.go | 10 +-- cmd/root/progress_logger.go | 12 +-- cmd/root/progress_logger_test.go | 18 ++-- cmd/root/root.go | 37 ++++---- cmd/root/user_agent_command.go | 8 +- cmd/root/user_agent_command_exec_id.go | 2 +- cmd/root/user_agent_command_exec_id_test.go | 2 +- cmd/root/user_agent_command_test.go | 8 +- cmd/root/user_agent_upstream.go | 2 +- cmd/root/user_agent_upstream_test.go | 10 +-- 18 files changed, 154 insertions(+), 113 deletions(-) create mode 100644 cmd/pipelines/root.go diff --git a/acceptance/pipelines/init/error-cases/output.txt b/acceptance/pipelines/init/error-cases/output.txt index 803eba5f8a..4dc86f2324 100644 --- a/acceptance/pipelines/init/error-cases/output.txt +++ b/acceptance/pipelines/init/error-cases/output.txt @@ -16,41 +16,11 @@ Refer to the README.md file for "getting started" instructions! === Test with invalid project name (contains uppercase letters) >>> errcode ./subdir/pipelines init --config-file ./invalid_input.json --output-dir invalid-output Error: failed to load config from file ./invalid_input.json: invalid value for project_name: "InvalidProjectName". Name must consist of lower case letters, numbers, and underscores. -Usage: - pipelines init [flags] - -Flags: - --config-file string JSON file containing key value pairs of input parameters required for template initialization. - -h, --help help for init - --output-dir string Directory to write the initialized template to. 
- -Global Flags: - --debug enable debug logging - -o, --output type output type: text or json (default text) - -p, --profile string ~/.databrickscfg profile - -t, --target string bundle target to use (if applicable) - -Error: failed to load config from file ./invalid_input.json: invalid value for project_name: "InvalidProjectName". Name must consist of lower case letters, numbers, and underscores. Exit code: 1 === Test with non-existent config file >>> errcode ./subdir/pipelines init --config-file ./nonexistent.json --output-dir invalid-output-2 Error: failed to load config from file ./nonexistent.json: open ./nonexistent.json: no such file or directory -Usage: - pipelines init [flags] - -Flags: - --config-file string JSON file containing key value pairs of input parameters required for template initialization. - -h, --help help for init - --output-dir string Directory to write the initialized template to. - -Global Flags: - --debug enable debug logging - -o, --output type output type: text or json (default text) - -p, --profile string ~/.databrickscfg profile - -t, --target string bundle target to use (if applicable) - -Error: failed to load config from file ./nonexistent.json: open ./nonexistent.json: no such file or directory Exit code: 1 diff --git a/acceptance/pipelines/install-pipelines-cli/output.txt b/acceptance/pipelines/install-pipelines-cli/output.txt index b2abcce147..5a74d25481 100644 --- a/acceptance/pipelines/install-pipelines-cli/output.txt +++ b/acceptance/pipelines/install-pipelines-cli/output.txt @@ -4,10 +4,9 @@ pipelines successfully installed in directory "./subdir" >>> errcode ./subdir/pipelines -Pipelines CLI (stub, to be filled in) +Pipelines CLI Usage: - pipelines [flags] pipelines [command] Available Commands: @@ -21,6 +20,7 @@ Flags: -o, --output type output type: text or json (default text) -p, --profile string ~/.databrickscfg profile -t, --target string bundle target to use (if applicable) + -v, --version version for pipelines Use 
"pipelines [command] --help" for more information about a command. @@ -39,10 +39,9 @@ Exit code: 1 pipelines successfully installed in directory "./subdir" >>> errcode ./subdir/pipelines -Pipelines CLI (stub, to be filled in) +Pipelines CLI Usage: - pipelines [flags] pipelines [command] Available Commands: @@ -56,5 +55,6 @@ Flags: -o, --output type output type: text or json (default text) -p, --profile string ~/.databrickscfg profile -t, --target string bundle target to use (if applicable) + -v, --version version for pipelines Use "pipelines [command] --help" for more information about a command. diff --git a/cmd/pipelines/pipelines.go b/cmd/pipelines/pipelines.go index 511fd0ee2a..d5a5bf5193 100644 --- a/cmd/pipelines/pipelines.go +++ b/cmd/pipelines/pipelines.go @@ -3,21 +3,11 @@ package pipelines import ( "context" - "github.com/databricks/cli/cmd/root" "github.com/spf13/cobra" ) func New(ctx context.Context) *cobra.Command { - cmd := &cobra.Command{ - Use: "pipelines", - Short: "Pipelines CLI", - Long: "Pipelines CLI (stub, to be filled in)", - Run: func(cmd *cobra.Command, args []string) { - _ = cmd.Help() - }, - } - root.SetupRootCommand(ctx, cmd) - - cmd.AddCommand(initCommand()) - return cmd + cli := NewRoot(ctx) + cli.AddCommand(initCommand()) + return cli } diff --git a/cmd/pipelines/root.go b/cmd/pipelines/root.go new file mode 100644 index 0000000000..c976a1127b --- /dev/null +++ b/cmd/pipelines/root.go @@ -0,0 +1,84 @@ +package pipelines + +import ( + "context" + "log/slog" + "os" + "strings" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/internal/build" + "github.com/databricks/cli/libs/log" + "github.com/spf13/cobra" +) + +// NewRoot is copied from cmd/root/root.go and adapted for pipelines use. +func NewRoot(ctx context.Context) *cobra.Command { + cmd := &cobra.Command{ + Use: "pipelines", + Short: "Pipelines CLI", + Version: build.GetInfo().Version, + + // Cobra prints the usage string to stderr if a command returns an error. 
+ // This usage string should only be displayed if an invalid combination of flags + // is specified and not when runtime errors occur (e.g. resource not found). + // The usage string is included in [flagErrorFunc] for flag errors only. + SilenceUsage: true, + + // Silence error printing by cobra. Errors are printed through cmdio. + SilenceErrors: true, + } + + // Pass the context along through the command during initialization. + // It will be overwritten when the command is executed. + cmd.SetContext(ctx) + + // Initialize flags + logFlags := root.InitLogFlags(cmd) + progressLoggerFlag := root.InitProgressLoggerFlag(cmd, logFlags) + outputFlag := root.InitOutputFlag(cmd) + root.InitProfileFlag(cmd) + root.InitTargetFlag(cmd) + + cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + + // Configure default logger. + ctx, err := logFlags.InitializeContext(ctx) + if err != nil { + return err + } + + logger := log.GetLogger(ctx) + logger.Info("start", + slog.String("version", build.GetInfo().Version), + slog.String("args", strings.Join(os.Args, ", "))) + + // Configure progress logger + ctx, err = progressLoggerFlag.InitializeContext(ctx) + if err != nil { + return err + } + // set context, so that initializeIO can have the current context + cmd.SetContext(ctx) + + // Configure command IO + err = outputFlag.InitializeIO(cmd) + if err != nil { + return err + } + // get the context back + ctx = cmd.Context() + + // Configure our user agent with the command that's about to be executed. 
+ ctx = root.WithCommandInUserAgent(ctx, cmd) + ctx = root.WithCommandExecIdInUserAgent(ctx) + ctx = root.WithUpstreamInUserAgent(ctx) + cmd.SetContext(ctx) + return nil + } + + cmd.SetFlagErrorFunc(root.FlagErrorFunc) + cmd.SetVersionTemplate("Pipelines CLI v{{.Version}} (based on Databricks CLI v{{.Version}})\n") + return cmd +} diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 01e79959b3..5007e88d4c 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -47,7 +47,7 @@ func isCannotConfigureAuth(err error) bool { // Referenced by cmd/labs/project/entrypoint.go. var ErrCannotConfigureAuth = errors.New("cannot configure default credentials, please check https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication to configure credentials for your preferred authentication method.") -func initProfileFlag(cmd *cobra.Command) { +func InitProfileFlag(cmd *cobra.Command) { cmd.PersistentFlags().StringP("profile", "p", "", "~/.databrickscfg profile") cmd.RegisterFlagCompletionFunc("profile", profile.ProfileCompletion) } diff --git a/cmd/root/bundle.go b/cmd/root/bundle.go index 99c278e2f5..48a1489930 100644 --- a/cmd/root/bundle.go +++ b/cmd/root/bundle.go @@ -168,7 +168,7 @@ func targetCompletion(cmd *cobra.Command, args []string, toComplete string) ([]s return maps.Keys(b.Config.Targets), cobra.ShellCompDirectiveDefault } -func initTargetFlag(cmd *cobra.Command) { +func InitTargetFlag(cmd *cobra.Command) { // To operate in the context of a bundle, all commands must take an "target" parameter. 
cmd.PersistentFlags().StringP("target", "t", "", "bundle target to use (if applicable)") cmd.RegisterFlagCompletionFunc("target", targetCompletion) diff --git a/cmd/root/bundle_test.go b/cmd/root/bundle_test.go index 2e7b60b864..db23d1eb2b 100644 --- a/cmd/root/bundle_test.go +++ b/cmd/root/bundle_test.go @@ -34,7 +34,7 @@ func emptyCommand(t *testing.T) *cobra.Command { ctx := context.Background() cmd := &cobra.Command{} cmd.SetContext(ctx) - initProfileFlag(cmd) + InitProfileFlag(cmd) return cmd } @@ -209,7 +209,7 @@ func TestBundleConfigureProfileFlagAndEnvVariable(t *testing.T) { func TestTargetFlagFull(t *testing.T) { cmd := emptyCommand(t) - initTargetFlag(cmd) + InitTargetFlag(cmd) cmd.SetArgs([]string{"version", "--target", "development"}) ctx := context.Background() @@ -221,7 +221,7 @@ func TestTargetFlagFull(t *testing.T) { func TestTargetFlagShort(t *testing.T) { cmd := emptyCommand(t) - initTargetFlag(cmd) + InitTargetFlag(cmd) cmd.SetArgs([]string{"version", "-t", "production"}) ctx := context.Background() @@ -234,7 +234,7 @@ func TestTargetFlagShort(t *testing.T) { // TODO: remove when environment flag is fully deprecated func TestTargetEnvironmentFlag(t *testing.T) { cmd := emptyCommand(t) - initTargetFlag(cmd) + InitTargetFlag(cmd) initEnvironmentFlag(cmd) cmd.SetArgs([]string{"version", "--environment", "development"}) diff --git a/cmd/root/io.go b/cmd/root/io.go index bba989a796..48a478aa86 100644 --- a/cmd/root/io.go +++ b/cmd/root/io.go @@ -9,12 +9,12 @@ import ( const envOutputFormat = "DATABRICKS_OUTPUT_FORMAT" -type outputFlag struct { +type OutputFlag struct { output flags.Output } -func initOutputFlag(cmd *cobra.Command) *outputFlag { - f := outputFlag{ +func InitOutputFlag(cmd *cobra.Command) *OutputFlag { + f := OutputFlag{ output: flags.OutputText, } @@ -37,7 +37,7 @@ func OutputType(cmd *cobra.Command) flags.Output { return *f } -func (f *outputFlag) initializeIO(cmd *cobra.Command) error { +func (f *OutputFlag) InitializeIO(cmd 
*cobra.Command) error { var headerTemplate, template string if cmd.Annotations != nil { // rely on zeroval being an empty string diff --git a/cmd/root/logger.go b/cmd/root/logger.go index 38e09b9c9f..21bd8691fc 100644 --- a/cmd/root/logger.go +++ b/cmd/root/logger.go @@ -20,14 +20,14 @@ const ( envLogFormat = "DATABRICKS_LOG_FORMAT" ) -type logFlags struct { +type LogFlags struct { file flags.LogFileFlag level flags.LogLevelFlag output flags.Output debug bool } -func (f *logFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error) { +func (f *LogFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error) { switch f.output { case flags.OutputJSON: return slog.NewJSONHandler(f.file.Writer(), &opts), nil @@ -43,7 +43,7 @@ func (f *logFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error } } -func (f *logFlags) initializeContext(ctx context.Context) (context.Context, error) { +func (f *LogFlags) InitializeContext(ctx context.Context) (context.Context, error) { if f.debug { err := f.level.Set("debug") if err != nil { @@ -74,8 +74,8 @@ func (f *logFlags) initializeContext(ctx context.Context) (context.Context, erro return log.NewContext(ctx, slog.Default()), nil } -func initLogFlags(cmd *cobra.Command) *logFlags { - f := logFlags{ +func InitLogFlags(cmd *cobra.Command) *LogFlags { + f := LogFlags{ file: flags.NewLogFileFlag(), level: flags.NewLogLevelFlag(), output: flags.OutputText, diff --git a/cmd/root/progress_logger.go b/cmd/root/progress_logger.go index 0cc49b2ac8..70e12d2090 100644 --- a/cmd/root/progress_logger.go +++ b/cmd/root/progress_logger.go @@ -14,13 +14,13 @@ import ( const envProgressFormat = "DATABRICKS_CLI_PROGRESS_FORMAT" -type progressLoggerFlag struct { +type ProgressLoggerFlag struct { flags.ProgressLogFormat - log *logFlags + log *LogFlags } -func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) flags.ProgressLogFormat { +func (f *ProgressLoggerFlag) resolveModeDefault(format 
flags.ProgressLogFormat) flags.ProgressLogFormat { if (f.log.level.String() == "disabled" || f.log.file.String() != "stderr") && term.IsTerminal(int(os.Stderr.Fd())) { return flags.ModeInplace @@ -28,7 +28,7 @@ func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) return flags.ModeAppend } -func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Context, error) { +func (f *ProgressLoggerFlag) InitializeContext(ctx context.Context) (context.Context, error) { // No need to initialize the logger if it's already set in the context. This // happens in unit tests where the logger is setup as a fixture. if _, ok := cmdio.FromContext(ctx); ok { @@ -49,8 +49,8 @@ func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Con return cmdio.NewContext(ctx, progressLogger), nil } -func initProgressLoggerFlag(cmd *cobra.Command, logFlags *logFlags) *progressLoggerFlag { - f := progressLoggerFlag{ +func InitProgressLoggerFlag(cmd *cobra.Command, logFlags *LogFlags) *ProgressLoggerFlag { + f := ProgressLoggerFlag{ ProgressLogFormat: flags.NewProgressLogFormat(), log: logFlags, diff --git a/cmd/root/progress_logger_test.go b/cmd/root/progress_logger_test.go index 42ba1bdc6c..6c9d2b8ff0 100644 --- a/cmd/root/progress_logger_test.go +++ b/cmd/root/progress_logger_test.go @@ -13,8 +13,8 @@ import ( type progressLoggerTest struct { *cobra.Command - *logFlags - *progressLoggerFlag + *LogFlags + *ProgressLoggerFlag } func initializeProgressLoggerTest(t *testing.T) ( @@ -26,9 +26,9 @@ func initializeProgressLoggerTest(t *testing.T) ( plt := &progressLoggerTest{ Command: &cobra.Command{}, } - plt.logFlags = initLogFlags(plt.Command) - plt.progressLoggerFlag = initProgressLoggerFlag(plt.Command, plt.logFlags) - return plt, &plt.logFlags.level, &plt.logFlags.file, &plt.progressLoggerFlag.ProgressLogFormat + plt.LogFlags = InitLogFlags(plt.Command) + plt.ProgressLoggerFlag = InitProgressLoggerFlag(plt.Command, plt.LogFlags) + 
return plt, &plt.LogFlags.level, &plt.LogFlags.file, &plt.ProgressLoggerFlag.ProgressLogFormat } func TestInitializeErrorOnIncompatibleConfig(t *testing.T) { @@ -36,7 +36,7 @@ func TestInitializeErrorOnIncompatibleConfig(t *testing.T) { require.NoError(t, logLevel.Set("info")) require.NoError(t, logFile.Set("stderr")) require.NoError(t, progressFormat.Set("inplace")) - _, err := plt.progressLoggerFlag.initializeContext(context.Background()) + _, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) assert.ErrorContains(t, err, "inplace progress logging cannot be used when log-file is stderr") } @@ -45,7 +45,7 @@ func TestNoErrorOnDisabledLogLevel(t *testing.T) { require.NoError(t, logLevel.Set("disabled")) require.NoError(t, logFile.Set("stderr")) require.NoError(t, progressFormat.Set("inplace")) - _, err := plt.progressLoggerFlag.initializeContext(context.Background()) + _, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) assert.NoError(t, err) } @@ -54,14 +54,14 @@ func TestNoErrorOnNonStderrLogFile(t *testing.T) { require.NoError(t, logLevel.Set("info")) require.NoError(t, logFile.Set("stdout")) require.NoError(t, progressFormat.Set("inplace")) - _, err := plt.progressLoggerFlag.initializeContext(context.Background()) + _, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) assert.NoError(t, err) } func TestDefaultLoggerModeResolution(t *testing.T) { plt, _, _, progressFormat := initializeProgressLoggerTest(t) require.Equal(t, *progressFormat, flags.ModeDefault) - ctx, err := plt.progressLoggerFlag.initializeContext(context.Background()) + ctx, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) require.NoError(t, err) logger, ok := cmdio.FromContext(ctx) assert.True(t, ok) diff --git a/cmd/root/root.go b/cmd/root/root.go index 3c7c3c70e4..09f9090891 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -21,6 +21,7 @@ import ( "github.com/spf13/cobra" ) +// New is copied to 
cmd/pipelines/root.go and adapted for pipelines use. func New(ctx context.Context) *cobra.Command { cmd := &cobra.Command{ Use: "databricks", @@ -37,29 +38,23 @@ func New(ctx context.Context) *cobra.Command { SilenceErrors: true, } - SetupRootCommand(ctx, cmd) - cmd.SetVersionTemplate("Databricks CLI v{{.Version}}\n") - return cmd -} - -func SetupRootCommand(ctx context.Context, cmd *cobra.Command) { // Pass the context along through the command during initialization. // It will be overwritten when the command is executed. cmd.SetContext(ctx) // Initialize flags - logFlags := initLogFlags(cmd) - progressLoggerFlag := initProgressLoggerFlag(cmd, logFlags) - outputFlag := initOutputFlag(cmd) - initProfileFlag(cmd) + logFlags := InitLogFlags(cmd) + progressLoggerFlag := InitProgressLoggerFlag(cmd, logFlags) + outputFlag := InitOutputFlag(cmd) + InitProfileFlag(cmd) initEnvironmentFlag(cmd) - initTargetFlag(cmd) + InitTargetFlag(cmd) cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() // Configure default logger. - ctx, err := logFlags.initializeContext(ctx) + ctx, err := logFlags.InitializeContext(ctx) if err != nil { return err } @@ -70,7 +65,7 @@ func SetupRootCommand(ctx context.Context, cmd *cobra.Command) { slog.String("args", strings.Join(os.Args, ", "))) // Configure progress logger - ctx, err = progressLoggerFlag.initializeContext(ctx) + ctx, err = progressLoggerFlag.InitializeContext(ctx) if err != nil { return err } @@ -78,7 +73,7 @@ func SetupRootCommand(ctx context.Context, cmd *cobra.Command) { cmd.SetContext(ctx) // Configure command IO - err = outputFlag.initializeIO(cmd) + err = outputFlag.InitializeIO(cmd) if err != nil { return err } @@ -86,18 +81,20 @@ func SetupRootCommand(ctx context.Context, cmd *cobra.Command) { ctx = cmd.Context() // Configure our user agent with the command that's about to be executed. 
- ctx = withCommandInUserAgent(ctx, cmd) - ctx = withCommandExecIdInUserAgent(ctx) - ctx = withUpstreamInUserAgent(ctx) + ctx = WithCommandInUserAgent(ctx, cmd) + ctx = WithCommandExecIdInUserAgent(ctx) + ctx = WithUpstreamInUserAgent(ctx) cmd.SetContext(ctx) return nil } - cmd.SetFlagErrorFunc(flagErrorFunc) + cmd.SetFlagErrorFunc(FlagErrorFunc) + cmd.SetVersionTemplate("Databricks CLI v{{.Version}}\n") + return cmd } // Wrap flag errors to include the usage string. -func flagErrorFunc(c *cobra.Command, err error) error { +func FlagErrorFunc(c *cobra.Command, err error) error { return fmt.Errorf("%w\n\n%s", err, c.UsageString()) } @@ -173,7 +170,7 @@ Stack Trace: exitCode = 1 } - commandStr := commandString(cmd) + commandStr := CommandString(cmd) ctx = cmd.Context() // Log bundle deploy failures. Only log if we have successfully configured diff --git a/cmd/root/user_agent_command.go b/cmd/root/user_agent_command.go index 306f2d7bfa..887292d25f 100644 --- a/cmd/root/user_agent_command.go +++ b/cmd/root/user_agent_command.go @@ -13,9 +13,9 @@ import ( // See unit test [main.TestCommandsDontUseUnderscoreInName]. const commandSeparator = "_" -// commandString walks up the command hierarchy of the specified +// CommandString walks up the command hierarchy of the specified // command to build a string representing this hierarchy. 
-func commandString(cmd *cobra.Command) string { +func CommandString(cmd *cobra.Command) string { reversed := []string{cmd.Name()} cmd.VisitParents(func(p *cobra.Command) { if !p.HasParent() { @@ -32,6 +32,6 @@ func commandString(cmd *cobra.Command) string { return strings.Join(ordered, commandSeparator) } -func withCommandInUserAgent(ctx context.Context, cmd *cobra.Command) context.Context { - return useragent.InContext(ctx, "cmd", commandString(cmd)) +func WithCommandInUserAgent(ctx context.Context, cmd *cobra.Command) context.Context { + return useragent.InContext(ctx, "cmd", CommandString(cmd)) } diff --git a/cmd/root/user_agent_command_exec_id.go b/cmd/root/user_agent_command_exec_id.go index 22b8fc3f64..6bd3f6a274 100644 --- a/cmd/root/user_agent_command_exec_id.go +++ b/cmd/root/user_agent_command_exec_id.go @@ -7,7 +7,7 @@ import ( "github.com/databricks/databricks-sdk-go/useragent" ) -func withCommandExecIdInUserAgent(ctx context.Context) context.Context { +func WithCommandExecIdInUserAgent(ctx context.Context) context.Context { // A UUID that will allow us to correlate multiple API requests made by // the same CLI invocation. 
return useragent.InContext(ctx, "cmd-exec-id", cmdctx.ExecId(ctx)) diff --git a/cmd/root/user_agent_command_exec_id_test.go b/cmd/root/user_agent_command_exec_id_test.go index 652d0ddd39..08552a980e 100644 --- a/cmd/root/user_agent_command_exec_id_test.go +++ b/cmd/root/user_agent_command_exec_id_test.go @@ -11,7 +11,7 @@ import ( func TestWithCommandExecIdInUserAgent(t *testing.T) { ctx := cmdctx.GenerateExecId(context.Background()) - ctx = withCommandExecIdInUserAgent(ctx) + ctx = WithCommandExecIdInUserAgent(ctx) // user agent should contain cmd-exec-id/ ua := useragent.FromContext(ctx) diff --git a/cmd/root/user_agent_command_test.go b/cmd/root/user_agent_command_test.go index a3f5bbcb1c..0f4615aedc 100644 --- a/cmd/root/user_agent_command_test.go +++ b/cmd/root/user_agent_command_test.go @@ -25,11 +25,11 @@ func TestWithCommandInUserAgent(t *testing.T) { root.AddCommand(hello) hello.AddCommand(world) - assert.Equal(t, "root", commandString(root)) - assert.Equal(t, "hello", commandString(hello)) - assert.Equal(t, "hello_world", commandString(world)) + assert.Equal(t, "root", CommandString(root)) + assert.Equal(t, "hello", CommandString(hello)) + assert.Equal(t, "hello_world", CommandString(world)) - ctx := withCommandInUserAgent(context.Background(), world) + ctx := WithCommandInUserAgent(context.Background(), world) ua := useragent.FromContext(ctx) assert.Contains(t, ua, "cmd/hello_world") diff --git a/cmd/root/user_agent_upstream.go b/cmd/root/user_agent_upstream.go index a813e8ee74..de53a80ab2 100644 --- a/cmd/root/user_agent_upstream.go +++ b/cmd/root/user_agent_upstream.go @@ -19,7 +19,7 @@ const ( upstreamVersionKey = "upstream-version" ) -func withUpstreamInUserAgent(ctx context.Context) context.Context { +func WithUpstreamInUserAgent(ctx context.Context) context.Context { value := env.Get(ctx, upstreamEnvVar) if value == "" { return ctx diff --git a/cmd/root/user_agent_upstream_test.go b/cmd/root/user_agent_upstream_test.go index fc6ea0c75d..9981f5d4ae 
100644 --- a/cmd/root/user_agent_upstream_test.go +++ b/cmd/root/user_agent_upstream_test.go @@ -10,20 +10,20 @@ import ( func TestUpstreamSet(t *testing.T) { t.Setenv(upstreamEnvVar, "foobar") - ctx := withUpstreamInUserAgent(context.Background()) + ctx := WithUpstreamInUserAgent(context.Background()) assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") } func TestUpstreamSetEmpty(t *testing.T) { t.Setenv(upstreamEnvVar, "") - ctx := withUpstreamInUserAgent(context.Background()) + ctx := WithUpstreamInUserAgent(context.Background()) assert.NotContains(t, useragent.FromContext(ctx), "upstream/") } func TestUpstreamVersionSet(t *testing.T) { t.Setenv(upstreamEnvVar, "foobar") t.Setenv(upstreamVersionEnvVar, "0.0.1") - ctx := withUpstreamInUserAgent(context.Background()) + ctx := WithUpstreamInUserAgent(context.Background()) assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") assert.Contains(t, useragent.FromContext(ctx), "upstream-version/0.0.1") } @@ -31,7 +31,7 @@ func TestUpstreamVersionSet(t *testing.T) { func TestUpstreamVersionSetEmpty(t *testing.T) { t.Setenv(upstreamEnvVar, "foobar") t.Setenv(upstreamVersionEnvVar, "") - ctx := withUpstreamInUserAgent(context.Background()) + ctx := WithUpstreamInUserAgent(context.Background()) assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") assert.NotContains(t, useragent.FromContext(ctx), "upstream-version/") } @@ -39,7 +39,7 @@ func TestUpstreamVersionSetEmpty(t *testing.T) { func TestUpstreamVersionSetUpstreamNotSet(t *testing.T) { t.Setenv(upstreamEnvVar, "") t.Setenv(upstreamVersionEnvVar, "0.0.1") - ctx := withUpstreamInUserAgent(context.Background()) + ctx := WithUpstreamInUserAgent(context.Background()) assert.NotContains(t, useragent.FromContext(ctx), "upstream/") assert.NotContains(t, useragent.FromContext(ctx), "upstream-version/") } From a64c91ce847ac25bc934c26b54f627e311f49da6 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Sun, 29 Jun 2025 20:50:48 -0700 
Subject: [PATCH 07/13] pipelines descriptions --- .../init/error-cases/output/my_project/README.md | 13 +++++++++---- .../error-cases/output/my_project/databricks.yml | 4 +--- .../init/python/output/my_python_project/README.md | 13 +++++++++---- .../python/output/my_python_project/databricks.yml | 4 +--- .../init/sql/output/my_sql_project/README.md | 13 +++++++++---- .../init/sql/output/my_sql_project/databricks.yml | 4 +--- cmd/pipelines/root.go | 1 - .../template/{{.project_name}}/README.md.tmpl | 13 +++++++++---- .../template/{{.project_name}}/databricks.yml.tmpl | 4 +--- 9 files changed, 40 insertions(+), 29 deletions(-) diff --git a/acceptance/pipelines/init/error-cases/output/my_project/README.md b/acceptance/pipelines/init/error-cases/output/my_project/README.md index 8514ebd053..b4e82b9564 100644 --- a/acceptance/pipelines/init/error-cases/output/my_project/README.md +++ b/acceptance/pipelines/init/error-cases/output/my_project/README.md @@ -6,17 +6,22 @@ The 'my_project' project was generated by using the Pipelines template. 1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html -2. Authenticate to your Databricks workspace, if you have not done so already: +2. Install the Pipelines CLI: + ``` + $ databricks install-pipelines-cli + ``` + +3. Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks auth login ``` -3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Deploying resources +## Deploying pipelines 1. To deploy a development copy of this project, type: ``` @@ -30,7 +35,7 @@ The 'my_project' project was generated by using the Pipelines template. 
$ pipelines deploy --target prod ``` -3. To run a job or pipeline, use the "run" command: +3. To run a pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml index 048cf90305..871656882c 100644 --- a/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml +++ b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml @@ -21,9 +21,7 @@ variables: targets: dev: # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + # - Deployed pipelines get prefixed with '[dev my_user_name]' mode: development default: true workspace: diff --git a/acceptance/pipelines/init/python/output/my_python_project/README.md b/acceptance/pipelines/init/python/output/my_python_project/README.md index 80e6007a62..47bf2d1236 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/README.md +++ b/acceptance/pipelines/init/python/output/my_python_project/README.md @@ -6,17 +6,22 @@ The 'my_python_project' project was generated by using the Pipelines template. 1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html -2. Authenticate to your Databricks workspace, if you have not done so already: +2. Install the Pipelines CLI: + ``` + $ databricks install-pipelines-cli + ``` + +3. Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks auth login ``` -3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. 
Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Deploying resources +## Deploying pipelines 1. To deploy a development copy of this project, type: ``` @@ -30,7 +35,7 @@ The 'my_python_project' project was generated by using the Pipelines template. $ pipelines deploy --target prod ``` -3. To run a job or pipeline, use the "run" command: +3. To run a pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/acceptance/pipelines/init/python/output/my_python_project/databricks.yml b/acceptance/pipelines/init/python/output/my_python_project/databricks.yml index d19a9a225a..f9b7ef40de 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/databricks.yml +++ b/acceptance/pipelines/init/python/output/my_python_project/databricks.yml @@ -21,9 +21,7 @@ variables: targets: dev: # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + # - Deployed pipelines get prefixed with '[dev my_user_name]' mode: development default: true workspace: diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/README.md b/acceptance/pipelines/init/sql/output/my_sql_project/README.md index c7d77a966b..b47837085a 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_project/README.md +++ b/acceptance/pipelines/init/sql/output/my_sql_project/README.md @@ -6,17 +6,22 @@ The 'my_sql_project' project was generated by using the Pipelines template. 1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html -2. Authenticate to your Databricks workspace, if you have not done so already: +2. Install the Pipelines CLI: + ``` + $ databricks install-pipelines-cli + ``` + +3. 
Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks auth login ``` -3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Deploying resources +## Deploying pipelines 1. To deploy a development copy of this project, type: ``` @@ -30,7 +35,7 @@ The 'my_sql_project' project was generated by using the Pipelines template. $ pipelines deploy --target prod ``` -3. To run a job or pipeline, use the "run" command: +3. To run a pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml b/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml index 88f5319d2a..42f2da0f42 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml +++ b/acceptance/pipelines/init/sql/output/my_sql_project/databricks.yml @@ -21,9 +21,7 @@ variables: targets: dev: # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. 
+ # - Deployed pipelines get prefixed with '[dev my_user_name]' mode: development default: true workspace: diff --git a/cmd/pipelines/root.go b/cmd/pipelines/root.go index c976a1127b..22ed34c7e0 100644 --- a/cmd/pipelines/root.go +++ b/cmd/pipelines/root.go @@ -79,6 +79,5 @@ func NewRoot(ctx context.Context) *cobra.Command { } cmd.SetFlagErrorFunc(root.FlagErrorFunc) - cmd.SetVersionTemplate("Pipelines CLI v{{.Version}} (based on Databricks CLI v{{.Version}})\n") return cmd } diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl index 2abb9b9ed5..b3cf02df7d 100644 --- a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl @@ -6,17 +6,22 @@ The '{{.project_name}}' project was generated by using the Pipelines template. 1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html -2. Authenticate to your Databricks workspace, if you have not done so already: +2. Install the Pipelines CLI: + ``` + $ databricks install-pipelines-cli + ``` + +3. Authenticate to your Databricks workspace, if you have not done so already: ``` $ databricks auth login ``` -3. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from +4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from https://docs.databricks.com/dev-tools/vscode-ext.html. Or the PyCharm plugin from https://www.databricks.com/blog/announcing-pycharm-integration-databricks. -## Deploying resources +## Deploying pipelines 1. To deploy a development copy of this project, type: ``` @@ -30,7 +35,7 @@ The '{{.project_name}}' project was generated by using the Pipelines template. $ pipelines deploy --target prod ``` -3. To run a job or pipeline, use the "run" command: +3. 
To run a pipeline, use the "run" command: ``` $ pipelines run ``` diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl index ba42045858..ffcc6ba7b1 100644 --- a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl @@ -21,9 +21,7 @@ variables: targets: dev: # The default target uses 'mode: development' to create a development copy. - # - Deployed resources get prefixed with '[dev my_user_name]' - # - Any job schedules and triggers are paused by default. - # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. + # - Deployed pipelines get prefixed with '[dev my_user_name]' mode: development default: true workspace: From 04b06cc562cee8798c42834ad6329b09c95b95c9 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Mon, 30 Jun 2025 09:30:52 -0700 Subject: [PATCH 08/13] remove .ruff.toml files --- acceptance/pipelines/init/error-cases/.ruff.toml | 2 -- acceptance/pipelines/init/python/.ruff.toml | 2 -- acceptance/pipelines/init/sql/.ruff.toml | 2 -- ruff.toml | 2 +- 4 files changed, 1 insertion(+), 7 deletions(-) delete mode 100644 acceptance/pipelines/init/error-cases/.ruff.toml delete mode 100644 acceptance/pipelines/init/python/.ruff.toml delete mode 100644 acceptance/pipelines/init/sql/.ruff.toml diff --git a/acceptance/pipelines/init/error-cases/.ruff.toml b/acceptance/pipelines/init/error-cases/.ruff.toml deleted file mode 100644 index bb9a1ba8bb..0000000000 --- a/acceptance/pipelines/init/error-cases/.ruff.toml +++ /dev/null @@ -1,2 +0,0 @@ -[format] -exclude = ["*.py", ".ipynb"] diff --git a/acceptance/pipelines/init/python/.ruff.toml b/acceptance/pipelines/init/python/.ruff.toml deleted file mode 100644 index bb9a1ba8bb..0000000000 --- a/acceptance/pipelines/init/python/.ruff.toml +++ /dev/null @@ -1,2 
+0,0 @@ -[format] -exclude = ["*.py", ".ipynb"] diff --git a/acceptance/pipelines/init/sql/.ruff.toml b/acceptance/pipelines/init/sql/.ruff.toml deleted file mode 100644 index bb9a1ba8bb..0000000000 --- a/acceptance/pipelines/init/sql/.ruff.toml +++ /dev/null @@ -1,2 +0,0 @@ -[format] -exclude = ["*.py", ".ipynb"] diff --git a/ruff.toml b/ruff.toml index 20b36e203a..c146fff15c 100644 --- a/ruff.toml +++ b/ruff.toml @@ -4,5 +4,5 @@ line-length = 150 exclude = [ "tagging.py", # tagging.py is synced from universe in the `openapi/tagging` directory and follows different format rules. "acceptance/bundle/templates/lakeflow-pipelines/**/*.py", # files are manually formatted - "acceptance/bundle/templates/pipelines/**/*.py" # files are manually formatted + "acceptance/pipelines/init/**/*.py" # files are manually formatted ] From 7661aad0e4a34585e96c3cc486301a991f04ab23 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Mon, 30 Jun 2025 10:23:55 -0700 Subject: [PATCH 09/13] new root copy --- .../install-pipelines-cli/output.txt | 4 +- cmd/pipelines/pipelines.go | 3 +- cmd/pipelines/root/auth.go | 11 ++ cmd/pipelines/root/bundle.go | 33 ++++++ cmd/pipelines/root/io.go | 53 +++++++++ cmd/pipelines/root/logger.go | 111 ++++++++++++++++++ cmd/pipelines/root/progress_logger.go | 70 +++++++++++ cmd/pipelines/{ => root}/root.go | 37 +++--- cmd/pipelines/root/user_agent_command.go | 37 ++++++ .../root/user_agent_command_exec_id.go | 14 +++ cmd/pipelines/root/user_agent_upstream.go | 37 ++++++ cmd/root/auth.go | 2 +- cmd/root/bundle.go | 2 +- cmd/root/bundle_test.go | 8 +- cmd/root/io.go | 8 +- cmd/root/logger.go | 10 +- cmd/root/progress_logger.go | 12 +- cmd/root/progress_logger_test.go | 18 +-- cmd/root/root.go | 29 +++-- cmd/root/user_agent_command.go | 8 +- cmd/root/user_agent_command_exec_id.go | 2 +- cmd/root/user_agent_command_exec_id_test.go | 2 +- cmd/root/user_agent_command_test.go | 8 +- cmd/root/user_agent_upstream.go | 2 +- cmd/root/user_agent_upstream_test.go | 10 
+- 25 files changed, 451 insertions(+), 80 deletions(-) create mode 100644 cmd/pipelines/root/auth.go create mode 100644 cmd/pipelines/root/bundle.go create mode 100644 cmd/pipelines/root/io.go create mode 100644 cmd/pipelines/root/logger.go create mode 100644 cmd/pipelines/root/progress_logger.go rename cmd/pipelines/{ => root}/root.go (67%) create mode 100644 cmd/pipelines/root/user_agent_command.go create mode 100644 cmd/pipelines/root/user_agent_command_exec_id.go create mode 100644 cmd/pipelines/root/user_agent_upstream.go diff --git a/acceptance/pipelines/install-pipelines-cli/output.txt b/acceptance/pipelines/install-pipelines-cli/output.txt index 5a74d25481..3c109daa8c 100644 --- a/acceptance/pipelines/install-pipelines-cli/output.txt +++ b/acceptance/pipelines/install-pipelines-cli/output.txt @@ -19,7 +19,7 @@ Flags: -h, --help help for pipelines -o, --output type output type: text or json (default text) -p, --profile string ~/.databrickscfg profile - -t, --target string bundle target to use (if applicable) + -t, --target string project target to use (if applicable) -v, --version version for pipelines Use "pipelines [command] --help" for more information about a command. @@ -54,7 +54,7 @@ Flags: -h, --help help for pipelines -o, --output type output type: text or json (default text) -p, --profile string ~/.databrickscfg profile - -t, --target string bundle target to use (if applicable) + -t, --target string project target to use (if applicable) -v, --version version for pipelines Use "pipelines [command] --help" for more information about a command. 
diff --git a/cmd/pipelines/pipelines.go b/cmd/pipelines/pipelines.go index d5a5bf5193..7a9d6c6a8b 100644 --- a/cmd/pipelines/pipelines.go +++ b/cmd/pipelines/pipelines.go @@ -3,11 +3,12 @@ package pipelines import ( "context" + "github.com/databricks/cli/cmd/pipelines/root" "github.com/spf13/cobra" ) func New(ctx context.Context) *cobra.Command { - cli := NewRoot(ctx) + cli := root.New(ctx) cli.AddCommand(initCommand()) return cli } diff --git a/cmd/pipelines/root/auth.go b/cmd/pipelines/root/auth.go new file mode 100644 index 0000000000..5991f18efd --- /dev/null +++ b/cmd/pipelines/root/auth.go @@ -0,0 +1,11 @@ +package root + +import ( + "github.com/databricks/cli/libs/databrickscfg/profile" + "github.com/spf13/cobra" +) + +func initProfileFlag(cmd *cobra.Command) { + cmd.PersistentFlags().StringP("profile", "p", "", "~/.databrickscfg profile") + cmd.RegisterFlagCompletionFunc("profile", profile.ProfileCompletion) +} diff --git a/cmd/pipelines/root/bundle.go b/cmd/pipelines/root/bundle.go new file mode 100644 index 0000000000..8c18846216 --- /dev/null +++ b/cmd/pipelines/root/bundle.go @@ -0,0 +1,33 @@ +package root + +import ( + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/phases" + "github.com/spf13/cobra" + "golang.org/x/exp/maps" +) + +// targetCompletion executes to autocomplete the argument to the target flag. +func targetCompletion(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + ctx := cmd.Context() + b, err := bundle.MustLoad(ctx) + if err != nil { + cobra.CompErrorln(err.Error()) + return nil, cobra.ShellCompDirectiveError + } + + // Load project but don't select a target (we're completing those). 
+ diags := phases.Load(ctx, b) + if err := diags.Error(); err != nil { + cobra.CompErrorln(err.Error()) + return nil, cobra.ShellCompDirectiveError + } + + return maps.Keys(b.Config.Targets), cobra.ShellCompDirectiveDefault +} + +func initTargetFlag(cmd *cobra.Command) { + // To operate in the context of a project, all commands must take an "target" parameter. + cmd.PersistentFlags().StringP("target", "t", "", "project target to use (if applicable)") + cmd.RegisterFlagCompletionFunc("target", targetCompletion) +} diff --git a/cmd/pipelines/root/io.go b/cmd/pipelines/root/io.go new file mode 100644 index 0000000000..bba989a796 --- /dev/null +++ b/cmd/pipelines/root/io.go @@ -0,0 +1,53 @@ +package root + +import ( + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/flags" + "github.com/spf13/cobra" +) + +const envOutputFormat = "DATABRICKS_OUTPUT_FORMAT" + +type outputFlag struct { + output flags.Output +} + +func initOutputFlag(cmd *cobra.Command) *outputFlag { + f := outputFlag{ + output: flags.OutputText, + } + + // Configure defaults from environment, if applicable. + // If the provided value is invalid it is ignored. 
+ if v, ok := env.Lookup(cmd.Context(), envOutputFormat); ok { + f.output.Set(v) //nolint:errcheck + } + + cmd.PersistentFlags().VarP(&f.output, "output", "o", "output type: text or json") + return &f +} + +func OutputType(cmd *cobra.Command) flags.Output { + f, ok := cmd.Flag("output").Value.(*flags.Output) + if !ok { + panic("output flag not defined") + } + + return *f +} + +func (f *outputFlag) initializeIO(cmd *cobra.Command) error { + var headerTemplate, template string + if cmd.Annotations != nil { + // rely on zeroval being an empty string + template = cmd.Annotations["template"] + headerTemplate = cmd.Annotations["headerTemplate"] + } + + ctx := cmd.Context() + cmdIO := cmdio.NewIO(ctx, f.output, cmd.InOrStdin(), cmd.OutOrStdout(), cmd.ErrOrStderr(), headerTemplate, template) + ctx = cmdio.InContext(ctx, cmdIO) + cmd.SetContext(ctx) + return nil +} diff --git a/cmd/pipelines/root/logger.go b/cmd/pipelines/root/logger.go new file mode 100644 index 0000000000..38e09b9c9f --- /dev/null +++ b/cmd/pipelines/root/logger.go @@ -0,0 +1,111 @@ +package root + +import ( + "context" + "fmt" + "log/slog" + "os" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/flags" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/log/handler" + "github.com/spf13/cobra" +) + +const ( + envLogFile = "DATABRICKS_LOG_FILE" + envLogLevel = "DATABRICKS_LOG_LEVEL" + envLogFormat = "DATABRICKS_LOG_FORMAT" +) + +type logFlags struct { + file flags.LogFileFlag + level flags.LogLevelFlag + output flags.Output + debug bool +} + +func (f *logFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error) { + switch f.output { + case flags.OutputJSON: + return slog.NewJSONHandler(f.file.Writer(), &opts), nil + case flags.OutputText: + w := f.file.Writer() + return handler.NewFriendlyHandler(w, &handler.Options{ + Color: cmdio.IsTTY(w), + Level: opts.Level, + ReplaceAttr: opts.ReplaceAttr, + }), nil + 
default: + return nil, fmt.Errorf("invalid log output mode: %s", f.output) + } +} + +func (f *logFlags) initializeContext(ctx context.Context) (context.Context, error) { + if f.debug { + err := f.level.Set("debug") + if err != nil { + return nil, err + } + } + + opts := slog.HandlerOptions{} + opts.Level = f.level.Level() + opts.AddSource = true + opts.ReplaceAttr = log.ReplaceAttrFunctions{ + log.ReplaceLevelAttr, + log.ReplaceSourceAttr, + }.ReplaceAttr + + // Open the underlying log file if the user configured an actual file to log to. + err := f.file.Open() + if err != nil { + return nil, err + } + + handler, err := f.makeLogHandler(opts) + if err != nil { + return nil, err + } + + slog.SetDefault(slog.New(handler).With(slog.Int("pid", os.Getpid()))) + return log.NewContext(ctx, slog.Default()), nil +} + +func initLogFlags(cmd *cobra.Command) *logFlags { + f := logFlags{ + file: flags.NewLogFileFlag(), + level: flags.NewLogLevelFlag(), + output: flags.OutputText, + } + + // Configure defaults from environment, if applicable. + // If the provided value is invalid it is ignored. 
+ if v, ok := env.Lookup(cmd.Context(), envLogFile); ok { + f.file.Set(v) //nolint:errcheck + } + if v, ok := env.Lookup(cmd.Context(), envLogLevel); ok { + f.level.Set(v) //nolint:errcheck + } + if v, ok := env.Lookup(cmd.Context(), envLogFormat); ok { + f.output.Set(v) //nolint:errcheck + } + + flags := cmd.PersistentFlags() + flags.BoolVar(&f.debug, "debug", false, "enable debug logging") + flags.Var(&f.file, "log-file", "file to write logs to") + flags.Var(&f.level, "log-level", "log level") + flags.Var(&f.output, "log-format", "log output format (text or json)") + + // mark fine-grained flags hidden from global --help + flags.MarkHidden("log-file") + flags.MarkHidden("log-level") + flags.MarkHidden("log-format") + + cmd.RegisterFlagCompletionFunc("log-file", f.file.Complete) + cmd.RegisterFlagCompletionFunc("log-level", f.level.Complete) + cmd.RegisterFlagCompletionFunc("log-format", f.output.Complete) + return &f +} diff --git a/cmd/pipelines/root/progress_logger.go b/cmd/pipelines/root/progress_logger.go new file mode 100644 index 0000000000..0cc49b2ac8 --- /dev/null +++ b/cmd/pipelines/root/progress_logger.go @@ -0,0 +1,70 @@ +package root + +import ( + "context" + "errors" + "os" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/flags" + "github.com/spf13/cobra" + "golang.org/x/term" +) + +const envProgressFormat = "DATABRICKS_CLI_PROGRESS_FORMAT" + +type progressLoggerFlag struct { + flags.ProgressLogFormat + + log *logFlags +} + +func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) flags.ProgressLogFormat { + if (f.log.level.String() == "disabled" || f.log.file.String() != "stderr") && + term.IsTerminal(int(os.Stderr.Fd())) { + return flags.ModeInplace + } + return flags.ModeAppend +} + +func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Context, error) { + // No need to initialize the logger if it's already set in the context. 
This + // happens in unit tests where the logger is setup as a fixture. + if _, ok := cmdio.FromContext(ctx); ok { + return ctx, nil + } + + if f.log.level.String() != "disabled" && f.log.file.String() == "stderr" && + f.ProgressLogFormat == flags.ModeInplace { + return nil, errors.New("inplace progress logging cannot be used when log-file is stderr") + } + + format := f.ProgressLogFormat + if format == flags.ModeDefault { + format = f.resolveModeDefault(format) + } + + progressLogger := cmdio.NewLogger(format) + return cmdio.NewContext(ctx, progressLogger), nil +} + +func initProgressLoggerFlag(cmd *cobra.Command, logFlags *logFlags) *progressLoggerFlag { + f := progressLoggerFlag{ + ProgressLogFormat: flags.NewProgressLogFormat(), + + log: logFlags, + } + + // Configure defaults from environment, if applicable. + // If the provided value is invalid it is ignored. + if v, ok := env.Lookup(cmd.Context(), envProgressFormat); ok { + _ = f.Set(v) + } + + flags := cmd.PersistentFlags() + flags.Var(&f.ProgressLogFormat, "progress-format", "format for progress logs (append, inplace, json)") + flags.MarkHidden("progress-format") + cmd.RegisterFlagCompletionFunc("progress-format", f.ProgressLogFormat.Complete) + return &f +} diff --git a/cmd/pipelines/root.go b/cmd/pipelines/root/root.go similarity index 67% rename from cmd/pipelines/root.go rename to cmd/pipelines/root/root.go index 22ed34c7e0..4d51714461 100644 --- a/cmd/pipelines/root.go +++ b/cmd/pipelines/root/root.go @@ -1,19 +1,19 @@ -package pipelines +package root import ( "context" + "fmt" "log/slog" "os" "strings" - "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/log" "github.com/spf13/cobra" ) -// NewRoot is copied from cmd/root/root.go and adapted for pipelines use. -func NewRoot(ctx context.Context) *cobra.Command { +// New is copied from cmd/root/root.go and adapted for pipelines use. 
+func New(ctx context.Context) *cobra.Command { cmd := &cobra.Command{ Use: "pipelines", Short: "Pipelines CLI", @@ -34,17 +34,17 @@ func NewRoot(ctx context.Context) *cobra.Command { cmd.SetContext(ctx) // Initialize flags - logFlags := root.InitLogFlags(cmd) - progressLoggerFlag := root.InitProgressLoggerFlag(cmd, logFlags) - outputFlag := root.InitOutputFlag(cmd) - root.InitProfileFlag(cmd) - root.InitTargetFlag(cmd) + logFlags := initLogFlags(cmd) + progressLoggerFlag := initProgressLoggerFlag(cmd, logFlags) + outputFlag := initOutputFlag(cmd) + initProfileFlag(cmd) + initTargetFlag(cmd) cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() // Configure default logger. - ctx, err := logFlags.InitializeContext(ctx) + ctx, err := logFlags.initializeContext(ctx) if err != nil { return err } @@ -55,7 +55,7 @@ func NewRoot(ctx context.Context) *cobra.Command { slog.String("args", strings.Join(os.Args, ", "))) // Configure progress logger - ctx, err = progressLoggerFlag.InitializeContext(ctx) + ctx, err = progressLoggerFlag.initializeContext(ctx) if err != nil { return err } @@ -63,7 +63,7 @@ func NewRoot(ctx context.Context) *cobra.Command { cmd.SetContext(ctx) // Configure command IO - err = outputFlag.InitializeIO(cmd) + err = outputFlag.initializeIO(cmd) if err != nil { return err } @@ -71,13 +71,18 @@ func NewRoot(ctx context.Context) *cobra.Command { ctx = cmd.Context() // Configure our user agent with the command that's about to be executed. - ctx = root.WithCommandInUserAgent(ctx, cmd) - ctx = root.WithCommandExecIdInUserAgent(ctx) - ctx = root.WithUpstreamInUserAgent(ctx) + ctx = withCommandInUserAgent(ctx, cmd) + ctx = withCommandExecIdInUserAgent(ctx) + ctx = withUpstreamInUserAgent(ctx) cmd.SetContext(ctx) return nil } - cmd.SetFlagErrorFunc(root.FlagErrorFunc) + cmd.SetFlagErrorFunc(flagErrorFunc) return cmd } + +// Wrap flag errors to include the usage string. 
+func flagErrorFunc(c *cobra.Command, err error) error { + return fmt.Errorf("%w\n\n%s", err, c.UsageString()) +} diff --git a/cmd/pipelines/root/user_agent_command.go b/cmd/pipelines/root/user_agent_command.go new file mode 100644 index 0000000000..306f2d7bfa --- /dev/null +++ b/cmd/pipelines/root/user_agent_command.go @@ -0,0 +1,37 @@ +package root + +import ( + "context" + "strings" + + "github.com/databricks/databricks-sdk-go/useragent" + "github.com/spf13/cobra" +) + +// commandSeparator joins command names in a command hierarchy. + // We enforce no command name contains this character. + // See unit test [main.TestCommandsDontUseUnderscoreInName]. + const commandSeparator = "_" + + // commandString walks up the command hierarchy of the specified + // command to build a string representing this hierarchy. + func commandString(cmd *cobra.Command) string { + reversed := []string{cmd.Name()} + cmd.VisitParents(func(p *cobra.Command) { + if !p.HasParent() { + return + } + reversed = append(reversed, p.Name()) + }) + + ordered := make([]string, 0, len(reversed)) + for i := len(reversed) - 1; i >= 0; i-- { + ordered = append(ordered, reversed[i]) + } + + return strings.Join(ordered, commandSeparator) +} + +func withCommandInUserAgent(ctx context.Context, cmd *cobra.Command) context.Context { + return useragent.InContext(ctx, "cmd", commandString(cmd)) +} diff --git a/cmd/pipelines/root/user_agent_command_exec_id.go b/cmd/pipelines/root/user_agent_command_exec_id.go new file mode 100644 index 0000000000..22b8fc3f64 --- /dev/null +++ b/cmd/pipelines/root/user_agent_command_exec_id.go @@ -0,0 +1,14 @@ +package root + +import ( + "context" + + "github.com/databricks/cli/libs/cmdctx" + "github.com/databricks/databricks-sdk-go/useragent" +) + +func withCommandExecIdInUserAgent(ctx context.Context) context.Context { + // A UUID that will allow us to correlate multiple API requests made by + // the same CLI invocation.
+ return useragent.InContext(ctx, "cmd-exec-id", cmdctx.ExecId(ctx)) +} diff --git a/cmd/pipelines/root/user_agent_upstream.go b/cmd/pipelines/root/user_agent_upstream.go new file mode 100644 index 0000000000..a813e8ee74 --- /dev/null +++ b/cmd/pipelines/root/user_agent_upstream.go @@ -0,0 +1,37 @@ +package root + +import ( + "context" + + "github.com/databricks/cli/libs/env" + "github.com/databricks/databricks-sdk-go/useragent" +) + +// Environment variables that caller can set to convey what is upstream to this CLI. +const ( + upstreamEnvVar = "DATABRICKS_CLI_UPSTREAM" + upstreamVersionEnvVar = "DATABRICKS_CLI_UPSTREAM_VERSION" +) + +// Keys in the user agent. +const ( + upstreamKey = "upstream" + upstreamVersionKey = "upstream-version" +) + +func withUpstreamInUserAgent(ctx context.Context) context.Context { + value := env.Get(ctx, upstreamEnvVar) + if value == "" { + return ctx + } + + ctx = useragent.InContext(ctx, upstreamKey, value) + + // Include upstream version as well, if set. + value = env.Get(ctx, upstreamVersionEnvVar) + if value == "" { + return ctx + } + + return useragent.InContext(ctx, upstreamVersionKey, value) +} diff --git a/cmd/root/auth.go b/cmd/root/auth.go index 5007e88d4c..01e79959b3 100644 --- a/cmd/root/auth.go +++ b/cmd/root/auth.go @@ -47,7 +47,7 @@ func isCannotConfigureAuth(err error) bool { // Referenced by cmd/labs/project/entrypoint.go. 
var ErrCannotConfigureAuth = errors.New("cannot configure default credentials, please check https://docs.databricks.com/en/dev-tools/auth.html#databricks-client-unified-authentication to configure credentials for your preferred authentication method.") -func InitProfileFlag(cmd *cobra.Command) { +func initProfileFlag(cmd *cobra.Command) { cmd.PersistentFlags().StringP("profile", "p", "", "~/.databrickscfg profile") cmd.RegisterFlagCompletionFunc("profile", profile.ProfileCompletion) } diff --git a/cmd/root/bundle.go b/cmd/root/bundle.go index 48a1489930..99c278e2f5 100644 --- a/cmd/root/bundle.go +++ b/cmd/root/bundle.go @@ -168,7 +168,7 @@ func targetCompletion(cmd *cobra.Command, args []string, toComplete string) ([]s return maps.Keys(b.Config.Targets), cobra.ShellCompDirectiveDefault } -func InitTargetFlag(cmd *cobra.Command) { +func initTargetFlag(cmd *cobra.Command) { // To operate in the context of a bundle, all commands must take an "target" parameter. cmd.PersistentFlags().StringP("target", "t", "", "bundle target to use (if applicable)") cmd.RegisterFlagCompletionFunc("target", targetCompletion) diff --git a/cmd/root/bundle_test.go b/cmd/root/bundle_test.go index db23d1eb2b..2e7b60b864 100644 --- a/cmd/root/bundle_test.go +++ b/cmd/root/bundle_test.go @@ -34,7 +34,7 @@ func emptyCommand(t *testing.T) *cobra.Command { ctx := context.Background() cmd := &cobra.Command{} cmd.SetContext(ctx) - InitProfileFlag(cmd) + initProfileFlag(cmd) return cmd } @@ -209,7 +209,7 @@ func TestBundleConfigureProfileFlagAndEnvVariable(t *testing.T) { func TestTargetFlagFull(t *testing.T) { cmd := emptyCommand(t) - InitTargetFlag(cmd) + initTargetFlag(cmd) cmd.SetArgs([]string{"version", "--target", "development"}) ctx := context.Background() @@ -221,7 +221,7 @@ func TestTargetFlagFull(t *testing.T) { func TestTargetFlagShort(t *testing.T) { cmd := emptyCommand(t) - InitTargetFlag(cmd) + initTargetFlag(cmd) cmd.SetArgs([]string{"version", "-t", "production"}) ctx := 
context.Background() @@ -234,7 +234,7 @@ func TestTargetFlagShort(t *testing.T) { // TODO: remove when environment flag is fully deprecated func TestTargetEnvironmentFlag(t *testing.T) { cmd := emptyCommand(t) - InitTargetFlag(cmd) + initTargetFlag(cmd) initEnvironmentFlag(cmd) cmd.SetArgs([]string{"version", "--environment", "development"}) diff --git a/cmd/root/io.go b/cmd/root/io.go index 48a478aa86..bba989a796 100644 --- a/cmd/root/io.go +++ b/cmd/root/io.go @@ -9,12 +9,12 @@ import ( const envOutputFormat = "DATABRICKS_OUTPUT_FORMAT" -type OutputFlag struct { +type outputFlag struct { output flags.Output } -func InitOutputFlag(cmd *cobra.Command) *OutputFlag { - f := OutputFlag{ +func initOutputFlag(cmd *cobra.Command) *outputFlag { + f := outputFlag{ output: flags.OutputText, } @@ -37,7 +37,7 @@ func OutputType(cmd *cobra.Command) flags.Output { return *f } -func (f *OutputFlag) InitializeIO(cmd *cobra.Command) error { +func (f *outputFlag) initializeIO(cmd *cobra.Command) error { var headerTemplate, template string if cmd.Annotations != nil { // rely on zeroval being an empty string diff --git a/cmd/root/logger.go b/cmd/root/logger.go index 21bd8691fc..38e09b9c9f 100644 --- a/cmd/root/logger.go +++ b/cmd/root/logger.go @@ -20,14 +20,14 @@ const ( envLogFormat = "DATABRICKS_LOG_FORMAT" ) -type LogFlags struct { +type logFlags struct { file flags.LogFileFlag level flags.LogLevelFlag output flags.Output debug bool } -func (f *LogFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error) { +func (f *logFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error) { switch f.output { case flags.OutputJSON: return slog.NewJSONHandler(f.file.Writer(), &opts), nil @@ -43,7 +43,7 @@ func (f *LogFlags) makeLogHandler(opts slog.HandlerOptions) (slog.Handler, error } } -func (f *LogFlags) InitializeContext(ctx context.Context) (context.Context, error) { +func (f *logFlags) initializeContext(ctx context.Context) (context.Context, error) { if f.debug { 
err := f.level.Set("debug") if err != nil { @@ -74,8 +74,8 @@ func (f *LogFlags) InitializeContext(ctx context.Context) (context.Context, erro return log.NewContext(ctx, slog.Default()), nil } -func InitLogFlags(cmd *cobra.Command) *LogFlags { - f := LogFlags{ +func initLogFlags(cmd *cobra.Command) *logFlags { + f := logFlags{ file: flags.NewLogFileFlag(), level: flags.NewLogLevelFlag(), output: flags.OutputText, diff --git a/cmd/root/progress_logger.go b/cmd/root/progress_logger.go index 70e12d2090..0cc49b2ac8 100644 --- a/cmd/root/progress_logger.go +++ b/cmd/root/progress_logger.go @@ -14,13 +14,13 @@ import ( const envProgressFormat = "DATABRICKS_CLI_PROGRESS_FORMAT" -type ProgressLoggerFlag struct { +type progressLoggerFlag struct { flags.ProgressLogFormat - log *LogFlags + log *logFlags } -func (f *ProgressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) flags.ProgressLogFormat { +func (f *progressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) flags.ProgressLogFormat { if (f.log.level.String() == "disabled" || f.log.file.String() != "stderr") && term.IsTerminal(int(os.Stderr.Fd())) { return flags.ModeInplace @@ -28,7 +28,7 @@ func (f *ProgressLoggerFlag) resolveModeDefault(format flags.ProgressLogFormat) return flags.ModeAppend } -func (f *ProgressLoggerFlag) InitializeContext(ctx context.Context) (context.Context, error) { +func (f *progressLoggerFlag) initializeContext(ctx context.Context) (context.Context, error) { // No need to initialize the logger if it's already set in the context. This // happens in unit tests where the logger is setup as a fixture. 
if _, ok := cmdio.FromContext(ctx); ok { @@ -49,8 +49,8 @@ func (f *ProgressLoggerFlag) InitializeContext(ctx context.Context) (context.Con return cmdio.NewContext(ctx, progressLogger), nil } -func InitProgressLoggerFlag(cmd *cobra.Command, logFlags *LogFlags) *ProgressLoggerFlag { - f := ProgressLoggerFlag{ +func initProgressLoggerFlag(cmd *cobra.Command, logFlags *logFlags) *progressLoggerFlag { + f := progressLoggerFlag{ ProgressLogFormat: flags.NewProgressLogFormat(), log: logFlags, diff --git a/cmd/root/progress_logger_test.go b/cmd/root/progress_logger_test.go index 6c9d2b8ff0..42ba1bdc6c 100644 --- a/cmd/root/progress_logger_test.go +++ b/cmd/root/progress_logger_test.go @@ -13,8 +13,8 @@ import ( type progressLoggerTest struct { *cobra.Command - *LogFlags - *ProgressLoggerFlag + *logFlags + *progressLoggerFlag } func initializeProgressLoggerTest(t *testing.T) ( @@ -26,9 +26,9 @@ func initializeProgressLoggerTest(t *testing.T) ( plt := &progressLoggerTest{ Command: &cobra.Command{}, } - plt.LogFlags = InitLogFlags(plt.Command) - plt.ProgressLoggerFlag = InitProgressLoggerFlag(plt.Command, plt.LogFlags) - return plt, &plt.LogFlags.level, &plt.LogFlags.file, &plt.ProgressLoggerFlag.ProgressLogFormat + plt.logFlags = initLogFlags(plt.Command) + plt.progressLoggerFlag = initProgressLoggerFlag(plt.Command, plt.logFlags) + return plt, &plt.logFlags.level, &plt.logFlags.file, &plt.progressLoggerFlag.ProgressLogFormat } func TestInitializeErrorOnIncompatibleConfig(t *testing.T) { @@ -36,7 +36,7 @@ func TestInitializeErrorOnIncompatibleConfig(t *testing.T) { require.NoError(t, logLevel.Set("info")) require.NoError(t, logFile.Set("stderr")) require.NoError(t, progressFormat.Set("inplace")) - _, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) + _, err := plt.progressLoggerFlag.initializeContext(context.Background()) assert.ErrorContains(t, err, "inplace progress logging cannot be used when log-file is stderr") } @@ -45,7 +45,7 @@ func 
TestNoErrorOnDisabledLogLevel(t *testing.T) { require.NoError(t, logLevel.Set("disabled")) require.NoError(t, logFile.Set("stderr")) require.NoError(t, progressFormat.Set("inplace")) - _, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) + _, err := plt.progressLoggerFlag.initializeContext(context.Background()) assert.NoError(t, err) } @@ -54,14 +54,14 @@ func TestNoErrorOnNonStderrLogFile(t *testing.T) { require.NoError(t, logLevel.Set("info")) require.NoError(t, logFile.Set("stdout")) require.NoError(t, progressFormat.Set("inplace")) - _, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) + _, err := plt.progressLoggerFlag.initializeContext(context.Background()) assert.NoError(t, err) } func TestDefaultLoggerModeResolution(t *testing.T) { plt, _, _, progressFormat := initializeProgressLoggerTest(t) require.Equal(t, *progressFormat, flags.ModeDefault) - ctx, err := plt.ProgressLoggerFlag.InitializeContext(context.Background()) + ctx, err := plt.progressLoggerFlag.initializeContext(context.Background()) require.NoError(t, err) logger, ok := cmdio.FromContext(ctx) assert.True(t, ok) diff --git a/cmd/root/root.go b/cmd/root/root.go index 09f9090891..9815d0288d 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -21,7 +21,6 @@ import ( "github.com/spf13/cobra" ) -// New is copied to cmd/pipelines/root.go and adapted for pipelines use. 
func New(ctx context.Context) *cobra.Command { cmd := &cobra.Command{ Use: "databricks", @@ -43,18 +42,18 @@ func New(ctx context.Context) *cobra.Command { cmd.SetContext(ctx) // Initialize flags - logFlags := InitLogFlags(cmd) - progressLoggerFlag := InitProgressLoggerFlag(cmd, logFlags) - outputFlag := InitOutputFlag(cmd) - InitProfileFlag(cmd) + logFlags := initLogFlags(cmd) + progressLoggerFlag := initProgressLoggerFlag(cmd, logFlags) + outputFlag := initOutputFlag(cmd) + initProfileFlag(cmd) initEnvironmentFlag(cmd) - InitTargetFlag(cmd) + initTargetFlag(cmd) cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error { ctx := cmd.Context() // Configure default logger. - ctx, err := logFlags.InitializeContext(ctx) + ctx, err := logFlags.initializeContext(ctx) if err != nil { return err } @@ -65,7 +64,7 @@ func New(ctx context.Context) *cobra.Command { slog.String("args", strings.Join(os.Args, ", "))) // Configure progress logger - ctx, err = progressLoggerFlag.InitializeContext(ctx) + ctx, err = progressLoggerFlag.initializeContext(ctx) if err != nil { return err } @@ -73,7 +72,7 @@ func New(ctx context.Context) *cobra.Command { cmd.SetContext(ctx) // Configure command IO - err = outputFlag.InitializeIO(cmd) + err = outputFlag.initializeIO(cmd) if err != nil { return err } @@ -81,20 +80,20 @@ func New(ctx context.Context) *cobra.Command { ctx = cmd.Context() // Configure our user agent with the command that's about to be executed. - ctx = WithCommandInUserAgent(ctx, cmd) - ctx = WithCommandExecIdInUserAgent(ctx) - ctx = WithUpstreamInUserAgent(ctx) + ctx = withCommandInUserAgent(ctx, cmd) + ctx = withCommandExecIdInUserAgent(ctx) + ctx = withUpstreamInUserAgent(ctx) cmd.SetContext(ctx) return nil } - cmd.SetFlagErrorFunc(FlagErrorFunc) + cmd.SetFlagErrorFunc(flagErrorFunc) cmd.SetVersionTemplate("Databricks CLI v{{.Version}}\n") return cmd } // Wrap flag errors to include the usage string. 
-func FlagErrorFunc(c *cobra.Command, err error) error { +func flagErrorFunc(c *cobra.Command, err error) error { return fmt.Errorf("%w\n\n%s", err, c.UsageString()) } @@ -170,7 +169,7 @@ Stack Trace: exitCode = 1 } - commandStr := CommandString(cmd) + commandStr := commandString(cmd) ctx = cmd.Context() // Log bundle deploy failures. Only log if we have successfully configured diff --git a/cmd/root/user_agent_command.go b/cmd/root/user_agent_command.go index 887292d25f..306f2d7bfa 100644 --- a/cmd/root/user_agent_command.go +++ b/cmd/root/user_agent_command.go @@ -13,9 +13,9 @@ import ( // See unit test [main.TestCommandsDontUseUnderscoreInName]. const commandSeparator = "_" -// CommandString walks up the command hierarchy of the specified +// commandString walks up the command hierarchy of the specified // command to build a string representing this hierarchy. -func CommandString(cmd *cobra.Command) string { +func commandString(cmd *cobra.Command) string { reversed := []string{cmd.Name()} cmd.VisitParents(func(p *cobra.Command) { if !p.HasParent() { @@ -32,6 +32,6 @@ func CommandString(cmd *cobra.Command) string { return strings.Join(ordered, commandSeparator) } -func WithCommandInUserAgent(ctx context.Context, cmd *cobra.Command) context.Context { - return useragent.InContext(ctx, "cmd", CommandString(cmd)) +func withCommandInUserAgent(ctx context.Context, cmd *cobra.Command) context.Context { + return useragent.InContext(ctx, "cmd", commandString(cmd)) } diff --git a/cmd/root/user_agent_command_exec_id.go b/cmd/root/user_agent_command_exec_id.go index 6bd3f6a274..22b8fc3f64 100644 --- a/cmd/root/user_agent_command_exec_id.go +++ b/cmd/root/user_agent_command_exec_id.go @@ -7,7 +7,7 @@ import ( "github.com/databricks/databricks-sdk-go/useragent" ) -func WithCommandExecIdInUserAgent(ctx context.Context) context.Context { +func withCommandExecIdInUserAgent(ctx context.Context) context.Context { // A UUID that will allow us to correlate multiple API requests made 
by // the same CLI invocation. return useragent.InContext(ctx, "cmd-exec-id", cmdctx.ExecId(ctx)) diff --git a/cmd/root/user_agent_command_exec_id_test.go b/cmd/root/user_agent_command_exec_id_test.go index 08552a980e..652d0ddd39 100644 --- a/cmd/root/user_agent_command_exec_id_test.go +++ b/cmd/root/user_agent_command_exec_id_test.go @@ -11,7 +11,7 @@ import ( func TestWithCommandExecIdInUserAgent(t *testing.T) { ctx := cmdctx.GenerateExecId(context.Background()) - ctx = WithCommandExecIdInUserAgent(ctx) + ctx = withCommandExecIdInUserAgent(ctx) // user agent should contain cmd-exec-id/ ua := useragent.FromContext(ctx) diff --git a/cmd/root/user_agent_command_test.go b/cmd/root/user_agent_command_test.go index 0f4615aedc..a3f5bbcb1c 100644 --- a/cmd/root/user_agent_command_test.go +++ b/cmd/root/user_agent_command_test.go @@ -25,11 +25,11 @@ func TestWithCommandInUserAgent(t *testing.T) { root.AddCommand(hello) hello.AddCommand(world) - assert.Equal(t, "root", CommandString(root)) - assert.Equal(t, "hello", CommandString(hello)) - assert.Equal(t, "hello_world", CommandString(world)) + assert.Equal(t, "root", commandString(root)) + assert.Equal(t, "hello", commandString(hello)) + assert.Equal(t, "hello_world", commandString(world)) - ctx := WithCommandInUserAgent(context.Background(), world) + ctx := withCommandInUserAgent(context.Background(), world) ua := useragent.FromContext(ctx) assert.Contains(t, ua, "cmd/hello_world") diff --git a/cmd/root/user_agent_upstream.go b/cmd/root/user_agent_upstream.go index de53a80ab2..a813e8ee74 100644 --- a/cmd/root/user_agent_upstream.go +++ b/cmd/root/user_agent_upstream.go @@ -19,7 +19,7 @@ const ( upstreamVersionKey = "upstream-version" ) -func WithUpstreamInUserAgent(ctx context.Context) context.Context { +func withUpstreamInUserAgent(ctx context.Context) context.Context { value := env.Get(ctx, upstreamEnvVar) if value == "" { return ctx diff --git a/cmd/root/user_agent_upstream_test.go 
b/cmd/root/user_agent_upstream_test.go index 9981f5d4ae..fc6ea0c75d 100644 --- a/cmd/root/user_agent_upstream_test.go +++ b/cmd/root/user_agent_upstream_test.go @@ -10,20 +10,20 @@ import ( func TestUpstreamSet(t *testing.T) { t.Setenv(upstreamEnvVar, "foobar") - ctx := WithUpstreamInUserAgent(context.Background()) + ctx := withUpstreamInUserAgent(context.Background()) assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") } func TestUpstreamSetEmpty(t *testing.T) { t.Setenv(upstreamEnvVar, "") - ctx := WithUpstreamInUserAgent(context.Background()) + ctx := withUpstreamInUserAgent(context.Background()) assert.NotContains(t, useragent.FromContext(ctx), "upstream/") } func TestUpstreamVersionSet(t *testing.T) { t.Setenv(upstreamEnvVar, "foobar") t.Setenv(upstreamVersionEnvVar, "0.0.1") - ctx := WithUpstreamInUserAgent(context.Background()) + ctx := withUpstreamInUserAgent(context.Background()) assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") assert.Contains(t, useragent.FromContext(ctx), "upstream-version/0.0.1") } @@ -31,7 +31,7 @@ func TestUpstreamVersionSet(t *testing.T) { func TestUpstreamVersionSetEmpty(t *testing.T) { t.Setenv(upstreamEnvVar, "foobar") t.Setenv(upstreamVersionEnvVar, "") - ctx := WithUpstreamInUserAgent(context.Background()) + ctx := withUpstreamInUserAgent(context.Background()) assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") assert.NotContains(t, useragent.FromContext(ctx), "upstream-version/") } @@ -39,7 +39,7 @@ func TestUpstreamVersionSetEmpty(t *testing.T) { func TestUpstreamVersionSetUpstreamNotSet(t *testing.T) { t.Setenv(upstreamEnvVar, "") t.Setenv(upstreamVersionEnvVar, "0.0.1") - ctx := WithUpstreamInUserAgent(context.Background()) + ctx := withUpstreamInUserAgent(context.Background()) assert.NotContains(t, useragent.FromContext(ctx), "upstream/") assert.NotContains(t, useragent.FromContext(ctx), "upstream-version/") } From db1e52cf3490fead8a1ba6dbbe86181d01861955 Mon Sep 17 00:00:00 2001 
From: Alyssa Gorbaneva Date: Mon, 30 Jun 2025 12:04:27 -0700 Subject: [PATCH 10/13] renamed to cli-pipelines --- .../init/error-cases/output/my_project/README.md | 2 +- .../init/python/output/my_python_project/README.md | 2 +- .../pipelines/init/sql/output/my_sql_project/README.md | 2 +- cmd/pipelines/init.go | 2 +- libs/template/template.go | 10 +++++----- libs/template/templates/cli-pipelines/README.md | 3 +++ .../databricks_template_schema.json | 0 .../library/variables.tmpl | 0 .../template/__preamble.tmpl | 0 .../template/{{.project_name}}/.gitignore.tmpl | 0 .../{{.project_name}}/.vscode/__builtins__.pyi | 0 .../template/{{.project_name}}/.vscode/extensions.json | 0 .../{{.project_name}}/.vscode/settings.json.tmpl | 0 .../template/{{.project_name}}/README.md.tmpl | 2 +- .../template/{{.project_name}}/databricks.yml.tmpl | 0 .../{{.project_name}}_pipeline/README.md.tmpl | 0 .../explorations/sample_exploration.ipynb.tmpl | 0 .../sample_trips_{{.project_name}}.py.tmpl | 0 .../sample_trips_{{.project_name}}.sql.tmpl | 0 .../sample_zones_{{.project_name}}.py.tmpl | 0 .../sample_zones_{{.project_name}}.sql.tmpl | 0 .../{{.project_name}}_pipeline/utilities/utils.py | 0 .../{{.project_name}}.pipeline.yml.tmpl | 0 libs/template/templates/pipelines/README.md | 3 --- 24 files changed, 13 insertions(+), 13 deletions(-) create mode 100644 libs/template/templates/cli-pipelines/README.md rename libs/template/templates/{pipelines => cli-pipelines}/databricks_template_schema.json (100%) rename libs/template/templates/{pipelines => cli-pipelines}/library/variables.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/__preamble.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/.gitignore.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/.vscode/__builtins__.pyi (100%) rename libs/template/templates/{pipelines => 
cli-pipelines}/template/{{.project_name}}/.vscode/extensions.json (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/.vscode/settings.json.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/README.md.tmpl (92%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/databricks.yml.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py (100%) rename libs/template/templates/{pipelines => cli-pipelines}/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl (100%) delete mode 100644 libs/template/templates/pipelines/README.md diff --git a/acceptance/pipelines/init/error-cases/output/my_project/README.md b/acceptance/pipelines/init/error-cases/output/my_project/README.md index b4e82b9564..48def0c4df 100644 --- 
a/acceptance/pipelines/init/error-cases/output/my_project/README.md +++ b/acceptance/pipelines/init/error-cases/output/my_project/README.md @@ -1,6 +1,6 @@ # my_project -The 'my_project' project was generated by using the Pipelines template. +The 'my_project' project was generated by using the CLI Pipelines template. ## Setup diff --git a/acceptance/pipelines/init/python/output/my_python_project/README.md b/acceptance/pipelines/init/python/output/my_python_project/README.md index 47bf2d1236..02ff63426f 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/README.md +++ b/acceptance/pipelines/init/python/output/my_python_project/README.md @@ -1,6 +1,6 @@ # my_python_project -The 'my_python_project' project was generated by using the Pipelines template. +The 'my_python_project' project was generated by using the CLI Pipelines template. ## Setup diff --git a/acceptance/pipelines/init/sql/output/my_sql_project/README.md b/acceptance/pipelines/init/sql/output/my_sql_project/README.md index b47837085a..b04732c41a 100644 --- a/acceptance/pipelines/init/sql/output/my_sql_project/README.md +++ b/acceptance/pipelines/init/sql/output/my_sql_project/README.md @@ -1,6 +1,6 @@ # my_sql_project -The 'my_sql_project' project was generated by using the Pipelines template. +The 'my_sql_project' project was generated by using the CLI Pipelines template. 
## Setup diff --git a/cmd/pipelines/init.go b/cmd/pipelines/init.go index 1577f63e48..e847b3fc22 100644 --- a/cmd/pipelines/init.go +++ b/cmd/pipelines/init.go @@ -22,7 +22,7 @@ func initCommand() *cobra.Command { ctx := cmd.Context() r := template.Resolver{ - TemplatePathOrUrl: "pipelines", + TemplatePathOrUrl: "cli-pipelines", ConfigFile: configFile, OutputDir: outputDir, } diff --git a/libs/template/template.go b/libs/template/template.go index 18993bbf94..aa9609380d 100644 --- a/libs/template/template.go +++ b/libs/template/template.go @@ -27,7 +27,7 @@ const ( DefaultPython TemplateName = "default-python" DefaultSql TemplateName = "default-sql" LakeflowPipelines TemplateName = "lakeflow-pipelines" - Pipelines TemplateName = "pipelines" + CLIPipelines TemplateName = "cli-pipelines" DbtSql TemplateName = "dbt-sql" MlopsStacks TemplateName = "mlops-stacks" DefaultPydabs TemplateName = "default-pydabs" @@ -56,11 +56,11 @@ var databricksTemplates = []Template{ Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: LakeflowPipelines}}, }, { - name: Pipelines, + name: CLIPipelines, hidden: true, - description: "The default template for pipelines CLI", - Reader: &builtinReader{name: string(Pipelines)}, - Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: Pipelines}}, + description: "The default template for CLI pipelines", + Reader: &builtinReader{name: string(CLIPipelines)}, + Writer: &writerWithFullTelemetry{defaultWriter: defaultWriter{name: CLIPipelines}}, }, { name: DbtSql, diff --git a/libs/template/templates/cli-pipelines/README.md b/libs/template/templates/cli-pipelines/README.md new file mode 100644 index 0000000000..12dc8a7a42 --- /dev/null +++ b/libs/template/templates/cli-pipelines/README.md @@ -0,0 +1,3 @@ +# CLI Pipelines + +Default template for CLI Pipelines diff --git a/libs/template/templates/pipelines/databricks_template_schema.json b/libs/template/templates/cli-pipelines/databricks_template_schema.json similarity index 
100% rename from libs/template/templates/pipelines/databricks_template_schema.json rename to libs/template/templates/cli-pipelines/databricks_template_schema.json diff --git a/libs/template/templates/pipelines/library/variables.tmpl b/libs/template/templates/cli-pipelines/library/variables.tmpl similarity index 100% rename from libs/template/templates/pipelines/library/variables.tmpl rename to libs/template/templates/cli-pipelines/library/variables.tmpl diff --git a/libs/template/templates/pipelines/template/__preamble.tmpl b/libs/template/templates/cli-pipelines/template/__preamble.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/__preamble.tmpl rename to libs/template/templates/cli-pipelines/template/__preamble.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.gitignore.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/.gitignore.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/.gitignore.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/.gitignore.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/cli-pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/.vscode/__builtins__.pyi diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/cli-pipelines/template/{{.project_name}}/.vscode/extensions.json similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/.vscode/extensions.json rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/.vscode/extensions.json diff --git 
a/libs/template/templates/pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/.vscode/settings.json.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl similarity index 92% rename from libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl index b3cf02df7d..021ec94625 100644 --- a/libs/template/templates/pipelines/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/cli-pipelines/template/{{.project_name}}/README.md.tmpl @@ -1,6 +1,6 @@ # {{.project_name}} -The '{{.project_name}}' project was generated by using the Pipelines template. +The '{{.project_name}}' project was generated by using the CLI Pipelines template. 
## Setup diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/databricks.yml.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/databricks.yml.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/databricks.yml.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/README.md.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/explorations/sample_exploration.ipynb.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl rename to 
libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl rename to 
libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.sql.tmpl diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/utilities/utils.py diff --git a/libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl similarity index 100% rename from libs/template/templates/pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl rename to libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/{{.project_name}}.pipeline.yml.tmpl diff --git a/libs/template/templates/pipelines/README.md b/libs/template/templates/pipelines/README.md deleted file mode 100644 index 1eec4c7f53..0000000000 --- a/libs/template/templates/pipelines/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Pipelines - -Default template for pipelines From c6b6cca1d332710231d6ab40206b6ab67bdc0f32 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Tue, 1 Jul 2025 03:46:48 -0700 Subject: [PATCH 11/13] copied tests, added comments --- cmd/pipelines/root/auth.go | 1 + cmd/pipelines/root/bundle.go | 1 + cmd/pipelines/root/io.go | 1 + cmd/pipelines/root/logger.go | 1 + cmd/pipelines/root/progress_logger.go | 1 + cmd/pipelines/root/root.go | 1 + cmd/pipelines/root/user_agent_command.go | 1 + .../root/user_agent_command_exec_id.go | 1 + 
.../root/user_agent_command_exec_id_test.go | 20 ++++++++ cmd/pipelines/root/user_agent_command_test.go | 37 +++++++++++++++ cmd/pipelines/root/user_agent_upstream.go | 1 + .../root/user_agent_upstream_test.go | 46 +++++++++++++++++++ 12 files changed, 112 insertions(+) create mode 100644 cmd/pipelines/root/user_agent_command_exec_id_test.go create mode 100644 cmd/pipelines/root/user_agent_command_test.go create mode 100644 cmd/pipelines/root/user_agent_upstream_test.go diff --git a/cmd/pipelines/root/auth.go b/cmd/pipelines/root/auth.go index 5991f18efd..7d7e7be8d8 100644 --- a/cmd/pipelines/root/auth.go +++ b/cmd/pipelines/root/auth.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/auth.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/bundle.go b/cmd/pipelines/root/bundle.go index 8c18846216..8ce84617fd 100644 --- a/cmd/pipelines/root/bundle.go +++ b/cmd/pipelines/root/bundle.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/bundle.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/io.go b/cmd/pipelines/root/io.go index bba989a796..a67e557848 100644 --- a/cmd/pipelines/root/io.go +++ b/cmd/pipelines/root/io.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/io.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/logger.go b/cmd/pipelines/root/logger.go index 38e09b9c9f..54957e8650 100644 --- a/cmd/pipelines/root/logger.go +++ b/cmd/pipelines/root/logger.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/logger.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/progress_logger.go b/cmd/pipelines/root/progress_logger.go index 0cc49b2ac8..6128819793 100644 --- a/cmd/pipelines/root/progress_logger.go +++ b/cmd/pipelines/root/progress_logger.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/progress_logger.go and adapted for pipelines use. 
package root import ( diff --git a/cmd/pipelines/root/root.go b/cmd/pipelines/root/root.go index 4d51714461..b22600b712 100644 --- a/cmd/pipelines/root/root.go +++ b/cmd/pipelines/root/root.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/root.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/user_agent_command.go b/cmd/pipelines/root/user_agent_command.go index 306f2d7bfa..7320e085a3 100644 --- a/cmd/pipelines/root/user_agent_command.go +++ b/cmd/pipelines/root/user_agent_command.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/user_agent_command.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/user_agent_command_exec_id.go b/cmd/pipelines/root/user_agent_command_exec_id.go index 22b8fc3f64..1d934ee6d3 100644 --- a/cmd/pipelines/root/user_agent_command_exec_id.go +++ b/cmd/pipelines/root/user_agent_command_exec_id.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/user_agent_command_exec_id.go and adapted for pipelines use. package root import ( diff --git a/cmd/pipelines/root/user_agent_command_exec_id_test.go b/cmd/pipelines/root/user_agent_command_exec_id_test.go new file mode 100644 index 0000000000..328b641b9a --- /dev/null +++ b/cmd/pipelines/root/user_agent_command_exec_id_test.go @@ -0,0 +1,20 @@ +// Copied from cmd/root/user_agent_command_exec_id_test.go and adapted for pipelines use. 
+package root + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/cmdctx" + "github.com/databricks/databricks-sdk-go/useragent" + "github.com/stretchr/testify/assert" +) + +func TestWithCommandExecIdInUserAgent(t *testing.T) { + ctx := cmdctx.GenerateExecId(context.Background()) + ctx = withCommandExecIdInUserAgent(ctx) + + // user agent should contain cmd-exec-id/ + ua := useragent.FromContext(ctx) + assert.Regexp(t, `cmd-exec-id/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`, ua) +} diff --git a/cmd/pipelines/root/user_agent_command_test.go b/cmd/pipelines/root/user_agent_command_test.go new file mode 100644 index 0000000000..7cc284c8b1 --- /dev/null +++ b/cmd/pipelines/root/user_agent_command_test.go @@ -0,0 +1,37 @@ +// Copied from cmd/root/user_agent_command_test.go and adapted for pipelines use. +package root + +import ( + "context" + "testing" + + "github.com/databricks/databricks-sdk-go/useragent" + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" +) + +func TestWithCommandInUserAgent(t *testing.T) { + root := &cobra.Command{ + Use: "root", + } + + hello := &cobra.Command{ + Use: "hello", + } + + world := &cobra.Command{ + Use: "world", + } + + root.AddCommand(hello) + hello.AddCommand(world) + + assert.Equal(t, "root", commandString(root)) + assert.Equal(t, "hello", commandString(hello)) + assert.Equal(t, "hello_world", commandString(world)) + + ctx := withCommandInUserAgent(context.Background(), world) + + ua := useragent.FromContext(ctx) + assert.Contains(t, ua, "cmd/hello_world") +} diff --git a/cmd/pipelines/root/user_agent_upstream.go b/cmd/pipelines/root/user_agent_upstream.go index a813e8ee74..fdefb3bf90 100644 --- a/cmd/pipelines/root/user_agent_upstream.go +++ b/cmd/pipelines/root/user_agent_upstream.go @@ -1,3 +1,4 @@ +// Copied from cmd/root/user_agent_upstream.go and adapted for pipelines use. 
package root import ( diff --git a/cmd/pipelines/root/user_agent_upstream_test.go b/cmd/pipelines/root/user_agent_upstream_test.go new file mode 100644 index 0000000000..fdc4972e65 --- /dev/null +++ b/cmd/pipelines/root/user_agent_upstream_test.go @@ -0,0 +1,46 @@ +// Copied from cmd/root/user_agent_upstream_test.go and adapted for pipelines use. +package root + +import ( + "context" + "testing" + + "github.com/databricks/databricks-sdk-go/useragent" + "github.com/stretchr/testify/assert" +) + +func TestUpstreamSet(t *testing.T) { + t.Setenv(upstreamEnvVar, "foobar") + ctx := withUpstreamInUserAgent(context.Background()) + assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") +} + +func TestUpstreamSetEmpty(t *testing.T) { + t.Setenv(upstreamEnvVar, "") + ctx := withUpstreamInUserAgent(context.Background()) + assert.NotContains(t, useragent.FromContext(ctx), "upstream/") +} + +func TestUpstreamVersionSet(t *testing.T) { + t.Setenv(upstreamEnvVar, "foobar") + t.Setenv(upstreamVersionEnvVar, "0.0.1") + ctx := withUpstreamInUserAgent(context.Background()) + assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") + assert.Contains(t, useragent.FromContext(ctx), "upstream-version/0.0.1") +} + +func TestUpstreamVersionSetEmpty(t *testing.T) { + t.Setenv(upstreamEnvVar, "foobar") + t.Setenv(upstreamVersionEnvVar, "") + ctx := withUpstreamInUserAgent(context.Background()) + assert.Contains(t, useragent.FromContext(ctx), "upstream/foobar") + assert.NotContains(t, useragent.FromContext(ctx), "upstream-version/") +} + +func TestUpstreamVersionSetUpstreamNotSet(t *testing.T) { + t.Setenv(upstreamEnvVar, "") + t.Setenv(upstreamVersionEnvVar, "0.0.1") + ctx := withUpstreamInUserAgent(context.Background()) + assert.NotContains(t, useragent.FromContext(ctx), "upstream/") + assert.NotContains(t, useragent.FromContext(ctx), "upstream-version/") +} From 8b076b87143a4ab188edac110be83feb41613e08 Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Tue, 1 Jul 2025 
07:23:29 -0700 Subject: [PATCH 12/13] removed linter --- .../transformations/sample_trips_my_project.py | 5 +---- .../transformations/sample_zones_my_project.py | 8 +------- .../transformations/sample_trips_my_python_project.py | 5 +---- .../transformations/sample_zones_my_python_project.py | 8 +------- .../sample_trips_{{.project_name}}.py.tmpl | 5 +---- .../sample_zones_{{.project_name}}.py.tmpl | 8 +------- ruff.toml | 3 +-- 7 files changed, 7 insertions(+), 35 deletions(-) diff --git a/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py index 8b9196403e..0d9eeb5dbd 100644 --- a/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py @@ -10,7 +10,4 @@ @dlt.table def sample_trips_my_project(): - return ( - spark.read.table("samples.nyctaxi.trips") - .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) - ) + return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py index ca0eda976e..a28f52eef2 100644 --- a/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py +++ b/acceptance/pipelines/init/error-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py @@ -10,10 +10,4 @@ @dlt.table def sample_zones_my_project(): # Read from the "sample_trips" table, then sum all the fares 
- return ( - spark.read.table("sample_trips_my_project") - .groupBy(col("pickup_zip")) - .agg( - sum("fare_amount").alias("total_fare") - ) - ) + return spark.read.table("sample_trips_my_project").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) diff --git a/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py index 7b4584cdf5..ab2d165c43 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_trips_my_python_project.py @@ -10,7 +10,4 @@ @dlt.table def sample_trips_my_python_project(): - return ( - spark.read.table("samples.nyctaxi.trips") - .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) - ) + return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py index 94b3556414..b1846fda32 100644 --- a/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py +++ b/acceptance/pipelines/init/python/output/my_python_project/my_python_project_pipeline/transformations/sample_zones_my_python_project.py @@ -10,10 +10,4 @@ @dlt.table def sample_zones_my_python_project(): # Read from the "sample_trips" table, then sum all the fares - return ( - spark.read.table("sample_trips_my_python_project") - 
.groupBy(col("pickup_zip")) - .agg( - sum("fare_amount").alias("total_fare") - ) - ) + return spark.read.table("sample_trips_my_python_project").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) diff --git a/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl index a191f88b9f..963856d6b4 100644 --- a/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl +++ b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_trips_{{.project_name}}.py.tmpl @@ -10,7 +10,4 @@ from utilities import utils @dlt.table def sample_trips_{{ .project_name }}(): - return ( - spark.read.table("samples.nyctaxi.trips") - .withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) - ) + return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance"))) diff --git a/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl index 64e40036d0..89a81121f8 100644 --- a/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl +++ b/libs/template/templates/cli-pipelines/template/{{.project_name}}/{{.project_name}}_pipeline/transformations/sample_zones_{{.project_name}}.py.tmpl @@ -10,10 +10,4 @@ from pyspark.sql.functions import col, sum @dlt.table def sample_zones_{{ .project_name }}(): # Read from 
the "sample_trips" table, then sum all the fares - return ( - spark.read.table("sample_trips_{{ .project_name }}") - .groupBy(col("pickup_zip")) - .agg( - sum("fare_amount").alias("total_fare") - ) - ) + return spark.read.table("sample_trips_{{ .project_name }}").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare")) diff --git a/ruff.toml b/ruff.toml index c146fff15c..5838db95ee 100644 --- a/ruff.toml +++ b/ruff.toml @@ -3,6 +3,5 @@ line-length = 150 exclude = [ "tagging.py", # tagging.py is synced from universe in the `openapi/tagging` directory and follows different format rules. - "acceptance/bundle/templates/lakeflow-pipelines/**/*.py", # files are manually formatted - "acceptance/pipelines/init/**/*.py" # files are manually formatted + "acceptance/bundle/templates/lakeflow-pipelines/**/*.py" # files are manually formatted ] From 09dc35d13e8cbd2d67c05593a7a1c40c4983037f Mon Sep 17 00:00:00 2001 From: Alyssa Gorbaneva Date: Tue, 1 Jul 2025 12:55:58 -0700 Subject: [PATCH 13/13] removed from cloud --- acceptance/pipelines/init/python/test.toml | 2 -- acceptance/pipelines/init/sql/test.toml | 2 -- 2 files changed, 4 deletions(-) delete mode 100644 acceptance/pipelines/init/python/test.toml delete mode 100644 acceptance/pipelines/init/sql/test.toml diff --git a/acceptance/pipelines/init/python/test.toml b/acceptance/pipelines/init/python/test.toml deleted file mode 100644 index 0e8c8a3840..0000000000 --- a/acceptance/pipelines/init/python/test.toml +++ /dev/null @@ -1,2 +0,0 @@ -Local = true -Cloud = true diff --git a/acceptance/pipelines/init/sql/test.toml b/acceptance/pipelines/init/sql/test.toml deleted file mode 100644 index 0e8c8a3840..0000000000 --- a/acceptance/pipelines/init/sql/test.toml +++ /dev/null @@ -1,2 +0,0 @@ -Local = true -Cloud = true