databricks · alyssa-db · Jul 1, 2025 · Jun 24, 2025 · Jun 24, 2025 · Jun 24, 2025
diff --git a/acceptance/pipelines/init/error-cases/output.txt b/acceptance/pipelines/init/error-cases/output.txt
@@ -0,0 +1,26 @@
+
+=== Install pipelines CLI
+>>> errcode [CLI] install-pipelines-cli -d ./subdir
+pipelines successfully installed in directory "./subdir"
+
+=== Test with missing config file
+>>> errcode ./subdir/pipelines init --output-dir output
+
+Welcome to the template for pipelines!
+
+
+Your new project has been created in the 'my_project' directory!
+
+Refer to the README.md file for "getting started" instructions!
+
+=== Test with invalid project name (contains uppercase letters)
+>>> errcode ./subdir/pipelines init --config-file ./invalid_input.json --output-dir invalid-output
+Error: failed to load config from file ./invalid_input.json: invalid value for project_name: "InvalidProjectName". Name must consist of lower case letters, numbers, and underscores.
+
+Exit code: 1
+
+=== Test with non-existent config file
+>>> errcode ./subdir/pipelines init --config-file ./nonexistent.json --output-dir invalid-output-2
+Error: failed to load config from file ./nonexistent.json: open ./nonexistent.json: no such file or directory
+
+Exit code: 1
diff --git a/acceptance/pipelines/init/error-cases/output/my_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/__builtins__.pyi
@@ -0,0 +1,3 @@
+# Typings for Pylance in Visual Studio Code
+# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
+from databricks.sdk.runtime import *
diff --git a/acceptance/pipelines/init/error-cases/output/my_project/.vscode/extensions.json b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/extensions.json
@@ -0,0 +1,7 @@
+{
+    "recommendations": [
+        "databricks.databricks",
+        "ms-python.vscode-pylance",
+        "redhat.vscode-yaml"
+    ]
+}
diff --git a/acceptance/pipelines/init/error-cases/output/my_project/.vscode/settings.json b/acceptance/pipelines/init/error-cases/output/my_project/.vscode/settings.json
@@ -0,0 +1,21 @@
+{
+    "python.analysis.stubPath": ".vscode",
+    "databricks.python.envFile": "${workspaceFolder}/.env",
+    "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
+    "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.analysis.extraPaths": ["my_project_pipeline"],
+    "files.exclude": {
+        "**/*.egg-info": true,
+        "**/__pycache__": true,
+        ".pytest_cache": true,
+    },
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnSave": true,
+    },
+}
diff --git a/acceptance/pipelines/init/error-cases/output/my_project/README.md b/acceptance/pipelines/init/error-cases/output/my_project/README.md
@@ -0,0 +1,41 @@
+# my_project
+
+The 'my_project' project was generated by using the CLI Pipelines template.
+
+## Setup
+
+1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
+
+2. Install the Pipelines CLI:
+   ```
+   $ databricks install-pipelines-cli
+   ```
+
+3. Authenticate to your Databricks workspace, if you have not done so already:
+    ```
+    $ databricks auth login
+    ```
+
+4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
+   https://docs.databricks.com/dev-tools/vscode-ext.html, or the PyCharm plugin from
+   https://www.databricks.com/blog/announcing-pycharm-integration-databricks.
+
+
+## Deploying pipelines
+
+1. To deploy a development copy of this project, type:
+    ```
+    $ pipelines deploy --target dev
+    ```
+    (Note that "dev" is the default target, so the `--target` parameter
+    is optional here.)
+
+2. Similarly, to deploy a production copy, type:
+   ```
+   $ pipelines deploy --target prod
+   ```
+
+3. To run a pipeline, use the "run" command:
+   ```
+   $ pipelines run
+   ```
diff --git a/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml b/acceptance/pipelines/init/error-cases/output/my_project/databricks.yml
@@ -0,0 +1,46 @@
+# This is a Databricks pipelines definition for my_project.
+# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
+bundle:
+  name: my_project
+  uuid: [UUID]
+
+include:
+  - resources/*.yml
+  - resources/*/*.yml
+  - ./*.yml
+
+# Variable declarations. These variables are assigned in the dev/prod targets below.
+variables:
+  catalog:
+    description: The catalog to use
+  schema:
+    description: The schema to use
+  notifications:
+    description: The email addresses to use for failure notifications
+
+targets:
+  dev:
+    # The default target uses 'mode: development' to create a development copy.
+    # - Deployed pipelines get prefixed with '[dev my_user_name]'
+    mode: development
+    default: true
+    workspace:
+      host: [DATABRICKS_URL]
+    variables:
+      catalog: hive_metastore
+      schema: ${workspace.current_user.short_name}
+      notifications: []
+
+  prod:
+    mode: production
+    workspace:
+      host: [DATABRICKS_URL]
+      # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
+      root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
+    permissions:
+      - user_name: [USERNAME]
+        level: CAN_MANAGE
+    variables:
+      catalog: hive_metastore
+      schema: default
+      notifications: [[USERNAME]]
diff --git a/...ance/pipelines/init/error-cases/output/my_project/my_project_pipeline/README.md b/...ance/pipelines/init/error-cases/output/my_project/my_project_pipeline/README.md
@@ -0,0 +1,22 @@
+# my_project_pipeline
+
+This folder defines all source code for the my_project_pipeline pipeline:
+
+- `explorations`: Ad-hoc notebooks used to explore the data processed by this pipeline.
+- `transformations`: All dataset definitions and transformations.
+- `utilities` (optional): Utility functions and Python modules used in this pipeline.
+- `data_sources` (optional): View definitions describing the source data for this pipeline.
+
+## Getting Started
+
+To get started, go to the `transformations` folder -- most of the relevant source code lives there:
+
+* By convention, every dataset under `transformations` is in a separate file.
+* Take a look at the sample under "sample_trips_my_project.py" to get familiar with the syntax.
+  Read more about the syntax at https://docs.databricks.com/dlt/python-ref.html.
+* Use `Run file` to run and preview a single transformation.
+* Use `Run pipeline` to run _all_ transformations in the entire pipeline.
+* Use `+ Add` in the file browser to add a new dataset definition.
+* Use `Schedule` to run the pipeline on a schedule!
+
+For more tutorials and reference material, see https://docs.databricks.com/dlt.
diff --git a/...t/error-cases/output/my_project/my_project_pipeline/explorations/sample_exploration.ipynb b/...t/error-cases/output/my_project/my_project_pipeline/explorations/sample_exploration.ipynb
@@ -0,0 +1,63 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "application/vnd.databricks.v1+cell": {
+     "cellMetadata": {},
+     "inputWidgets": {},
+     "nuid": "[UUID]",
+     "showTitle": false,
+     "tableResultSettingsMap": {},
+     "title": ""
+    }
+   },
+   "source": [
+    "### Example Exploratory Notebook\n",
+    "\n",
+    "Use this notebook to explore the data generated by the pipeline in your preferred programming language.\n",
+    "\n",
+    "**Note**: This notebook is not executed as part of the pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "application/vnd.databricks.v1+cell": {
+     "cellMetadata": {},
+     "inputWidgets": {},
+     "nuid": "[UUID]",
+     "showTitle": false,
+     "tableResultSettingsMap": {},
+     "title": ""
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# !!! Before performing any data analysis, make sure to run the pipeline to materialize the sample datasets. The tables referenced in this notebook depend on that step.\n",
+    "\n",
+    "display(spark.sql(\"SELECT * FROM hive_metastore.[USERNAME].sample_trips_my_project\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "application/vnd.databricks.v1+notebook": {
+   "computePreferences": null,
+   "dashboards": [],
+   "environmentMetadata": null,
+   "inputWidgetPreferences": null,
+   "language": "python",
+   "notebookMetadata": {
+    "pythonIndentUnit": 2
+   },
+   "notebookName": "sample_exploration",
+   "widgets": {}
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/.../pipelines/init/error-cases/output/my_project/my_project_pipeline/my_project.pipeline.yml b/.../pipelines/init/error-cases/output/my_project/my_project_pipeline/my_project.pipeline.yml
@@ -0,0 +1,12 @@
+resources:
+  pipelines:
+    my_project_pipeline:
+      name: my_project_pipeline
+      serverless: true
+      channel: "PREVIEW"
+      catalog: ${var.catalog}
+      schema: ${var.schema}
+      root_path: "."
+      libraries:
+        - glob:
+            include: transformations/**
diff --git a/...or-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py b/...or-cases/output/my_project/my_project_pipeline/transformations/sample_trips_my_project.py
@@ -0,0 +1,13 @@
+import dlt
+from pyspark.sql.functions import col
+from utilities import utils
+
+
+# This file defines a sample transformation.
+# Edit the sample below or add new transformations
+# using "+ Add" in the file browser.
+
+
+@dlt.table
+def sample_trips_my_project():  # sample NYC taxi trips plus a derived "trip_distance_km" column
+    return spark.read.table("samples.nyctaxi.trips").withColumn("trip_distance_km", utils.distance_km(col("trip_distance")))
diff --git a/...or-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py b/...or-cases/output/my_project/my_project_pipeline/transformations/sample_zones_my_project.py
@@ -0,0 +1,13 @@
+import dlt
+from pyspark.sql.functions import col, sum
+
+
+# This file defines a sample transformation.
+# Edit the sample below or add new transformations
+# using "+ Add" in the file browser.
+
+
+@dlt.table
+def sample_zones_my_project():
+    # Read from the "sample_trips_my_project" table, then sum the fares per pickup zip code
+    return spark.read.table("sample_trips_my_project").groupBy(col("pickup_zip")).agg(sum("fare_amount").alias("total_fare"))
diff --git a/...tance/pipelines/init/error-cases/output/my_project/my_project_pipeline/utilities/utils.py b/...tance/pipelines/init/error-cases/output/my_project/my_project_pipeline/utilities/utils.py
@@ -0,0 +1,8 @@
+from pyspark.sql.functions import udf
+from pyspark.sql.types import FloatType
+
+
+@udf(returnType=FloatType())
+def distance_km(distance_miles):
+    """Convert miles to kilometers (1 mile = 1.60934 km); a null input yields null instead of raising."""
+    return None if distance_miles is None else distance_miles * 1.60934
diff --git a/acceptance/pipelines/init/error-cases/output/my_project/out.gitignore b/acceptance/pipelines/init/error-cases/output/my_project/out.gitignore
@@ -0,0 +1,8 @@
+.databricks/
+build/
+dist/
+__pycache__/
+*.egg-info
+.venv/
+**/explorations/**
+!**/explorations/README.md
diff --git a/acceptance/pipelines/init/error-cases/script b/acceptance/pipelines/init/error-cases/script
@@ -0,0 +1,24 @@
+tmpdir="./subdir"  # directory the pipelines binary is installed into
+pipelines="$tmpdir/pipelines"
+mkdir -p $tmpdir
+
+title "Install pipelines CLI"
+trace errcode $CLI install-pipelines-cli -d $tmpdir
+
+title "Test with missing config file"
+trace errcode $pipelines init --output-dir output
+
+title "Test with invalid project name (contains uppercase letters)"
+echo '{"project_name": "InvalidProjectName"}' > invalid_input.json
+trace errcode $pipelines init --config-file ./invalid_input.json --output-dir invalid-output
+
+title "Test with non-existent config file"
+trace errcode $pipelines init --config-file ./nonexistent.json --output-dir invalid-output-2
+
+# Rename the generated .gitignore so it does not affect this repository's git behaviour
+mv output/my_project/.gitignore output/my_project/out.gitignore
+
+# Clean up the temporary config file and the installed pipelines CLI
+rm -f invalid_input.json
+rm -f $pipelines
+rm -rf $tmpdir
diff --git a/acceptance/pipelines/init/error-cases/test.toml b/acceptance/pipelines/init/error-cases/test.toml
@@ -0,0 +1,4 @@
+[[Repls]]
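+# Windows words the missing-file error differently; rewrite it to the POSIX message used in the expected output.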
+Old = 'The system cannot find the file specified.'
+New = 'no such file or directory'
diff --git a/acceptance/pipelines/init/python/input.json b/acceptance/pipelines/init/python/input.json
@@ -0,0 +1,7 @@
+{
+    "project_name": "my_python_project",
+    "default_catalog": "main",
+    "personal_schemas": "yes",
+    "shared_schema": "default",
+    "language": "python"
+}
diff --git a/acceptance/pipelines/init/python/output.txt b/acceptance/pipelines/init/python/output.txt
@@ -0,0 +1,14 @@
+
+=== Install pipelines CLI
+>>> errcode [CLI] install-pipelines-cli -d ./subdir
+pipelines successfully installed in directory "./subdir"
+
+=== Test basic pipelines init with configuration file
+>>> ./subdir/pipelines init --config-file ./input.json --output-dir output
+
+Welcome to the template for pipelines!
+
+
+Your new project has been created in the 'my_python_project' directory!
+
+Refer to the README.md file for "getting started" instructions!
diff --git a/acceptance/pipelines/init/python/output/my_python_project/.vscode/__builtins__.pyi b/acceptance/pipelines/init/python/output/my_python_project/.vscode/__builtins__.pyi
@@ -0,0 +1,3 @@
+# Typings for Pylance in Visual Studio Code
+# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
+from databricks.sdk.runtime import *
diff --git a/acceptance/pipelines/init/python/output/my_python_project/.vscode/extensions.json b/acceptance/pipelines/init/python/output/my_python_project/.vscode/extensions.json
@@ -0,0 +1,7 @@
+{
+    "recommendations": [
+        "databricks.databricks",
+        "ms-python.vscode-pylance",
+        "redhat.vscode-yaml"
+    ]
+}
diff --git a/acceptance/pipelines/init/python/output/my_python_project/.vscode/settings.json b/acceptance/pipelines/init/python/output/my_python_project/.vscode/settings.json
@@ -0,0 +1,21 @@
+{
+    "python.analysis.stubPath": ".vscode",
+    "databricks.python.envFile": "${workspaceFolder}/.env",
+    "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
+    "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.analysis.extraPaths": ["resources/my_python_project_pipeline"],
+    "files.exclude": {
+        "**/*.egg-info": true,
+        "**/__pycache__": true,
+        ".pytest_cache": true,
+    },
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnSave": true,
+    },
+}