Skip to content

Commit

Permalink
Persist deployment metadata in WSFS (#845)
Browse files Browse the repository at this point in the history
## Changes

This PR introduces a metadata struct that stores a subset of bundle
configuration that we wish to expose to other Databricks services that
wish to integrate with bundles.

This metadata file is uploaded to a file
`${bundle.workspace.state_path}/metadata.json` in the WSFS destination
of the bundle deployment.

Documentation for emitted metadata fields:
* `version`: Version for the metadata file schema
* `config.bundle.git.branch`: Name of the git branch the bundle was
deployed from.
* `config.bundle.git.origin_url`: URL for git remote "origin"
* `config.bundle.git.bundle_root_path`: Relative path of the bundle root
from the root of the git repository. Is set to "." if they are the same.
* `config.bundle.git.commit`: SHA-1 commit hash of the exact commit this
bundle was deployed from. Note: the deployment might not exactly match
this commit version if there are changes that have not been committed to
git at deploy time.
* `file_path`: Path in workspace where we sync bundle files to. 
* `resources.jobs.[job-ref].id`: Id of the job
* `resources.jobs.[job-ref].relative_path`: Relative path of the yaml
config file from the bundle root where this job was defined.

Example metadata object when bundle root and git root are the same:
```json
{
  "version": 1,
  "config": {
    "bundle": {
      "lock": {},
      "git": {
        "branch": "master",
        "origin_url": "www.host.com",
        "commit": "7af8e5d3f5dceffff9295d42d21606ccf056dce0",
        "bundle_root_path": "."
      }
    },
    "workspace": {
      "file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
    },
    "resources": {
      "jobs": {
        "bar": {
          "id": "245921165354846",
          "relative_path": "databricks.yml"
        }
      }
    },
    "sync": {}
  }
}
```

Example metadata when the git root is one level above the bundle repo:
```json
{
  "version": 1,
  "config": {
    "bundle": {
      "lock": {},
      "git": {
        "branch": "dev-branch",
        "origin_url": "www.my-repo.com",
        "commit": "3db46ef750998952b00a2b3e7991e31787e4b98b",
        "bundle_root_path": "pipeline-progress"
      }
    },
    "workspace": {
      "file_path": "/Users/shreyas.goenka@databricks.com/.bundle/pipeline-progress/default/files"
    },
    "resources": {
      "jobs": {
        "bar": {
          "id": "245921165354846",
          "relative_path": "databricks.yml"
        }
      }
    },
    "sync": {}
  }
}
```


This unblocks integration with the jobs break-glass UI for bundles.

## Tests
Unit tests and integration tests.
  • Loading branch information
shreyas-goenka committed Oct 27, 2023
1 parent 905fe10 commit 5a8cd0c
Show file tree
Hide file tree
Showing 19 changed files with 429 additions and 8 deletions.
9 changes: 9 additions & 0 deletions bundle/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/bundle/metadata"
"github.com/databricks/cli/folders"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/locker"
Expand All @@ -31,6 +32,14 @@ const internalFolder = ".internal"
type Bundle struct {
Config config.Root

// Metadata about the bundle deployment. This is the interface Databricks services
// rely on to integrate with bundles when they need additional information about
// a bundle deployment.
//
// After deploy, a file containing the metadata (metadata.json) can be found
// in the WSFS location containing the bundle state.
Metadata metadata.Metadata

// Store a pointer to the workspace client.
// It can be initialized on demand after loading the configuration.
clientOnce sync.Once
Expand Down
2 changes: 1 addition & 1 deletion bundle/config/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ type Bundle struct {
Lock Lock `json:"lock" bundle:"readonly"`

// Force-override Git branch validation.
Force bool `json:"force" bundle:"readonly"`
Force bool `json:"force,omitempty" bundle:"readonly"`

// Contains Git information like current commit, current branch and
// origin url. Automatically loaded by reading .git directory if not specified
Expand Down
3 changes: 3 additions & 0 deletions bundle/config/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ type Git struct {
OriginURL string `json:"origin_url,omitempty"`
Commit string `json:"commit,omitempty" bundle:"readonly"`

// Path to bundle root relative to the git repository root.
BundleRootPath string `json:"bundle_root_path,omitempty" bundle:"readonly"`

// Inferred is set to true if the Git details were inferred and weren't set explicitly
Inferred bool `json:"-" bundle:"readonly"`

Expand Down
4 changes: 2 additions & 2 deletions bundle/config/lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ type Lock struct {
// Enabled toggles deployment lock. True by default.
// Use a pointer value so that only explicitly configured values are set
// and we don't merge configuration with zero-initialized values.
Enabled *bool `json:"enabled"`
Enabled *bool `json:"enabled,omitempty"`

// Force acquisition of deployment lock even if it is currently held.
// This may be necessary if a prior deployment failed to release the lock.
Force bool `json:"force"`
Force bool `json:"force,omitempty"`
}

func (lock Lock) IsEnabled() bool {
Expand Down
13 changes: 13 additions & 0 deletions bundle/config/mutator/load_git_details.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package mutator

import (
"context"
"path/filepath"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/git"
Expand Down Expand Up @@ -52,5 +53,17 @@ func (m *loadGitDetails) Apply(ctx context.Context, b *bundle.Bundle) error {
remoteUrl := repo.OriginUrl()
b.Config.Bundle.Git.OriginURL = remoteUrl
}

// Compute relative path of the bundle root from the Git repo root.
absBundlePath, err := filepath.Abs(b.Config.Path)
if err != nil {
return err
}
// repo.Root() returns the absolute path of the repo
relBundlePath, err := filepath.Rel(repo.Root(), absBundlePath)
if err != nil {
return err
}
b.Config.Bundle.Git.BundleRootPath = filepath.ToSlash(relBundlePath)
return nil
}
4 changes: 2 additions & 2 deletions bundle/config/paths/paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
)

type Paths struct {
// ConfigFilePath holds the path to the configuration file that
// described the resource that this type is embedded in.
// Absolute path on the local file system to the configuration file that holds
// the definition of this resource.
ConfigFilePath string `json:"-" bundle:"readonly"`
}

Expand Down
51 changes: 51 additions & 0 deletions bundle/deploy/metadata/compute.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package metadata

import (
"context"
"fmt"
"path/filepath"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/metadata"
)

// compute is a mutator that populates b.Metadata from the bundle
// configuration after deployment, when resource IDs (e.g. job IDs) are known.
type compute struct{}

// Compute returns a mutator that computes the deployment metadata for a bundle.
func Compute() bundle.Mutator {
	return &compute{}
}

// Name identifies this mutator in logs and progress reporting.
func (m *compute) Name() string {
	return "metadata.Compute"
}

// Apply computes the deployment metadata for the bundle and stores it on
// b.Metadata. It records the bundle's Git details, the workspace path that
// bundle files are synced to, and, for every job, its ID together with the
// path of the config file it was defined in (relative to the bundle root).
func (m *compute) Apply(_ context.Context, b *bundle.Bundle) error {
	// Per-job metadata, keyed by the job's reference name in the bundle config.
	jobs := make(map[string]*metadata.Job, len(b.Config.Resources.Jobs))
	for name, job := range b.Config.Resources.Jobs {
		// Path of the config file defining this job, relative to the bundle
		// root. Normalized to forward slashes so the emitted metadata is
		// platform independent.
		rel, err := filepath.Rel(b.Config.Path, job.ConfigFilePath)
		if err != nil {
			return fmt.Errorf("failed to compute relative path for job %s: %w", name, err)
		}
		jobs[name] = &metadata.Job{
			ID:           job.ID,
			RelativePath: filepath.ToSlash(rel),
		}
	}

	b.Metadata = metadata.Metadata{
		Version: metadata.Version,
		Config: metadata.Config{
			// Git details (branch, origin URL, commit, bundle root path).
			Bundle: metadata.Bundle{
				Git: b.Config.Bundle.Git,
			},
			Resources: metadata.Resources{
				Jobs: jobs,
			},
			// File upload destination of the bundle.
			Workspace: metadata.Workspace{
				FilesPath: b.Config.Workspace.FilesPath,
			},
		},
	}
	return nil
}
100 changes: 100 additions & 0 deletions bundle/deploy/metadata/compute_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package metadata

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/paths"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/bundle/metadata"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestComputeMetadataMutator verifies that the Compute mutator derives the
// expected deployment metadata from a bundle configuration: Git details,
// the workspace files path, and per-job ID + config file path.
func TestComputeMetadataMutator(t *testing.T) {
	b := &bundle.Bundle{
		Config: config.Root{
			Workspace: config.Workspace{
				RootPath:      "/Users/shreyas.goenka@databricks.com",
				ArtifactsPath: "/Users/shreyas.goenka@databricks.com/artifacts",
				FilesPath:     "/Users/shreyas.goenka@databricks.com/files",
			},
			Bundle: config.Bundle{
				Name:   "my-bundle",
				Target: "development",
				Git: config.Git{
					Branch:         "my-branch",
					OriginURL:      "www.host.com",
					Commit:         "abcd",
					BundleRootPath: "a/b/c/d",
				},
			},
			Resources: config.Resources{
				Jobs: map[string]*resources.Job{
					"my-job-1": {
						Paths: paths.Paths{
							ConfigFilePath: "a/b/c",
						},
						ID: "1111",
						JobSettings: &jobs.JobSettings{
							Name: "My Job One",
						},
					},
					"my-job-2": {
						Paths: paths.Paths{
							ConfigFilePath: "d/e/f",
						},
						ID: "2222",
						JobSettings: &jobs.JobSettings{
							Name: "My Job Two",
						},
					},
				},
				// Pipelines are deliberately absent from the expected metadata
				// below: only jobs are represented in metadata.
				Pipelines: map[string]*resources.Pipeline{
					"my-pipeline": {
						Paths: paths.Paths{
							ConfigFilePath: "abc",
						},
					},
				},
			},
		},
	}

	expectedMetadata := metadata.Metadata{
		Version: metadata.Version,
		Config: metadata.Config{
			Workspace: metadata.Workspace{
				FilesPath: "/Users/shreyas.goenka@databricks.com/files",
			},
			Bundle: metadata.Bundle{
				Git: config.Git{
					Branch:         "my-branch",
					OriginURL:      "www.host.com",
					Commit:         "abcd",
					BundleRootPath: "a/b/c/d",
				},
			},
			Resources: metadata.Resources{
				Jobs: map[string]*metadata.Job{
					// b.Config.Path is unset (empty) in this fixture, so the
					// relative paths equal the ConfigFilePath values above.
					"my-job-1": {
						RelativePath: "a/b/c",
						ID:           "1111",
					},
					"my-job-2": {
						RelativePath: "d/e/f",
						ID:           "2222",
					},
				},
			},
		},
	}

	err := Compute().Apply(context.Background(), b)
	require.NoError(t, err)

	assert.Equal(t, expectedMetadata, b.Metadata)
}
36 changes: 36 additions & 0 deletions bundle/deploy/metadata/upload.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package metadata

import (
"bytes"
"context"
"encoding/json"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/filer"
)

// MetadataFileName is the name of the file the deployment metadata is written
// to, under the bundle's workspace state path.
const MetadataFileName = "metadata.json"

// upload is a mutator that writes b.Metadata to the WSFS state location.
type upload struct{}

// Upload returns a mutator that uploads the computed deployment metadata.
func Upload() bundle.Mutator {
	return &upload{}
}

// Name identifies this mutator in logs and progress reporting.
func (m *upload) Name() string {
	return "metadata.Upload"
}

// Apply serializes b.Metadata as indented JSON and writes it to
// MetadataFileName under the bundle's workspace state path, creating parent
// directories as needed and overwriting any existing file.
func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error {
	client, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath)
	if err != nil {
		return err
	}

	raw, err := json.MarshalIndent(b.Metadata, "", " ")
	if err != nil {
		return err
	}

	return client.Write(ctx, MetadataFileName, bytes.NewReader(raw), filer.CreateParentDirectories, filer.OverwriteIfExists)
}
45 changes: 45 additions & 0 deletions bundle/metadata/metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package metadata

import (
"github.com/databricks/cli/bundle/config"
)

// Version of the metadata file schema. Bump this when making breaking changes
// to the emitted metadata so consumers can detect incompatibility.
const Version = 1

// Bundle holds the subset of bundle-level configuration exposed in metadata.
type Bundle struct {
	// Git details (branch, origin URL, commit, bundle root path) the bundle
	// was deployed from.
	Git config.Git `json:"git,omitempty"`
}

// Workspace holds workspace paths relevant to the deployment.
type Workspace struct {
	// Path in the workspace that bundle files are synced to.
	FilesPath string `json:"file_path,omitempty"`
}

// Job is the metadata emitted for a single job resource.
type Job struct {
	// ID of the deployed job.
	ID string `json:"id,omitempty"`

	// Relative path from the bundle root to the configuration file that holds
	// the definition of this resource.
	RelativePath string `json:"relative_path,omitempty"`
}

// Resources holds per-resource metadata, keyed by the resource's reference
// name in the bundle configuration.
type Resources struct {
	Jobs map[string]*Job `json:"jobs,omitempty"`
}

// Config mirrors the shape of the bundle configuration for the fields exposed
// in metadata.
//
// NOTE(review): `omitempty` has no effect on non-pointer struct fields
// (encoding/json never considers a struct value "empty"), so these fields are
// always emitted.
type Config struct {
	Bundle    Bundle    `json:"bundle,omitempty"`
	Workspace Workspace `json:"workspace,omitempty"`
	Resources Resources `json:"resources,omitempty"`
}

// Metadata about the bundle deployment. This is the interface Databricks services
// rely on to integrate with bundles when they need additional information about
// a bundle deployment.
//
// After deploy, a file containing the metadata (metadata.json) can be found
// in the WSFS location containing the bundle state.
type Metadata struct {
	Version int `json:"version"`

	Config Config `json:"config"`
}
8 changes: 7 additions & 1 deletion bundle/phases/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/deploy/files"
"github.com/databricks/cli/bundle/deploy/lock"
"github.com/databricks/cli/bundle/deploy/metadata"
"github.com/databricks/cli/bundle/deploy/terraform"
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/cli/bundle/python"
Expand All @@ -31,7 +32,12 @@ func Deploy() bundle.Mutator {
terraform.StatePull(),
bundle.Defer(
terraform.Apply(),
terraform.StatePush(),
bundle.Seq(
terraform.StatePush(),
terraform.Load(),
metadata.Compute(),
metadata.Upload(),
),
),
),
lock.Release(lock.GoalDeploy),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"properties": {
"unique_id": {
"type": "string",
"description": "Unique ID for job name"
"description": "Unique ID for pipeline name"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"properties": {
"unique_id": {
"type": "string",
"description": "Unique ID for job name"
},
"spark_version": {
"type": "string",
"description": "Spark version used for job cluster"
},
"node_type_id": {
"type": "string",
"description": "Node type id for job cluster"
}
}
}
2 changes: 2 additions & 0 deletions internal/bundle/bundles/job_metadata/template/a/b/bar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Databricks notebook source
# Test fixture notebook: prints a marker string so the integration test can
# verify the notebook task executed.
print("bye")
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Test fixture: defines a second job ("bar") for the job-metadata integration
# test. Template variables ({{.unique_id}}, {{.spark_version}},
# {{.node_type_id}}) are substituted when the test materializes the template.
resources:
  jobs:
    bar:
      name: test-job-metadata-2-{{.unique_id}}
      tasks:
        - task_key: my_notebook_task
          new_cluster:
            num_workers: 1
            spark_version: "{{.spark_version}}"
            node_type_id: "{{.node_type_id}}"
          notebook_task:
            notebook_path: "./bar.py"
Loading

0 comments on commit 5a8cd0c

Please sign in to comment.