diff --git a/acceptance/bundle/deploy/experiments/basic/out.plan.direct-exp.txt b/acceptance/bundle/deploy/experiments/basic/out.plan.direct-exp.txt new file mode 100644 index 0000000000..c54c9d511c --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/out.plan.direct-exp.txt @@ -0,0 +1 @@ +Plan: 0 to add, 0 to change, 0 to delete, 1 unchanged diff --git a/acceptance/bundle/deploy/experiments/basic/out.plan.terraform.txt b/acceptance/bundle/deploy/experiments/basic/out.plan.terraform.txt new file mode 100644 index 0000000000..986d3cf68c --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/out.plan.terraform.txt @@ -0,0 +1,3 @@ +update experiments.my_experiment + +Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged diff --git a/acceptance/bundle/deploy/experiments/basic/out.test.toml b/acceptance/bundle/deploy/experiments/basic/out.test.toml new file mode 100644 index 0000000000..43c8f792f5 --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = true + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct-exp"] diff --git a/acceptance/bundle/deploy/experiments/basic/output.txt b/acceptance/bundle/deploy/experiments/basic/output.txt new file mode 100644 index 0000000000..6a97cdaf36 --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/output.txt @@ -0,0 +1,198 @@ + +=== create the experiment +>>> export EXPERIMENT_ARTIFACT_LOCATION=s3://original-[UNIQUE_NAME] + +>>> export EXPERIMENT_NAME_PREFIX=original + +>>> envsubst + +>>> [CLI] bundle plan +create experiments.my_experiment + +Plan: 1 to add, 0 to change, 0 to delete, 0 unchanged + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/experiment-basic-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! 
+ +>>> [CLI] experiments get-experiment [NUMID] +{ + "name": "/Users/[USERNAME]/original-[UNIQUE_NAME]", + "artifact_location": "s3://original-[UNIQUE_NAME]", + "tags": [ + { + "key": "k1", + "value": "v1" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.experiment.sourceName", + "value": "/Users/[USERNAME]/original-[UNIQUE_NAME]" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.ownerEmail", + "value": "[USERNAME]" + }, + { + "key": "mlflow.experimentType", + "value": "MLFLOW_EXPERIMENT" + } + ] +} + +=== update the name +>>> export EXPERIMENT_NAME_PREFIX=new-name + +>>> envsubst + +>>> [CLI] bundle plan +update experiments.my_experiment + +Plan: 0 to add, 1 to change, 0 to delete, 0 unchanged + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/experiment-basic-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] experiments get-experiment [NUMID] +{ + "name": "/Users/[USERNAME]/new-name-[UNIQUE_NAME]", + "artifact_location": "s3://original-[UNIQUE_NAME]", + "tags": [ + { + "key": "k1", + "value": "v1" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.experiment.sourceName", + "value": "/Users/[USERNAME]/new-name-[UNIQUE_NAME]" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.ownerEmail", + "value": "[USERNAME]" + }, + { + "key": "mlflow.experimentType", + "value": "MLFLOW_EXPERIMENT" + } + ] +} + +=== updating the artifact location should cause a recreation +>>> export EXPERIMENT_ARTIFACT_LOCATION=s3://new-[UNIQUE_NAME] + +>>> envsubst + +>>> [CLI] bundle plan +recreate experiments.my_experiment + +Plan: 1 to add, 0 to change, 1 to delete, 0 unchanged + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/experiment-basic-[UNIQUE_NAME]/default/files... +Deploying resources... 
+Updating deployment state... +Deployment complete! + +>>> [CLI] experiments get-experiment [NUMID] +{ + "name": "/Users/[USERNAME]/new-name-[UNIQUE_NAME]", + "artifact_location": "s3://new-[UNIQUE_NAME]", + "tags": [ + { + "key": "k1", + "value": "v1" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.experiment.sourceName", + "value": "/Users/[USERNAME]/new-name-[UNIQUE_NAME]" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.ownerEmail", + "value": "[USERNAME]" + }, + { + "key": "mlflow.experimentType", + "value": "MLFLOW_EXPERIMENT" + } + ] +} + +=== adding a new tag should be a no-op +>>> envsubst + +>>> [CLI] bundle plan + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/experiment-basic-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] experiments get-experiment [NUMID] +{ + "name": "/Users/[USERNAME]/new-name-[UNIQUE_NAME]", + "artifact_location": "s3://new-[UNIQUE_NAME]", + "tags": [ + { + "key": "k1", + "value": "v1" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.experiment.sourceName", + "value": "/Users/[USERNAME]/new-name-[UNIQUE_NAME]" + }, + { + "key": "mlflow.ownerId", + "value": "[USERID]" + }, + { + "key": "mlflow.ownerEmail", + "value": "[USERNAME]" + }, + { + "key": "mlflow.experimentType", + "value": "MLFLOW_EXPERIMENT" + } + ] +} + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete experiment my_experiment + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/experiment-basic-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
diff --git a/acceptance/bundle/deploy/experiments/basic/script b/acceptance/bundle/deploy/experiments/basic/script new file mode 100755 index 0000000000..6bb96db86d --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/script @@ -0,0 +1,37 @@ +cleanup() { + trace $CLI bundle destroy --auto-approve +} + +trap cleanup EXIT + +title "create the experiment" +trace export EXPERIMENT_ARTIFACT_LOCATION="s3://original-${UNIQUE_NAME}" +trace export EXPERIMENT_NAME_PREFIX="original" +trace envsubst < templates/one_tag.tmpl > databricks.yml +trace $CLI bundle plan +trace $CLI bundle deploy +experiment_id=$($CLI bundle summary --output json | jq -r '.resources.experiments.my_experiment.id') +trace $CLI experiments get-experiment $experiment_id | jq '.experiment | {name, artifact_location, tags}' + +title "update the name" +trace export EXPERIMENT_NAME_PREFIX="new-name" +trace envsubst < templates/one_tag.tmpl > databricks.yml +trace $CLI bundle plan +trace $CLI bundle deploy +experiment_id=$($CLI bundle summary --output json | jq -r '.resources.experiments.my_experiment.id') +trace $CLI experiments get-experiment $experiment_id | jq '.experiment | {name, artifact_location, tags}' + +title "updating the artifact location should cause a recreation" +trace export EXPERIMENT_ARTIFACT_LOCATION="s3://new-${UNIQUE_NAME}" +trace envsubst < templates/one_tag.tmpl > databricks.yml +trace $CLI bundle plan +trace $CLI bundle deploy +experiment_id=$($CLI bundle summary --output json | jq -r '.resources.experiments.my_experiment.id') +trace $CLI experiments get-experiment $experiment_id | jq '.experiment | {name, artifact_location, tags}' + +title "adding a new tag should be a no-op" +trace envsubst < templates/two_tag.tmpl > databricks.yml +trace $CLI bundle plan > out.plan.$DATABRICKS_BUNDLE_ENGINE.txt +trace $CLI bundle deploy +experiment_id=$($CLI bundle summary --output json | jq -r '.resources.experiments.my_experiment.id') +trace $CLI experiments get-experiment 
$experiment_id | jq '.experiment | {name, artifact_location, tags}' diff --git a/acceptance/bundle/deploy/experiments/basic/templates/one_tag.tmpl b/acceptance/bundle/deploy/experiments/basic/templates/one_tag.tmpl new file mode 100644 index 0000000000..3766a7c71b --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/templates/one_tag.tmpl @@ -0,0 +1,11 @@ +bundle: + name: experiment-basic-$UNIQUE_NAME + +resources: + experiments: + my_experiment: + artifact_location: $EXPERIMENT_ARTIFACT_LOCATION + name: /Users/${CURRENT_USER_NAME}/${EXPERIMENT_NAME_PREFIX}-${UNIQUE_NAME} + tags: + - key: "k1" + value: "v1" diff --git a/acceptance/bundle/deploy/experiments/basic/templates/two_tag.tmpl b/acceptance/bundle/deploy/experiments/basic/templates/two_tag.tmpl new file mode 100644 index 0000000000..035f58e70e --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/templates/two_tag.tmpl @@ -0,0 +1,13 @@ +bundle: + name: experiment-basic-$UNIQUE_NAME + +resources: + experiments: + my_experiment: + artifact_location: $EXPERIMENT_ARTIFACT_LOCATION + name: /Users/${CURRENT_USER_NAME}/${EXPERIMENT_NAME_PREFIX}-${UNIQUE_NAME} + tags: + - key: "k1" + value: "v1" + - key: "k2" + value: "v2" diff --git a/acceptance/bundle/deploy/experiments/basic/test.toml b/acceptance/bundle/deploy/experiments/basic/test.toml new file mode 100644 index 0000000000..c6c1abb2e6 --- /dev/null +++ b/acceptance/bundle/deploy/experiments/basic/test.toml @@ -0,0 +1,10 @@ +Cloud = true +Local = true + +[[Repls]] +Old = '\d{3,}' +New = "[NUMID]" + +# Test both terraform and direct deployment engines +[EnvMatrix] +DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct-exp"] diff --git a/acceptance/bundle/deploy/mlops-stacks/out.test.toml b/acceptance/bundle/deploy/mlops-stacks/out.test.toml index 3cdb920b67..c3a1b55592 100644 --- a/acceptance/bundle/deploy/mlops-stacks/out.test.toml +++ b/acceptance/bundle/deploy/mlops-stacks/out.test.toml @@ -2,4 +2,4 @@ Local = false Cloud = true [EnvMatrix] - 
DATABRICKS_BUNDLE_ENGINE = ["terraform"] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct-exp"] diff --git a/acceptance/bundle/deploy/mlops-stacks/test.toml b/acceptance/bundle/deploy/mlops-stacks/test.toml index ae16d8f388..98dbfa6435 100644 --- a/acceptance/bundle/deploy/mlops-stacks/test.toml +++ b/acceptance/bundle/deploy/mlops-stacks/test.toml @@ -3,8 +3,6 @@ Local=false Badness = "the newly initialized bundle from the 'mlops-stacks' template contains two validation warnings in the configuration" -EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform"] # uses 'experiments' resource - Ignore = [ "config.json" ] diff --git a/acceptance/bundle/refschema/out.fields.txt b/acceptance/bundle/refschema/out.fields.txt index eb7cd5d6c1..91ebbdc1ad 100644 --- a/acceptance/bundle/refschema/out.fields.txt +++ b/acceptance/bundle/refschema/out.fields.txt @@ -194,6 +194,27 @@ resources.database_instances.*.state database.DatabaseInstanceState ALL resources.database_instances.*.stopped bool ALL resources.database_instances.*.uid string ALL resources.database_instances.*.url string INPUT +resources.experiments.*.artifact_location string ALL +resources.experiments.*.creation_time int64 REMOTE +resources.experiments.*.experiment_id string REMOTE +resources.experiments.*.id string INPUT +resources.experiments.*.last_update_time int64 REMOTE +resources.experiments.*.lifecycle resources.Lifecycle INPUT +resources.experiments.*.lifecycle.prevent_destroy bool INPUT +resources.experiments.*.lifecycle_stage string REMOTE +resources.experiments.*.modified_status string INPUT +resources.experiments.*.name string ALL +resources.experiments.*.permissions []resources.MlflowExperimentPermission INPUT +resources.experiments.*.permissions[*] resources.MlflowExperimentPermission INPUT +resources.experiments.*.permissions[*].group_name string INPUT +resources.experiments.*.permissions[*].level resources.MlflowExperimentPermissionLevel INPUT 
+resources.experiments.*.permissions[*].service_principal_name string INPUT +resources.experiments.*.permissions[*].user_name string INPUT +resources.experiments.*.tags []ml.ExperimentTag ALL +resources.experiments.*.tags[*] ml.ExperimentTag ALL +resources.experiments.*.tags[*].key string ALL +resources.experiments.*.tags[*].value string ALL +resources.experiments.*.url string INPUT resources.jobs.*.budget_policy_id string INPUT STATE resources.jobs.*.continuous *jobs.Continuous INPUT STATE resources.jobs.*.continuous.pause_status jobs.PauseStatus INPUT STATE diff --git a/bundle/direct/dresources/all.go b/bundle/direct/dresources/all.go index a80069dc4b..fcf68e3836 100644 --- a/bundle/direct/dresources/all.go +++ b/bundle/direct/dresources/all.go @@ -9,6 +9,7 @@ import ( var SupportedResources = map[string]any{ "jobs": (*ResourceJob)(nil), "pipelines": (*ResourcePipeline)(nil), + "experiments": (*ResourceExperiment)(nil), "schemas": (*ResourceSchema)(nil), "volumes": (*ResourceVolume)(nil), "models": (*ResourceMlflowModel)(nil), diff --git a/bundle/direct/dresources/all_test.go b/bundle/direct/dresources/all_test.go index 967c3d8426..18116b5be1 100644 --- a/bundle/direct/dresources/all_test.go +++ b/bundle/direct/dresources/all_test.go @@ -61,6 +61,18 @@ var testConfig map[string]any = map[string]any{ Name: "main.myschema.my_synced_table", }, }, + "experiments": &resources.MlflowExperiment{ + CreateExperiment: ml.CreateExperiment{ + Name: "my-experiment", + Tags: []ml.ExperimentTag{ + { + Key: "my-tag", + Value: "my-value", + }, + }, + ArtifactLocation: "s3://my-bucket/my-experiment", + }, + }, "models": &resources.MlflowModel{ CreateModelRequest: ml.CreateModelRequest{ Name: "my_mlflow_model", diff --git a/bundle/direct/dresources/experiment.go b/bundle/direct/dresources/experiment.go new file mode 100644 index 0000000000..92e7baa8e6 --- /dev/null +++ b/bundle/direct/dresources/experiment.go @@ -0,0 +1,83 @@ +package dresources + +import ( + "context" + + 
"github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/deployplan" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/ml" +) + +type ResourceExperiment struct { + client *databricks.WorkspaceClient +} + +func (*ResourceExperiment) New(client *databricks.WorkspaceClient) *ResourceExperiment { + return &ResourceExperiment{ + client: client, + } +} + +func (*ResourceExperiment) PrepareState(input *resources.MlflowExperiment) *ml.CreateExperiment { + return &ml.CreateExperiment{ + Name: input.Name, + ArtifactLocation: input.ArtifactLocation, + Tags: input.Tags, + ForceSendFields: filterFields[ml.CreateExperiment](input.ForceSendFields), + } +} + +func (*ResourceExperiment) RemapState(experiment *ml.Experiment) *ml.CreateExperiment { + return &ml.CreateExperiment{ + Name: experiment.Name, + ArtifactLocation: experiment.ArtifactLocation, + Tags: experiment.Tags, + ForceSendFields: filterFields[ml.CreateExperiment](experiment.ForceSendFields), + } +} + +func (r *ResourceExperiment) DoRefresh(ctx context.Context, id string) (*ml.Experiment, error) { + result, err := r.client.Experiments.GetExperiment(ctx, ml.GetExperimentRequest{ + ExperimentId: id, + }) + if err != nil { + return nil, err + } + return result.Experiment, nil +} + +func (r *ResourceExperiment) DoCreate(ctx context.Context, config *ml.CreateExperiment) (string, error) { + result, err := r.client.Experiments.CreateExperiment(ctx, *config) + if err != nil { + return "", err + } + return result.ExperimentId, nil +} + +func (r *ResourceExperiment) DoUpdate(ctx context.Context, id string, config *ml.CreateExperiment) error { + updateReq := ml.UpdateExperiment{ + ExperimentId: id, + NewName: config.Name, + ForceSendFields: filterFields[ml.UpdateExperiment](config.ForceSendFields), + } + + return r.client.Experiments.UpdateExperiment(ctx, updateReq) +} + +func (r *ResourceExperiment) DoDelete(ctx context.Context, id string) error { + 
+ return r.client.Experiments.DeleteExperiment(ctx, ml.DeleteExperiment{ + ExperimentId: id, + }) +} + +func (*ResourceExperiment) FieldTriggers() map[string]deployplan.ActionType { + // TF implementation: https://github.com/databricks/terraform-provider-databricks/blob/6c106e8e7052bb2726148d66309fd460ed444236/mlflow/resource_mlflow_experiment.go#L22 + return map[string]deployplan.ActionType{ + "name": deployplan.ActionTypeUpdate, + "artifact_location": deployplan.ActionTypeRecreate, + + // Tag updates are not supported by TF, so tag changes are skipped here to mirror that behaviour. + "tags": deployplan.ActionTypeSkip, + } +} diff --git a/libs/testserver/experiments.go b/libs/testserver/experiments.go new file mode 100644 index 0000000000..9f7a335d9b --- /dev/null +++ b/libs/testserver/experiments.go @@ -0,0 +1,117 @@ +package testserver + +import ( + "encoding/json" + "fmt" + "net/http" + "strconv" + + "github.com/databricks/databricks-sdk-go/service/ml" +) + +func (s *FakeWorkspace) ExperimentCreate(req Request) Response { + defer s.LockUnlock()() + + var experiment ml.CreateExperiment + if err := json.Unmarshal(req.Body, &experiment); err != nil { + return Response{ + Body: fmt.Sprintf("request parsing error: %s", err), + StatusCode: http.StatusBadRequest, + } + } + + // The server appends these tags automatically to experiments; the test server replicates that. + // mlflow.ownerId is listed twice on purpose: it matches the recorded acceptance output. 
+ appendTags := []ml.ExperimentTag{ + { + Key: "mlflow.ownerId", + Value: TestUser.Id, + }, + { + Key: "mlflow.experiment.sourceName", + Value: experiment.Name, + }, + { + Key: "mlflow.ownerId", + Value: TestUser.Id, + }, + { + Key: "mlflow.ownerEmail", + Value: TestUser.UserName, + }, + { + Key: "mlflow.experimentType", + Value: "MLFLOW_EXPERIMENT", + }, + } + + experimentId := strconv.Itoa(len(s.Experiments) + 1000) + + // Create the experiment + exp := ml.Experiment{ + ExperimentId: experimentId, + Name: experiment.Name, + ArtifactLocation: experiment.ArtifactLocation, + Tags: append(experiment.Tags, appendTags...), + LifecycleStage: "active", + } + + s.Experiments[experimentId] = ml.GetExperimentResponse{ + Experiment: &exp, + } + + return Response{ + Body: ml.CreateExperimentResponse{ + ExperimentId: experimentId, + }, + } +} + +func (s *FakeWorkspace) ExperimentUpdate(req Request) Response { + defer s.LockUnlock()() + + var updateReq ml.UpdateExperiment + if err := json.Unmarshal(req.Body, &updateReq); err != nil { + return Response{ + Body: fmt.Sprintf("request parsing error: %s", err), + StatusCode: http.StatusBadRequest, + } + } + + experiment, exists := s.Experiments[updateReq.ExperimentId] + if !exists { + return Response{ + StatusCode: http.StatusNotFound, + Body: map[string]string{"message": fmt.Sprintf("Experiment %s not found", updateReq.ExperimentId)}, + } + } + + // Update the experiment + if updateReq.NewName != "" { + experiment.Experiment.Name = updateReq.NewName + + // On rename the server also rewrites the mlflow.experiment.sourceName tag value. + // Mimic that behaviour in the test server. 
+ for i := range experiment.Experiment.Tags { + if experiment.Experiment.Tags[i].Key == "mlflow.experiment.sourceName" { + experiment.Experiment.Tags[i].Value = updateReq.NewName + } + } + } + + s.Experiments[updateReq.ExperimentId] = experiment + + return Response{} +} + +func (s *FakeWorkspace) ExperimentDelete(req Request) Response { + var deleteReq ml.DeleteExperiment + if err := json.Unmarshal(req.Body, &deleteReq); err != nil { + return Response{ + Body: fmt.Sprintf("request parsing error: %s", err), + StatusCode: http.StatusBadRequest, + } + } + + return MapDelete(s, s.Experiments, deleteReq.ExperimentId) +} diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index a5e270cbc6..ceeb1a5a26 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -76,6 +76,7 @@ type FakeWorkspace struct { Dashboards map[string]dashboards.Dashboard SqlWarehouses map[string]sql.GetWarehouseResponse Alerts map[string]sql.AlertV2 + Experiments map[string]ml.GetExperimentResponse ModelRegistryModels map[string]ml.Model Acls map[string][]workspace.AclItem @@ -174,6 +175,7 @@ func NewFakeWorkspace(url, token string) *FakeWorkspace { DatabaseCatalogs: map[string]database.DatabaseCatalog{}, SyncedDatabaseTables: map[string]database.SyncedDatabaseTable{}, Alerts: map[string]sql.AlertV2{}, + Experiments: map[string]ml.GetExperimentResponse{}, ModelRegistryModels: map[string]ml.Model{}, } } diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 316c36264e..ef31dde2f2 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -522,6 +522,31 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.JobsGetPermissions(req, req.Vars["job_id"]) }) + // MLflow Experiments: + server.Handle("GET", "/api/2.0/mlflow/experiments/get", func(req Request) any { + experimentId := req.URL.Query().Get("experiment_id") + if experimentId == "" { + return Response{ + StatusCode: 
http.StatusBadRequest, + Body: map[string]string{"message": "experiment_id is required"}, + } + } + + return MapGet(req.Workspace, req.Workspace.Experiments, experimentId) + }) + + server.Handle("POST", "/api/2.0/mlflow/experiments/create", func(req Request) any { + return req.Workspace.ExperimentCreate(req) + }) + + server.Handle("POST", "/api/2.0/mlflow/experiments/update", func(req Request) any { + return req.Workspace.ExperimentUpdate(req) + }) + + server.Handle("POST", "/api/2.0/mlflow/experiments/delete", func(req Request) any { + return req.Workspace.ExperimentDelete(req) + }) + // Model registry models. server.Handle("POST", "/api/2.0/mlflow/registered-models/create", func(req Request) any { return req.Workspace.ModelRegistryCreateModel(req)