diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 00152d550e..b814347431 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -5,5 +5,6 @@ ### CLI ### Bundles +* engine/direct: Drop the deployment state entry on a recreate before the follow-up `Create`, so a `Create` failure no longer leaves a broken state with `invalid state: empty id` on the next `bundle plan` ([#5173](https://github.com/databricks/cli/pull/5173)). ### Dependency updates diff --git a/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/databricks.yml.tmpl b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/databricks.yml.tmpl new file mode 100644 index 0000000000..8ad973b6d5 --- /dev/null +++ b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/databricks.yml.tmpl @@ -0,0 +1,14 @@ +bundle: + name: recreate-create-fails-$UNIQUE_NAME + +sync: + paths: [] + +resources: + vector_search_endpoints: + my_endpoint: + name: vs-endpoint-a-$UNIQUE_NAME + endpoint_type: STANDARD + blocker_endpoint: + name: vs-endpoint-b-$UNIQUE_NAME + endpoint_type: STORAGE_OPTIMIZED diff --git a/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/out.test.toml b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/out.test.toml new file mode 100644 index 0000000000..8842340818 --- /dev/null +++ b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/out.test.toml @@ -0,0 +1,4 @@ +Local = true +Cloud = false +RequiresUnityCatalog = true +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/output.txt b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/output.txt new file mode 100644 index 0000000000..67b3cb6182 --- /dev/null +++ b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/output.txt @@ -0,0 +1,42 @@ + +=== Initial deploy creates two endpoints with distinct names +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/recreate-create-fails-[UNIQUE_NAME]/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +=== Edit my_endpoint: rename onto blocker_endpoint's name and switch endpoint_type to trigger Recreate +>>> update_file.py databricks.yml vs-endpoint-a-[UNIQUE_NAME] vs-endpoint-b-[UNIQUE_NAME] + +>>> update_file.py databricks.yml endpoint_type: STANDARD endpoint_type: STORAGE_OPTIMIZED + +=== Deploy: Recreate of my_endpoint runs Delete (ok) then Create (409, name taken by blocker) +>>> [CLI] bundle deploy --auto-approve +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/recreate-create-fails-[UNIQUE_NAME]/default/files... +Deploying resources... +Error: cannot recreate resources.vector_search_endpoints.my_endpoint: Vector search endpoint with name vs-endpoint-b-[UNIQUE_NAME] already exists (409 RESOURCE_ALREADY_EXISTS) + +Endpoint: POST [DATABRICKS_URL]/api/2.0/vector-search/endpoints +HTTP Status: 409 Conflict +API error_code: RESOURCE_ALREADY_EXISTS +API message: Vector search endpoint with name vs-endpoint-b-[UNIQUE_NAME] already exists + +Updating deployment state... + +Exit code: 1 + +=== Subsequent plan recovers: my_endpoint state was dropped, replan as Create +>>> [CLI] bundle plan +create vector_search_endpoints.my_endpoint + +Plan: 1 to add, 0 to change, 0 to delete, 1 unchanged + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.vector_search_endpoints.blocker_endpoint + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/recreate-create-fails-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! diff --git a/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/script b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/script new file mode 100644 index 0000000000..b48a7e7a3c --- /dev/null +++ b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/script @@ -0,0 +1,20 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + trace $CLI bundle destroy --auto-approve || true + rm -f out.requests.txt +} +trap cleanup EXIT + +title "Initial deploy creates two endpoints with distinct names" +trace $CLI bundle deploy + +title "Edit my_endpoint: rename onto blocker_endpoint's name and switch endpoint_type to trigger Recreate" +trace update_file.py databricks.yml "vs-endpoint-a-$UNIQUE_NAME" "vs-endpoint-b-$UNIQUE_NAME" +trace update_file.py databricks.yml " endpoint_type: STANDARD" " endpoint_type: STORAGE_OPTIMIZED" + +title "Deploy: Recreate of my_endpoint runs Delete (ok) then Create (409, name taken by blocker)" +errcode trace $CLI bundle deploy --auto-approve + +title "Subsequent plan recovers: my_endpoint state was dropped, replan as Create" +trace $CLI bundle plan diff --git a/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/test.toml b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/test.toml new file mode 100644 index 0000000000..18b1a88417 --- /dev/null +++ b/acceptance/bundle/resources/vector_search_endpoints/recreate/create-fails/test.toml @@ -0,0 +1 @@ +Cloud = false diff --git a/bundle/direct/apply.go b/bundle/direct/apply.go index 42d80efab3..e7186f6467 100644 --- a/bundle/direct/apply.go +++ b/bundle/direct/apply.go @@ -86,7 +86,9 @@ func (d *DeploymentUnit) Recreate(ctx context.Context, db *dstate.DeploymentStat return fmt.Errorf("deleting old id=%s: %w", oldID, err) } - err = db.SaveState(d.ResourceKey, "", nil, nil) + // Drop the state entry so a subsequent failure of Create leaves no malformed + // (empty-id) entry behind. The next plan will see "no state" and retry as Create. + err = db.DeleteState(d.ResourceKey) if err != nil { return fmt.Errorf("deleting state: %w", err) } diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index 3fab4c3f4f..f6bcea316c 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -181,16 +181,15 @@ func (b *DeploymentBundle) CalculatePlan(ctx context.Context, client *databricks } dbentry, hasEntry := b.StateDB.GetResourceEntry(resourceKey) - if !hasEntry { + // Tolerate empty-id entries from older partial-recreate failures + // (apply.Recreate now deletes state on the way through, but pre-fix + // state files may still carry a malformed entry). Treat as missing + // and let the resource be re-created on this plan. + if !hasEntry || dbentry.ID == "" { entry.Action = deployplan.Create return true } - if dbentry.ID == "" { - logdiag.LogError(ctx, fmt.Errorf("%s: invalid state: empty id", errorPrefix)) - return false - } - savedState, err := parseState(adapter.StateType(), dbentry.State) if err != nil { logdiag.LogError(ctx, fmt.Errorf("%s: interpreting state: %w", errorPrefix, err))