Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal/cloud/backend_state.go: upload json state #31241

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ website/node_modules
*.iml
*.test
*.iml
.vscode
uturunku1 marked this conversation as resolved.
Show resolved Hide resolved

/terraform

Expand Down
10 changes: 5 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ require (
github.com/hashicorp/go-hclog v0.15.0
github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/go-plugin v1.4.3
github.com/hashicorp/go-retryablehttp v0.7.0
github.com/hashicorp/go-tfe v1.0.0
github.com/hashicorp/go-uuid v1.0.2
github.com/hashicorp/go-retryablehttp v0.7.1
github.com/hashicorp/go-tfe v1.3.0
github.com/hashicorp/go-uuid v1.0.3
github.com/hashicorp/go-version v1.3.0
github.com/hashicorp/hcl v0.0.0-20170504190234-a4b07c25de5f
github.com/hashicorp/hcl/v2 v2.12.0
Expand Down Expand Up @@ -145,7 +145,7 @@ require (
github.com/hashicorp/go-msgpack v0.5.4 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/hashicorp/go-safetemp v1.0.0 // indirect
github.com/hashicorp/go-slug v0.8.0 // indirect
github.com/hashicorp/go-slug v0.8.1 // indirect
github.com/hashicorp/golang-lru v0.5.1 // indirect
github.com/hashicorp/jsonapi v0.0.0-20210826224640-ee7dae0fb22d // indirect
github.com/hashicorp/serf v0.9.5 // indirect
Expand Down Expand Up @@ -188,7 +188,7 @@ require (
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.66.2 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.30.0 // indirect
k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 // indirect
sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect
Expand Down
20 changes: 12 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -411,23 +411,25 @@ github.com/hashicorp/go-plugin v1.3.0/go.mod h1:F9eH4LrE/ZsRdbwhfjs9k9HoDUwAHnYt
github.com/hashicorp/go-plugin v1.4.1/go.mod h1:5fGEH17QVwTTcR0zV7yhDPLLmFX9YSZ38b18Udy6vYQ=
github.com/hashicorp/go-plugin v1.4.3 h1:DXmvivbWD5qdiBts9TpBC7BYL1Aia5sxbRgQB+v6UZM=
github.com/hashicorp/go-plugin v1.4.3/go.mod h1:5fGEH17QVwTTcR0zV7yhDPLLmFX9YSZ38b18Udy6vYQ=
github.com/hashicorp/go-retryablehttp v0.7.0 h1:eu1EI/mbirUgP5C8hVsTNaGZreBDlYiwC1FZWkvQPQ4=
github.com/hashicorp/go-retryablehttp v0.7.0/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ=
github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/go-safetemp v1.0.0 h1:2HR189eFNrjHQyENnQMMpCiBAsRxzbTMIgBhEyExpmo=
github.com/hashicorp/go-safetemp v1.0.0/go.mod h1:oaerMy3BhqiTbVye6QuFhFtIceqFoDHxNAB65b+Rj1I=
github.com/hashicorp/go-slug v0.8.0 h1:h7AGtXVAI/cJ/Wwa/JQQaftQnWQmZbAzkzgZeZVVmLw=
github.com/hashicorp/go-slug v0.8.0/go.mod h1:Ib+IWBYfEfJGI1ZyXMGNbu2BU+aa3Dzu41RKLH301v4=
github.com/hashicorp/go-slug v0.8.1 h1:srN7ivgAjHfZddYY1DjBaihRCFy20+vCcOrlx1O2AfE=
github.com/hashicorp/go-slug v0.8.1/go.mod h1:Ib+IWBYfEfJGI1ZyXMGNbu2BU+aa3Dzu41RKLH301v4=
github.com/hashicorp/go-sockaddr v1.0.0 h1:GeH6tui99pF4NJgfnhp+L6+FfobzVW3Ah46sLo0ICXs=
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
github.com/hashicorp/go-tfe v1.0.0 h1:CmwoHrOs7WJfD/yEmVjJ65+dyKeVRrgvRHBLVSQQ6Ks=
github.com/hashicorp/go-tfe v1.0.0/go.mod h1:tJF/OlAXzVbmjiimAPLplSLgwg6kZDUOy0MzHuMwvF4=
github.com/hashicorp/go-tfe v1.3.0 h1:5sboIfj0Uz6YAfPeDAVRXBKf3EI3D054kTbmOoUUW3g=
github.com/hashicorp/go-tfe v1.3.0/go.mod h1:5PORBlPPMya01sElYhCLUMu07BHGTwP5CRedU26SjPM=
github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.2 h1:cfejS+Tpcp13yd5nYHWDI6qVCny6wyX2Mt5SGur2IGE=
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-version v1.0.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/go-version v1.1.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
Expand Down Expand Up @@ -688,8 +690,9 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.194/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.232 h1:kwsWbh4rEw42ZDe9/812ebhbwNZxlQyZ2sTmxBOKhN4=
github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.232/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y=
Expand Down Expand Up @@ -1191,8 +1194,9 @@ gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
Expand Down
4 changes: 4 additions & 0 deletions internal/cloud/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,10 @@ func (b *Cloud) StateMgr(name string) (statemgr.Full, error) {

// This is optionally set during Terraform Enterprise runs.
runID: os.Getenv("TFE_RUN_ID"),
stateUpload: stateUpload{
ctxOpts: b.ContextOpts,
services: b.services,
},
}

return &remote.State{Client: client}, nil
Expand Down
105 changes: 86 additions & 19 deletions internal/cloud/backend_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,35 @@ import (
"context"
"crypto/md5"
"encoding/base64"
"errors"
"fmt"
"os"
"path/filepath"

tfe "github.com/hashicorp/go-tfe"
"github.com/hashicorp/terraform-svchost/disco"
"github.com/hashicorp/terraform/internal/command/jsonstate"
"github.com/hashicorp/terraform/internal/configs/configload"
"github.com/hashicorp/terraform/internal/states"
"github.com/hashicorp/terraform/internal/states/remote"
"github.com/hashicorp/terraform/internal/states/statefile"
"github.com/hashicorp/terraform/internal/states/statemgr"
"github.com/hashicorp/terraform/internal/terraform"
)

type remoteClient struct {
client *tfe.Client
lockInfo *statemgr.LockInfo
organization string
runID string
stateUploadErr bool
workspace *tfe.Workspace
forcePush bool
client *tfe.Client
lockInfo *statemgr.LockInfo
organization string
runID string
stateUpload stateUpload
workspace *tfe.Workspace
forcePush bool
}

// stateUpload groups the inputs Put needs to build the JSON representation
// of a state snapshot with a record of whether an upload attempt has failed.
type stateUpload struct {
// ctxOpts is handed to getSchemas, which uses it to create the Terraform
// Core context (a nil value is replaced there by empty options).
ctxOpts *terraform.ContextOpts
// services is handed to getSchemas, which passes it to the configuration
// loader.
services *disco.Disco
// hasErrored is set by Put when loading schemas, marshalling the JSON state
// or creating the state version fails. While it is set, Unlock returns
// early without unlocking the workspace.
hasErrored bool
}

// Get the remote state.
Expand All @@ -33,12 +46,12 @@ func (r *remoteClient) Get() (*remote.Payload, error) {
// If no state exists, then return nil.
return nil, nil
}
return nil, fmt.Errorf("Error retrieving state: %v", err)
return nil, fmt.Errorf("error retrieving state: %v", err)
}

state, err := r.client.StateVersions.Download(ctx, sv.DownloadURL)
if err != nil {
return nil, fmt.Errorf("Error downloading state: %v", err)
return nil, fmt.Errorf("error downloading state: %v", err)
}

// If the state is empty, then return nil.
Expand All @@ -62,15 +75,29 @@ func (r *remoteClient) Put(state []byte) error {
// Read the raw state into a Terraform state.
stateFile, err := statefile.Read(bytes.NewReader(state))
if err != nil {
return fmt.Errorf("Error reading state: %s", err)
return fmt.Errorf("error reading state: %s", err)
}

schemas, err := getSchemas(r.stateUpload.ctxOpts, r.stateUpload.services, stateFile.State)

if err != nil {
r.stateUpload.hasErrored = true
return fmt.Errorf("error uploading state: %v", err)
}
jsonState, err := jsonstate.Marshal(stateFile, schemas)

if err != nil {
r.stateUpload.hasErrored = true
return fmt.Errorf("error uploading state: %v", err)
}

options := tfe.StateVersionCreateOptions{
Lineage: tfe.String(stateFile.Lineage),
Serial: tfe.Int64(int64(stateFile.Serial)),
MD5: tfe.String(fmt.Sprintf("%x", md5.Sum(state))),
State: tfe.String(base64.StdEncoding.EncodeToString(state)),
Force: tfe.Bool(r.forcePush),
Lineage: tfe.String(stateFile.Lineage),
Serial: tfe.Int64(int64(stateFile.Serial)),
MD5: tfe.String(fmt.Sprintf("%x", md5.Sum(state))),
State: tfe.String(base64.StdEncoding.EncodeToString(state)),
Force: tfe.Bool(r.forcePush),
ExtState: jsonState,
}

// If we have a run ID, make sure to add it to the options
Expand All @@ -81,19 +108,59 @@ func (r *remoteClient) Put(state []byte) error {

// Create the new state.
_, err = r.client.StateVersions.Create(ctx, r.workspace.ID, options)

if err != nil {
r.stateUploadErr = true
return fmt.Errorf("Error uploading state: %v", err)
r.stateUpload.hasErrored = true
return fmt.Errorf("error uploading state: %v", err)
}

return nil
}

func getSchemas(ctxOpts *terraform.ContextOpts, services *disco.Disco, state *states.State) (*terraform.Schemas, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd love to get another opinion on the placement of these operations. It seems like some of this should have been initialized as part of the apply command and I'm wondering how we can leverage the config we already loaded because I assume all this is fairly expensive.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All of this data has been initialized earlier as part of the apply command. For example we assign a value to schemas at the (b *Local) opApply() level:

schemas, moreDiags := lr.Core.Schemas(lr.Config, lr.InputState)
which calls (*Local) localRun()
It's within this method that we create a state manager, init the remoteClient and write/persist the state (which is the key operation for uploading state).
You'd think that *terraform.Context might have collected the schemas data somewhere along the way, but that doesn't seem to be the case: we reload the schemas at different times and in different places. Although that is an expensive operation, maybe it is the safe thing to do?
I'll bring this concern to the core team to get another opinion.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi there! 😀

I must confess that I've not yet been able to load all of the context about the overall goals here, so in this comment I'm focusing on the specific architecture question this thread seems to be tackling. I might be missing some bigger details, but hopefully this is still useful for exploring some options.

The main thing I'm noticing while I revisit this code is that we seem to have a bit of an abstraction inversion here: the remote.State state manager implementation aims to make it easier to implement remote state storage against generic blob stores, and so it tries to encapsulate the problem of JSON-serializing and JSON-deserializing the state so that the remote.Client implementation (which is what this remoteClient struct is) needs to deal only with opaque []byte to read and write.

If we look out at the bigger picture then, this current design has the following redundancy:

My sense then is that the remote.Client abstraction is hindering rather than helping now that we need access to the real *states.State object in order to make the Terraform Cloud API request.

If you look in package statemgr you can see the interface that the CLI layer expects backends to implement for state storage. Unfortunately it is defined in terms of several other interfaces, which makes it hard to review quickly, but the main thing to notice is that it transitively includes the statemgr.Writer interface, whose method WriteState(*states.State) error is what the CLI layer actually calls when it has a new state snapshot to share. That *states.State object is not yet JSON-encoded, so you can work with it directly without having to parse it first.

Based on this, I think my first recommendation would be to consider moving away from using remote.State for this backend and to write a custom statemgr.Full implementation in this package instead. I think you can probably copy remote.State into here as a starting point since I expect it will still benefit from some of the same logic around tracking state serials and lineage, but crucially it can have a different implementation of PersistState that will avoid the need to re-parse the state because it will have access to the *states.State snapshot already cached inside the state manager itself.


This admittedly still doesn't answer how this new state manager can get access to the schemas without completely reloading them from scratch, but it at least removes one extra redundant layer from the design so that we don't need to fight an abstraction that has different goals.

I think one thing that makes this extra tricky is that when running in local operations mode with the cloud integration the "backend" that the CLI layer is talking to is a bit of a "turducken" of odd layers: a cloud.Cloud with local set to a local.Backend with its nested Backend pointing back to the original cloud.Cloud. 😬

This makes life considerably harder because I think it's the local.Backend sandwiched between the two pointers to cloud.Cloud that is the one actually creating the Terraform Core context and thus loading the schemas, but it's the cloud.Cloud that actually needs access to the schemas in order to implement its StateMgr method and pass the schemas into the state manager.

My best idea right now is to consider changing the statemgr.Writer interface so that WriteState takes both the *states.State as it has today and a reference to the schemas, with the meaning "write this state snapshot using these schemas". This is awkward because every other statemgr.Writer implementation will just ignore that extra argument altogether, but perhaps that's okay because we have relatively few real implementations of statemgr.Full anyway: the statemgr.Filesystem one used for local state, and remote.State used for all of the state-storage-only backends. (there are a few others lurking for testing purposes only too, I think.)

I think the trick here will be tracking down all of the places where we call WriteState on a state manager and making sure that they always have access to the current schemas too. If that's true without any deeper refactoring then this might be the least-messy path forward, although I'd welcome any other ideas or counterproposals from others on the team.

// getSchemas resolves the provider/provisioner schemas needed to marshal the
// state as JSON. Doing so requires a *terraform.Context, the root
// *configs.Config and the *states.State, so each is assembled in turn below.
// On every failure path a nil *terraform.Schemas is returned with the error.

// Fall back to empty options so a context can still be created.
if ctxOpts == nil {
	ctxOpts = new(terraform.ContextOpts)
}

// Get our context.
tfCtx, ctxDiags := terraform.NewContext(ctxOpts)
if ctxDiags.HasErrors() {
	return nil, fmt.Errorf("error uploading state to Terraform Cloud: %w", ctxDiags.Err())
}

// Get our config; it is loaded from the current working directory.
configDir, err := os.Getwd()
if err != nil {
	return nil, fmt.Errorf("error getting current directory: %w", err)
}

loader, err := configload.NewLoader(&configload.Config{
	// Join the path segments with the OS separator. Plain string
	// concatenation produced "<cwd>.terraform/modules", which points
	// outside the working directory.
	ModulesDir: filepath.Join(configDir, ".terraform", "modules"),
	Services:   services,
})
if err != nil {
	return nil, fmt.Errorf("error uploading state to Terraform Cloud: %w", err)
}

config, configDiags := loader.LoadConfig(configDir)
if configDiags.HasErrors() {
	return nil, fmt.Errorf("error uploading state to Terraform Cloud: %w", errors.New(configDiags.Error()))
}

// Finally ask the context for the schemas matching this config and state.
schemas, schemaDiags := tfCtx.Schemas(config, state)
if schemaDiags.HasErrors() {
	return nil, fmt.Errorf("error uploading state to Terraform Cloud: %w", schemaDiags.Err())
}
return schemas, nil
}

// Delete the remote state.
func (r *remoteClient) Delete() error {
err := r.client.Workspaces.Delete(context.Background(), r.organization, r.workspace.Name)
if err != nil && err != tfe.ErrResourceNotFound {
return fmt.Errorf("Error deleting workspace %s: %v", r.workspace.Name, err)
return fmt.Errorf("error deleting workspace %s: %v", r.workspace.Name, err)
}

return nil
Expand Down Expand Up @@ -136,7 +203,7 @@ func (r *remoteClient) Unlock(id string) error {
// We first check if there was an error while uploading the latest
// state. If so, we will not unlock the workspace to prevent any
// changes from being applied until the correct state is uploaded.
if r.stateUploadErr {
if r.stateUpload.hasErrored {
return nil
}

Expand Down