Skip to content

Commit

Permalink
Merge #73384
Browse files Browse the repository at this point in the history
73384: roachprod: allow concurrent creation of clusters r=[RaduBerinde,rail] a=healthy-pod

Previously, we used the roachprod binary
to create clusters concurrently and this
worked fine because every `roachprod verb`
command starts a new seperate process.

Now we use the library so we are overriding
global provider-specific options. This patch
allows us to manage options for multiple clusters
per provider concurrently.

Release note: None

Co-authored-by: Ahmad Abedalqader <ahmad.abedalqader@cockroachlabs.com>
  • Loading branch information
craig[bot] and Ahmad Abedalqader committed Dec 3, 2021
2 parents 3900353 + 57c5292 commit 7c88d5b
Show file tree
Hide file tree
Showing 15 changed files with 332 additions and 321 deletions.
57 changes: 31 additions & 26 deletions pkg/cmd/roachprod/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,24 @@ import (
)

var (
pprofOpts roachprod.PprofOpts
numNodes int
numRacks int
username string
dryrun bool
destroyAllMine bool
destroyAllLocal bool
extendLifetime time.Duration
wipePreserveCerts bool
listDetails bool
listJSON bool
listMine bool
listPattern string
secure = false
extraSSHOptions = ""
nodeEnv = []string{
// Do not populate providerOptsContainer here as we need to call initProviders() first.
providerOptsContainer vm.ProviderOptionsContainer
pprofOpts roachprod.PprofOpts
numNodes int
numRacks int
username string
dryrun bool
destroyAllMine bool
destroyAllLocal bool
extendLifetime time.Duration
wipePreserveCerts bool
listDetails bool
listJSON bool
listMine bool
listPattern string
secure = false
extraSSHOptions = ""
nodeEnv = []string{
"COCKROACH_ENABLE_RPC_COMPRESSION=false",
"COCKROACH_UI_RELEASE_NOTES_SIGNUP_DISMISSED=true",
}
Expand Down Expand Up @@ -112,17 +114,20 @@ func initFlags() {
"international characters. Examples: usage=cloud-report-2021, namewithspaceinvalue='s o s'")

// Allow each Provider to inject additional configuration flags
for _, p := range vm.Providers {
p.Flags().ConfigureCreateFlags(createCmd.Flags())

for _, cmd := range []*cobra.Command{
destroyCmd, extendCmd, listCmd, syncCmd, gcCmd,
} {
p.Flags().ConfigureClusterFlags(cmd.Flags(), vm.AcceptMultipleProjects)
for _, providerName := range vm.AllProviderNames() {
if vm.Providers[providerName].Active() {
providerOptsContainer[providerName].ConfigureCreateFlags(createCmd.Flags())

for _, cmd := range []*cobra.Command{
destroyCmd, extendCmd, listCmd, syncCmd, gcCmd,
} {
providerOptsContainer[providerName].ConfigureClusterFlags(cmd.Flags(), vm.AcceptMultipleProjects)
}

// createCmd only accepts a single GCE project, as opposed to all the other
// commands.
providerOptsContainer[providerName].ConfigureClusterFlags(createCmd.Flags(), vm.SingleProject)
}
// createCmd only accepts a single GCE project, as opposed to all the other
// commands.
p.Flags().ConfigureClusterFlags(createCmd.Flags(), vm.SingleProject)
}

destroyCmd.Flags().BoolVarP(&destroyAllMine,
Expand Down
5 changes: 3 additions & 2 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/roachprod/ui"
"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
"github.com/cockroachdb/errors"
"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -123,7 +124,7 @@ Local Clusters
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) {
createVMOpts.ClusterName = args[0]
return roachprod.Create(username, numNodes, createVMOpts)
return roachprod.Create(username, numNodes, createVMOpts, providerOptsContainer)
}),
}

Expand Down Expand Up @@ -857,7 +858,7 @@ var versionCmd = &cobra.Command{

func main() {
roachprod.InitProviders()

providerOptsContainer = vm.CreateProviderOptionsContainer()
// The commands are displayed in the order they are added to rootCmd. Note
// that gcCmd and adminurlCmd contain a trailing \n in their Short help in
// order to separate the commands into logical groups.
Expand Down
26 changes: 11 additions & 15 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -906,24 +906,20 @@ func (f *clusterFactory) newCluster(
}
c.status("creating cluster")

var createVMOpts vm.CreateOpts
var providerOpts interface{}
{
var err error
createVMOpts, providerOpts, err = cfg.spec.RoachprodOpts(c.name, cfg.useIOBarrier)
if err != nil {
return nil, err
}
createFlagsOverride(overrideFlagset, &createVMOpts)
// Make sure c.expiration is changed if --lifetime override flag is passed.
cfg.spec.Lifetime = createVMOpts.Lifetime
c.expiration = cfg.spec.Expiration()
providerOptsContainer := vm.CreateProviderOptionsContainer()
createVMOpts, providerOpts, err := cfg.spec.RoachprodOpts(c.name, cfg.useIOBarrier)
if err != nil {
return nil, err
}

if cfg.spec.Cloud != spec.Local {
vm.Providers[cfg.spec.Cloud].Flags().ConfigureProviderOpts(providerOpts)
providerOptsContainer.SetProviderOpts(cfg.spec.Cloud, providerOpts)
}

createFlagsOverride(overrideFlagset, &createVMOpts)
// Make sure c.expiration is changed if --lifetime override flag is passed.
cfg.spec.Lifetime = createVMOpts.Lifetime
c.expiration = cfg.spec.Expiration()

// Logs for creating a new cluster go to a dedicated log file.
var retryStr string
if i > 0 {
Expand All @@ -937,7 +933,7 @@ func (f *clusterFactory) newCluster(

l.PrintfCtx(ctx, "Attempting cluster creation (attempt #%d/%d)", i, maxAttempts)
createVMOpts.ClusterName = c.name
err = roachprod.Create(cfg.username, cfg.spec.NodeCount, createVMOpts)
err = roachprod.Create(cfg.username, cfg.spec.NodeCount, createVMOpts, providerOptsContainer)
if err == nil {
if err := f.r.registerCluster(c); err != nil {
return nil, err
Expand Down
10 changes: 5 additions & 5 deletions pkg/cmd/roachtest/spec/cluster_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func awsMachineSupportsSSD(machineType string) bool {
return false
}

func getAWSOpts(machineType string, zones []string, localSSD bool) aws.ProviderOpts {
func getAWSOpts(machineType string, zones []string, localSSD bool) vm.ProviderOpts {
opts := aws.DefaultProviderOpts()
if localSSD {
opts.SSDMachineType = machineType
Expand All @@ -112,7 +112,7 @@ func getAWSOpts(machineType string, zones []string, localSSD bool) aws.ProviderO

func getGCEOpts(
machineType string, zones []string, volumeSize, localSSDCount int, localSSD bool, RAID0 bool,
) gce.ProviderOpts {
) vm.ProviderOpts {
opts := gce.DefaultProviderOpts()
opts.MachineType = machineType
if volumeSize != 0 {
Expand All @@ -132,7 +132,7 @@ func getGCEOpts(
return opts
}

func getAzureOpts(machineType string, zones []string) azure.ProviderOpts {
func getAzureOpts(machineType string, zones []string) vm.ProviderOpts {
opts := azure.DefaultProviderOpts()
opts.MachineType = machineType
if len(zones) != 0 {
Expand All @@ -145,7 +145,7 @@ func getAzureOpts(machineType string, zones []string) azure.ProviderOpts {
// in order to create the cluster described in the spec.
func (s *ClusterSpec) RoachprodOpts(
clusterName string, useIOBarrier bool,
) (vm.CreateOpts, interface{}, error) {
) (vm.CreateOpts, vm.ProviderOpts, error) {

createVMOpts := vm.DefaultCreateOpts()
createVMOpts.ClusterName = clusterName
Expand Down Expand Up @@ -227,7 +227,7 @@ func (s *ClusterSpec) RoachprodOpts(
}
}

var providerOpts interface{}
var providerOpts vm.ProviderOpts
switch s.Cloud {
case AWS:
providerOpts = getAWSOpts(machineType, zones, createVMOpts.SSDOpts.UseLocalSSD)
Expand Down
6 changes: 4 additions & 2 deletions pkg/roachprod/cloud/cluster_cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,9 @@ func ListCloud() (*Cloud, error) {
}

// CreateCluster TODO(peter): document
func CreateCluster(nodes int, opts vm.CreateOpts) error {
func CreateCluster(
nodes int, opts vm.CreateOpts, providerOptsContainer vm.ProviderOptionsContainer,
) error {
providerCount := len(opts.VMProviders)
if providerCount == 0 {
return errors.New("no VMProviders configured")
Expand All @@ -263,7 +265,7 @@ func CreateCluster(nodes int, opts vm.CreateOpts) error {
}

return vm.ProvidersParallel(opts.VMProviders, func(p vm.Provider) error {
return p.Create(vmLocations[p.Name()], opts)
return p.Create(vmLocations[p.Name()], opts, providerOptsContainer[p.Name()])
})
}

Expand Down
9 changes: 7 additions & 2 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -1062,7 +1062,12 @@ func cleanupFailedCreate(clusterName string) error {
}

// Create TODO
func Create(username string, numNodes int, createVMOpts vm.CreateOpts) (retErr error) {
func Create(
username string,
numNodes int,
createVMOpts vm.CreateOpts,
providerOptsContainer vm.ProviderOptionsContainer,
) (retErr error) {
if err := LoadClusters(); err != nil {
return errors.Wrap(err, "problem loading clusters")
}
Expand Down Expand Up @@ -1118,7 +1123,7 @@ func Create(username string, numNodes int, createVMOpts vm.CreateOpts) (retErr e
}

fmt.Printf("Creating cluster %s with %d nodes\n", clusterName, numNodes)
if createErr := cloud.CreateCluster(numNodes, createVMOpts); createErr != nil {
if createErr := cloud.CreateCluster(numNodes, createVMOpts, providerOptsContainer); createErr != nil {
return createErr
}

Expand Down

0 comments on commit 7c88d5b

Please sign in to comment.