Skip to content

Commit

Permalink
Implement uncoordinated upgrades on e2e (#238) (#243)
Browse files Browse the repository at this point in the history
* Fix multiversion e2e when `version <= v0.34.24`

* Changes we want no matter what we do

* Approach 1: With docker-compose up recreating image

* Revert "Approach 1: With docker-compose up recreating image"

This reverts commit 068f5b78d3b7cf99fb21296707117e4f4e2ef3e0.

* Approach 2: With distinct docker-compose service

* Fix the case when 'upgrade' is not the last perturbation

* Update generator

* Simplify template

* bump

* Update test/e2e/pkg/manifest.go

* changelog

* doc

* fix doc

(cherry picked from commit b27bb3a)

Co-authored-by: Sergio Mena <sergio@informal.systems>
  • Loading branch information
mergify[bot] and sergio-mena committed Jan 31, 2023
1 parent da5ad6e commit 39b93a0
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- `[e2e]` Add functionality for uncoordinated (minor) upgrades
([\#56](https://github.com/tendermint/tendermint/pull/56))
7 changes: 7 additions & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ generator. For example:
node (the `cometbft/e2e-node` image) must be available on the local machine,
or via [Docker Hub](https://hub.docker.com/r/cometbft/e2e-node).

Multiversion testnets can also perform uncoordinated upgrades. Nodes containing a
perturbation of type `upgrade` will upgrade to the target version specified in
the `upgrade_version` attribute of the testnet manifest.
The generator generates this type of perturbation both on full nodes and on light nodes.
Perturbations of type `upgrade` are a no-op if the node's version matches the
one in `upgrade_version`.

## Test Stages

The test runner has the following stages, which can also be executed explicitly by running `./build/runner -f <manifest> <stage>`:
Expand Down
5 changes: 5 additions & 0 deletions test/e2e/generator/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ var (
"pause": 0.1,
"kill": 0.1,
"restart": 0.1,
"upgrade": 0.3,
}
lightNodePerturbations = probSetChoice{
"upgrade": 0.3,
}
)

Expand Down Expand Up @@ -310,6 +314,7 @@ func generateLightNode(r *rand.Rand, startAt int64, providers []string) *e2e.Man
Database: nodeDatabases.Choose(r).(string),
PersistInterval: ptrUint64(0),
PersistentPeers: providers,
Perturb: lightNodePerturbations.Choose(r),
}
}

Expand Down
25 changes: 24 additions & 1 deletion test/e2e/pkg/infra/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ services:
e2e: true
container_name: {{ .Name }}
image: cometbft/e2e-node:{{ .Version }}
{{- if eq .ABCIProtocol "builtin" }}
{{- if or (eq .ABCIProtocol "builtin") (eq .ABCIProtocol "builtin_unsync") }}
entrypoint: /usr/bin/entrypoint-builtin
{{- end }}
init: true
Expand All @@ -68,9 +68,32 @@ services:
- 6060
volumes:
- ./{{ .Name }}:/cometbft
- ./{{ .Name }}:/tendermint
networks:
{{ $.Name }}:
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
{{- if ne .Version $.UpgradeVersion}}
{{ .Name }}_u:
labels:
e2e: true
container_name: {{ .Name }}_u
image: cometbft/e2e-node:{{ $.UpgradeVersion }}
{{- if or (eq .ABCIProtocol "builtin") (eq .ABCIProtocol "builtin_unsync") }}
entrypoint: /usr/bin/entrypoint-builtin
{{- end }}
init: true
ports:
- 26656
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
- 6060
volumes:
- ./{{ .Name }}:/cometbft
- ./{{ .Name }}:/tendermint
networks:
{{ $.Name }}:
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
{{- end }}
{{end}}`)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions test/e2e/pkg/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ type Manifest struct {
CheckTxDelay time.Duration `toml:"check_tx_delay"`
// TODO: add vote extension and finalize block delay (@cmwaters)

// UpgradeVersion specifies the version to which nodes need to upgrade.
// Currently only uncoordinated upgrades are supported.
UpgradeVersion string `toml:"upgrade_version"`

LoadTxSizeBytes int `toml:"load_tx_size_bytes"`
LoadTxBatchSize int `toml:"load_tx_batch_size"`
LoadTxConnections int `toml:"load_tx_connections"`
Expand Down
18 changes: 16 additions & 2 deletions test/e2e/pkg/testnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ const (
defaultBatchSize = 2
defaultConnections = 1
defaultTxSizeBytes = 1024

localVersion = "local-version"
)

type (
Expand All @@ -49,6 +51,7 @@ const (
PerturbationKill Perturbation = "kill"
PerturbationPause Perturbation = "pause"
PerturbationRestart Perturbation = "restart"
PerturbationUpgrade Perturbation = "upgrade"

EvidenceAgeHeight int64 = 7
EvidenceAgeTime time.Duration = 500 * time.Millisecond
Expand All @@ -74,6 +77,7 @@ type Testnet struct {
PrepareProposalDelay time.Duration
ProcessProposalDelay time.Duration
CheckTxDelay time.Duration
UpgradeVersion string
}

// Node represents a CometBFT node in a testnet.
Expand Down Expand Up @@ -136,6 +140,7 @@ func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Test
PrepareProposalDelay: manifest.PrepareProposalDelay,
ProcessProposalDelay: manifest.ProcessProposalDelay,
CheckTxDelay: manifest.CheckTxDelay,
UpgradeVersion: manifest.UpgradeVersion,
}
if len(manifest.KeyType) != 0 {
testnet.KeyType = manifest.KeyType
Expand All @@ -146,6 +151,9 @@ func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Test
if testnet.ABCIProtocol == "" {
testnet.ABCIProtocol = string(ProtocolBuiltin)
}
if testnet.UpgradeVersion == "" {
testnet.UpgradeVersion = localVersion
}
if testnet.LoadTxConnections == 0 {
testnet.LoadTxConnections = defaultConnections
}
Expand All @@ -167,11 +175,11 @@ func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Test
nodeManifest := manifest.Nodes[name]
ind, ok := ifd.Instances[name]
if !ok {
return nil, fmt.Errorf("information for node '%s' missing from infrastucture data", name)
return nil, fmt.Errorf("information for node '%s' missing from infrastructure data", name)
}
v := nodeManifest.Version
if v == "" {
v = "local-version"
v = localVersion
}
node := &Node{
Name: name,
Expand Down Expand Up @@ -379,8 +387,14 @@ func (n Node) Validate(testnet Testnet) error {
return errors.New("snapshot_interval must be less than er equal to retain_blocks")
}

var upgradeFound bool
for _, perturbation := range n.Perturbations {
switch perturbation {
case PerturbationUpgrade:
if upgradeFound {
return fmt.Errorf("'upgrade' perturbation can appear at most once per node")
}
upgradeFound = true
case PerturbationDisconnect, PerturbationKill, PerturbationPause, PerturbationRestart:
default:
return fmt.Errorf("invalid perturbation %q", perturbation)
Expand Down
17 changes: 14 additions & 3 deletions test/e2e/runner/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ import (

// exec runs the command given by args (program first, then its arguments)
// and reports only whether it succeeded; any output captured by execOutput
// is discarded.
func exec(args ...string) error {
	if _, runErr := execOutput(args...); runErr != nil {
		return runErr
	}
	return nil
}

// execOutput runs the command given by args (program first, then its
// arguments) and returns its combined standard output and standard error.
//
// On success the captured output is returned with a nil error. If the
// command starts but exits unsuccessfully, the returned error embeds the
// command line and everything the command printed, and the output slice is
// nil. Any other failure (for example, the program cannot be found) is
// returned unchanged, also with nil output. Calling it without arguments
// returns an error rather than panicking on args[0].
func execOutput(args ...string) ([]byte, error) {
	// Guard the args[0] access below: there is nothing to run.
	if len(args) == 0 {
		return nil, fmt.Errorf("no command given")
	}

	cmd := osexec.Command(args[0], args[1:]...) //nolint:gosec
	out, err := cmd.CombinedOutput()
	switch err.(type) {
	case nil:
		return out, nil
	case *osexec.ExitError:
		// The exit status alone is rarely useful; surface what the command
		// printed instead.
		return nil, fmt.Errorf("failed to run %q:\n%v", args, string(out))
	default:
		return nil, err
	}
}

Expand All @@ -36,6 +41,12 @@ func execCompose(dir string, args ...string) error {
args...)...)
}

// execComposeOutput runs a Docker Compose command against the
// docker-compose.yml file located in dir and returns whatever execOutput
// yields for it (captured output and error).
func execComposeOutput(dir string, args ...string) ([]byte, error) {
	composeFile := filepath.Join(dir, "docker-compose.yml")

	cmdline := make([]string, 0, 3+len(args))
	cmdline = append(cmdline, "docker-compose", "-f", composeFile)
	cmdline = append(cmdline, args...)

	return execOutput(cmdline...)
}

// execComposeVerbose runs a Docker Compose command for a testnet and displays its output.
func execComposeVerbose(dir string, args ...string) error {
return execVerbose(append(
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/runner/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func loadGenerate(ctx context.Context, txCh chan<- types.Tx, testnet *e2e.Testne

// A context with a timeout is created here to time the createTxBatch
// function out. If createTxBatch has not completed its work by the time
// the next batch is set to be sent out, then the context is cancled so that
// the next batch is set to be sent out, then the context is canceled so that
// the current batch is halted, allowing the next batch to begin.
tctx, cf := context.WithTimeout(ctx, time.Second)
createTxBatch(tctx, txCh, testnet, id)
Expand Down
53 changes: 46 additions & 7 deletions test/e2e/runner/perturb.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,78 @@ func Perturb(testnet *e2e.Testnet) error {
// after recovering.
func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
testnet := node.Testnet
out, err := execComposeOutput(testnet.Dir, "ps", "-q", node.Name)
if err != nil {
return nil, err
}
name := node.Name
upgraded := false
if len(out) == 0 {
name = name + "_u"
upgraded = true
logger.Info("perturb node", "msg",
log.NewLazySprintf("Node %v already upgraded, operating on alternate container %v",
node.Name, name))
}

switch perturbation {
case e2e.PerturbationDisconnect:
logger.Info("perturb node", "msg", log.NewLazySprintf("Disconnecting node %v...", node.Name))
if err := execDocker("network", "disconnect", testnet.Name+"_"+testnet.Name, node.Name); err != nil {
if err := execDocker("network", "disconnect", testnet.Name+"_"+testnet.Name, name); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := execDocker("network", "connect", testnet.Name+"_"+testnet.Name, node.Name); err != nil {
if err := execDocker("network", "connect", testnet.Name+"_"+testnet.Name, name); err != nil {
return nil, err
}

case e2e.PerturbationKill:
logger.Info("perturb node", "msg", log.NewLazySprintf("Killing node %v...", node.Name))
if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", node.Name); err != nil {
if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", name); err != nil {
return nil, err
}
if err := execCompose(testnet.Dir, "start", node.Name); err != nil {
if err := execCompose(testnet.Dir, "start", name); err != nil {
return nil, err
}

case e2e.PerturbationPause:
logger.Info("perturb node", "msg", log.NewLazySprintf("Pausing node %v...", node.Name))
if err := execCompose(testnet.Dir, "pause", node.Name); err != nil {
if err := execCompose(testnet.Dir, "pause", name); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := execCompose(testnet.Dir, "unpause", node.Name); err != nil {
if err := execCompose(testnet.Dir, "unpause", name); err != nil {
return nil, err
}

case e2e.PerturbationRestart:
logger.Info("perturb node", "msg", log.NewLazySprintf("Restarting node %v...", node.Name))
if err := execCompose(testnet.Dir, "restart", node.Name); err != nil {
if err := execCompose(testnet.Dir, "restart", name); err != nil {
return nil, err
}

case e2e.PerturbationUpgrade:
oldV := node.Version
newV := node.Testnet.UpgradeVersion
if upgraded {
return nil, fmt.Errorf("node %v can't be upgraded twice from version '%v' to version '%v'",
node.Name, oldV, newV)
}
if oldV == newV {
logger.Info("perturb node", "msg",
log.NewLazySprintf("Skipping upgrade of node %v to version '%v'; versions are equal.",
node.Name, newV))
break
}
logger.Info("perturb node", "msg",
log.NewLazySprintf("Upgrading node %v from version '%v' to version '%v'...",
node.Name, oldV, newV))

if err := execCompose(testnet.Dir, "stop", name); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := execCompose(testnet.Dir, "up", "-d", name+"_u"); err != nil {
return nil, err
}

Expand Down
6 changes: 2 additions & 4 deletions test/e2e/runner/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,11 @@ const (
func Setup(testnet *e2e.Testnet, infp infra.Provider) error {
logger.Info("setup", "msg", log.NewLazySprintf("Generating testnet files in %q", testnet.Dir))

err := os.MkdirAll(testnet.Dir, os.ModePerm)
if err != nil {
if err := os.MkdirAll(testnet.Dir, os.ModePerm); err != nil {
return err
}

err = infp.Setup()
if err != nil {
if err := infp.Setup(); err != nil {
return err
}

Expand Down

0 comments on commit 39b93a0

Please sign in to comment.