Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement uncoordinated upgrades on e2e (backport #238) #243

Merged
merged 3 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
- `[e2e]` Add functionality for uncoordinated (minor) upgrades
([\#56](https://github.com/tendermint/tendermint/pull/56))
7 changes: 7 additions & 0 deletions test/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ generator. For example:
node (the `cometbft/e2e-node` image) must be available on the local machine,
or via [Docker Hub](https://hub.docker.com/r/cometbft/e2e-node).

Multiversion testnets can also perform uncoordinated upgrades. Nodes containing a
perturbation of type `upgrade` will upgrade to the target version specified in
the `upgrade_version` attribute of the testnet manifest.
The generator generates this type of perturbation both on full nodes and on light nodes.
Perturbations of type `upgrade` are a no-op if the node's version matches the
one in `upgrade_version`.

## Test Stages

The test runner has the following stages, which can also be executed explicitly by running `./build/runner -f <manifest> <stage>`:
Expand Down
5 changes: 5 additions & 0 deletions test/e2e/generator/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ var (
"pause": 0.1,
"kill": 0.1,
"restart": 0.1,
"upgrade": 0.3,
}
lightNodePerturbations = probSetChoice{
"upgrade": 0.3,
}
)

Expand Down Expand Up @@ -310,6 +314,7 @@ func generateLightNode(r *rand.Rand, startAt int64, providers []string) *e2e.Man
Database: nodeDatabases.Choose(r).(string),
PersistInterval: ptrUint64(0),
PersistentPeers: providers,
Perturb: lightNodePerturbations.Choose(r),
}
}

Expand Down
25 changes: 24 additions & 1 deletion test/e2e/pkg/infra/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ services:
e2e: true
container_name: {{ .Name }}
image: cometbft/e2e-node:{{ .Version }}
{{- if eq .ABCIProtocol "builtin" }}
{{- if or (eq .ABCIProtocol "builtin") (eq .ABCIProtocol "builtin_unsync") }}
entrypoint: /usr/bin/entrypoint-builtin
{{- end }}
init: true
Expand All @@ -68,9 +68,32 @@ services:
- 6060
volumes:
- ./{{ .Name }}:/cometbft
- ./{{ .Name }}:/tendermint
networks:
{{ $.Name }}:
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
{{- if ne .Version $.UpgradeVersion}}

{{ .Name }}_u:
labels:
e2e: true
container_name: {{ .Name }}_u
image: cometbft/e2e-node:{{ $.UpgradeVersion }}
{{- if or (eq .ABCIProtocol "builtin") (eq .ABCIProtocol "builtin_unsync") }}
entrypoint: /usr/bin/entrypoint-builtin
{{- end }}
init: true
ports:
- 26656
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
- 6060
volumes:
- ./{{ .Name }}:/cometbft
- ./{{ .Name }}:/tendermint
networks:
{{ $.Name }}:
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }}
{{- end }}

{{end}}`)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions test/e2e/pkg/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ type Manifest struct {
CheckTxDelay time.Duration `toml:"check_tx_delay"`
// TODO: add vote extension and finalize block delay (@cmwaters)

// UpgradeVersion specifies the version to which nodes need to upgrade.
// Currently only uncoordinated upgrades are supported.
UpgradeVersion string `toml:"upgrade_version"`

LoadTxSizeBytes int `toml:"load_tx_size_bytes"`
LoadTxBatchSize int `toml:"load_tx_batch_size"`
LoadTxConnections int `toml:"load_tx_connections"`
Expand Down
18 changes: 16 additions & 2 deletions test/e2e/pkg/testnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ const (
defaultBatchSize = 2
defaultConnections = 1
defaultTxSizeBytes = 1024

localVersion = "local-version"
)

type (
Expand All @@ -49,6 +51,7 @@ const (
PerturbationKill Perturbation = "kill"
PerturbationPause Perturbation = "pause"
PerturbationRestart Perturbation = "restart"
PerturbationUpgrade Perturbation = "upgrade"

EvidenceAgeHeight int64 = 7
EvidenceAgeTime time.Duration = 500 * time.Millisecond
Expand All @@ -74,6 +77,7 @@ type Testnet struct {
PrepareProposalDelay time.Duration
ProcessProposalDelay time.Duration
CheckTxDelay time.Duration
UpgradeVersion string
}

// Node represents a CometBFT node in a testnet.
Expand Down Expand Up @@ -136,6 +140,7 @@ func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Test
PrepareProposalDelay: manifest.PrepareProposalDelay,
ProcessProposalDelay: manifest.ProcessProposalDelay,
CheckTxDelay: manifest.CheckTxDelay,
UpgradeVersion: manifest.UpgradeVersion,
}
if len(manifest.KeyType) != 0 {
testnet.KeyType = manifest.KeyType
Expand All @@ -146,6 +151,9 @@ func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Test
if testnet.ABCIProtocol == "" {
testnet.ABCIProtocol = string(ProtocolBuiltin)
}
if testnet.UpgradeVersion == "" {
testnet.UpgradeVersion = localVersion
}
if testnet.LoadTxConnections == 0 {
testnet.LoadTxConnections = defaultConnections
}
Expand All @@ -167,11 +175,11 @@ func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Test
nodeManifest := manifest.Nodes[name]
ind, ok := ifd.Instances[name]
if !ok {
return nil, fmt.Errorf("information for node '%s' missing from infrastucture data", name)
return nil, fmt.Errorf("information for node '%s' missing from infrastructure data", name)
}
v := nodeManifest.Version
if v == "" {
v = "local-version"
v = localVersion
}
node := &Node{
Name: name,
Expand Down Expand Up @@ -379,8 +387,14 @@ func (n Node) Validate(testnet Testnet) error {
return errors.New("snapshot_interval must be less than er equal to retain_blocks")
}

var upgradeFound bool
for _, perturbation := range n.Perturbations {
switch perturbation {
case PerturbationUpgrade:
if upgradeFound {
return fmt.Errorf("'upgrade' perturbation can appear at most once per node")
}
upgradeFound = true
case PerturbationDisconnect, PerturbationKill, PerturbationPause, PerturbationRestart:
default:
return fmt.Errorf("invalid perturbation %q", perturbation)
Expand Down
17 changes: 14 additions & 3 deletions test/e2e/runner/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ import (

// exec runs the command given by args (program name first, then its
// arguments) via execOutput, discarding the captured output and reporting
// only whether the command succeeded.
func exec(args ...string) error {
	if _, err := execOutput(args...); err != nil {
		return err
	}
	return nil
}

// execOutput runs the command named by args[0], passing the remaining
// elements as its arguments, and returns the command's combined standard
// output and standard error.
//
// When the command exits with a non-zero status, the returned error embeds
// the captured output and the returned byte slice is nil. Any other failure
// (for example, the binary cannot be started) is returned unchanged. An empty
// args list is reported as an error instead of panicking on args[0].
func execOutput(args ...string) ([]byte, error) {
	if len(args) == 0 {
		return nil, fmt.Errorf("no command to run")
	}
	cmd := osexec.Command(args[0], args[1:]...) //nolint:gosec
	out, err := cmd.CombinedOutput()
	switch err := err.(type) {
	case nil:
		return out, nil
	case *osexec.ExitError:
		// The exit status alone is rarely useful; surface what the command printed.
		return nil, fmt.Errorf("failed to run %q:\n%v", args, string(out))
	default:
		return nil, err
	}
}

Expand All @@ -36,6 +41,12 @@ func execCompose(dir string, args ...string) error {
args...)...)
}

// execComposeOutput runs a Docker Compose command against the
// docker-compose.yml file located in dir and returns whatever execOutput
// yields for it (the command's captured output, or an error).
func execComposeOutput(dir string, args ...string) ([]byte, error) {
	composeFile := filepath.Join(dir, "docker-compose.yml")
	cmdline := []string{"docker-compose", "-f", composeFile}
	cmdline = append(cmdline, args...)
	return execOutput(cmdline...)
}

// execComposeVerbose runs a Docker Compose command for a testnet and displays its output.
func execComposeVerbose(dir string, args ...string) error {
return execVerbose(append(
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/runner/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func loadGenerate(ctx context.Context, txCh chan<- types.Tx, testnet *e2e.Testne

// A context with a timeout is created here to time the createTxBatch
// function out. If createTxBatch has not completed its work by the time
// the next batch is set to be sent out, then the context is cancled so that
// the next batch is set to be sent out, then the context is canceled so that
// the current batch is halted, allowing the next batch to begin.
tctx, cf := context.WithTimeout(ctx, time.Second)
createTxBatch(tctx, txCh, testnet, id)
Expand Down
53 changes: 46 additions & 7 deletions test/e2e/runner/perturb.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,39 +27,78 @@ func Perturb(testnet *e2e.Testnet) error {
// after recovering.
func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
testnet := node.Testnet
out, err := execComposeOutput(testnet.Dir, "ps", "-q", node.Name)
if err != nil {
return nil, err
}
name := node.Name
upgraded := false
if len(out) == 0 {
name = name + "_u"
upgraded = true
logger.Info("perturb node", "msg",
log.NewLazySprintf("Node %v already upgraded, operating on alternate container %v",
node.Name, name))
}

switch perturbation {
case e2e.PerturbationDisconnect:
logger.Info("perturb node", "msg", log.NewLazySprintf("Disconnecting node %v...", node.Name))
if err := execDocker("network", "disconnect", testnet.Name+"_"+testnet.Name, node.Name); err != nil {
if err := execDocker("network", "disconnect", testnet.Name+"_"+testnet.Name, name); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := execDocker("network", "connect", testnet.Name+"_"+testnet.Name, node.Name); err != nil {
if err := execDocker("network", "connect", testnet.Name+"_"+testnet.Name, name); err != nil {
return nil, err
}

case e2e.PerturbationKill:
logger.Info("perturb node", "msg", log.NewLazySprintf("Killing node %v...", node.Name))
if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", node.Name); err != nil {
if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", name); err != nil {
return nil, err
}
if err := execCompose(testnet.Dir, "start", node.Name); err != nil {
if err := execCompose(testnet.Dir, "start", name); err != nil {
return nil, err
}

case e2e.PerturbationPause:
logger.Info("perturb node", "msg", log.NewLazySprintf("Pausing node %v...", node.Name))
if err := execCompose(testnet.Dir, "pause", node.Name); err != nil {
if err := execCompose(testnet.Dir, "pause", name); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := execCompose(testnet.Dir, "unpause", node.Name); err != nil {
if err := execCompose(testnet.Dir, "unpause", name); err != nil {
return nil, err
}

case e2e.PerturbationRestart:
logger.Info("perturb node", "msg", log.NewLazySprintf("Restarting node %v...", node.Name))
if err := execCompose(testnet.Dir, "restart", node.Name); err != nil {
if err := execCompose(testnet.Dir, "restart", name); err != nil {
return nil, err
}

case e2e.PerturbationUpgrade:
oldV := node.Version
newV := node.Testnet.UpgradeVersion
if upgraded {
return nil, fmt.Errorf("node %v can't be upgraded twice from version '%v' to version '%v'",
node.Name, oldV, newV)
}
if oldV == newV {
logger.Info("perturb node", "msg",
log.NewLazySprintf("Skipping upgrade of node %v to version '%v'; versions are equal.",
node.Name, newV))
break
}
logger.Info("perturb node", "msg",
log.NewLazySprintf("Upgrading node %v from version '%v' to version '%v'...",
node.Name, oldV, newV))

if err := execCompose(testnet.Dir, "stop", name); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := execCompose(testnet.Dir, "up", "-d", name+"_u"); err != nil {
return nil, err
}

Expand Down
6 changes: 2 additions & 4 deletions test/e2e/runner/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,11 @@ const (
func Setup(testnet *e2e.Testnet, infp infra.Provider) error {
logger.Info("setup", "msg", log.NewLazySprintf("Generating testnet files in %q", testnet.Dir))

err := os.MkdirAll(testnet.Dir, os.ModePerm)
if err != nil {
if err := os.MkdirAll(testnet.Dir, os.ModePerm); err != nil {
return err
}

err = infp.Setup()
if err != nil {
if err := infp.Setup(); err != nil {
return err
}

Expand Down