diff --git a/Cargo.lock b/Cargo.lock index b8caf2c32d2d7..bf715efefd8cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9520,12 +9520,17 @@ version = "0.0.0" dependencies = [ "anyhow", "aptos", + "aptos-genesis", "aptos-global-constants", + "aptos-keygen", "aptos-logger", "aptos-rest-client", "aptos-sdk", + "aptos-types", "forge", "futures", + "hex", + "move-examples", "rand 0.7.3", "reqwest", "serde_json", diff --git a/crates/aptos/Cargo.toml b/crates/aptos/Cargo.toml index de695c8c083d9..614e9c2f481be 100644 --- a/crates/aptos/Cargo.toml +++ b/crates/aptos/Cargo.toml @@ -72,6 +72,7 @@ default = [] fuzzing = [] no-upload-proposal = [] indexer = ["aptos-node/indexer"] +cli-framework-test-move = [] [build-dependencies] shadow-rs = "0.16.2" diff --git a/crates/aptos/src/test/mod.rs b/crates/aptos/src/test/mod.rs index 29d702c7060a9..26e9f2746d1dd 100644 --- a/crates/aptos/src/test/mod.rs +++ b/crates/aptos/src/test/mod.rs @@ -16,7 +16,10 @@ use crate::common::types::{ MovePackageDir, OptionalPoolAddressArgs, PrivateKeyInputOptions, PromptOptions, PublicKeyInputOptions, RestOptions, RngArgs, SaveFile, TransactionOptions, TransactionSummary, }; + +#[cfg(feature = "cli-framework-test-move")] use crate::common::utils::write_to_file; + use crate::move_tool::{ ArgWithType, CompilePackage, DownloadPackage, FrameworkPackageArgs, IncludedArtifacts, InitPackage, MemberId, PublishPackage, RunFunction, TestPackage, @@ -52,7 +55,10 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; use std::{collections::BTreeMap, mem, path::PathBuf, str::FromStr, time::Duration}; + +#[cfg(feature = "cli-framework-test-move")] use thiserror::private::PathAsDisplay; + use tokio::time::{sleep, Instant}; #[cfg(test)] @@ -722,6 +728,7 @@ impl CliTestFramework { self.move_dir = Some(move_dir.path().to_path_buf()); } + #[cfg(feature = "cli-framework-test-move")] pub fn add_move_files(&self) { let move_dir = self.move_dir(); let sources_dir = 
move_dir.join("sources"); diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs index 498df597adde0..cc51c448669da 100644 --- a/testsuite/forge-cli/src/main.rs +++ b/testsuite/forge-cli/src/main.rs @@ -21,6 +21,7 @@ use testcases::performance_with_fullnode_test::PerformanceBenchmarkWithFN; use testcases::state_sync_performance::StateSyncValidatorPerformance; use testcases::three_region_simulation_test::ThreeRegionSimulationTest; use testcases::twin_validator_test::TwinValidatorTest; +use testcases::validator_join_leave_test::ValidatorJoinLeaveTest; use testcases::validator_reboot_stress_test::ValidatorRebootStressTest; use testcases::{ compatibility_test::SimpleValidatorUpgrade, forge_setup_test::ForgeSetupTest, generate_traffic, @@ -437,6 +438,7 @@ fn single_test_suite(test_name: &str) -> Result<ForgeConfig<'static>> { state_sync_perf_fullnodes_execute_transactions(config) } "state_sync_perf_validators" => state_sync_perf_validators(config), + "validators_join_and_leave" => validators_join_and_leave(config), "compat" => config .with_initial_validator_count(NonZeroUsize::new(5).unwrap()) .with_network_tests(vec![&SimpleValidatorUpgrade]) @@ -776,6 +778,33 @@ fn state_sync_perf_validators(forge_config: ForgeConfig<'static>) -> ForgeConfig .with_success_criteria(SuccessCriteria::new(5000, 10000, false, None, None, None)) } +/// The config for running a validator join and leave test.
+fn validators_join_and_leave(forge_config: ForgeConfig<'static>) -> ForgeConfig<'static> { + forge_config + .with_initial_validator_count(NonZeroUsize::new(20).unwrap()) + .with_genesis_helm_config_fn(Arc::new(|helm_values| { + helm_values["chain"]["epoch_duration_secs"] = 60.into(); + helm_values["chain"]["allow_new_validators"] = true.into(); + })) + .with_network_tests(vec![&ValidatorJoinLeaveTest]) + .with_success_criteria(SuccessCriteria::new( + 5000, + 10000, + true, + Some(Duration::from_secs(240)), + Some(SystemMetricsThreshold::new( + // Check that we don't use more than 12 CPU cores for 30% of the time. + MetricsThreshold::new(12, 30), + // Check that we don't use more than 10 GB of memory for 30% of the time. + MetricsThreshold::new(10 * 1024 * 1024 * 1024, 30), + )), + Some(StateProgressThreshold { + max_no_progress_secs: 10.0, + max_round_gap: 4, + }), + )) +} + fn land_blocking_test_suite(duration: Duration) -> ForgeConfig<'static> { ForgeConfig::default() .with_initial_validator_count(NonZeroUsize::new(20).unwrap()) diff --git a/testsuite/smoke-test/src/aptos_cli/mod.rs b/testsuite/smoke-test/src/aptos_cli/mod.rs index 5b85020221320..589fabe648ec6 100644 --- a/testsuite/smoke-test/src/aptos_cli/mod.rs +++ b/testsuite/smoke-test/src/aptos_cli/mod.rs @@ -2,5 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 mod account; +#[cfg(feature = "cli-framework-test-move")] mod r#move; mod validator; diff --git a/testsuite/testcases/Cargo.toml b/testsuite/testcases/Cargo.toml index ba6a34bbdaaad..b35bedac7f70f 100644 --- a/testsuite/testcases/Cargo.toml +++ b/testsuite/testcases/Cargo.toml @@ -11,19 +11,24 @@ edition = "2021" [dependencies] anyhow = "1.0.57" -futures = "0.3.21" -rand = "0.7.3" -reqwest = { version = "0.11.10", features = ["json", "cookies", "blocking"] } -serde_json = "1.0.81" -tokio = { version = "1.21.0", features = ["full"] } +aptos = { path = "../../crates/aptos", features = ["fuzzing"] } -aptos = { path = "../../crates/aptos" } +aptos-genesis 
= { path = "../../crates/aptos-genesis", features = ["testing"] } aptos-global-constants = { path = "../../config/global-constants" } +aptos-keygen = { path = "../../crates/aptos-keygen" } aptos-logger = { path = "../../crates/aptos-logger" } aptos-rest-client = { path = "../../crates/aptos-rest-client" } aptos-sdk = { path = "../../sdk" } +aptos-types = { path = "../../types" } forge = { path = "../forge" } +futures = "0.3.21" +hex = "0.4.3" +move-examples = { path = "../../aptos-move/move-examples" } +rand = "0.7.3" +reqwest = { version = "0.11.10", features = ["json", "cookies", "blocking"] } +serde_json = "1.0.81" +tokio = { version = "1.21.0", features = ["full"] } [[test]] name = "forge-local-compatibility" diff --git a/testsuite/testcases/src/lib.rs b/testsuite/testcases/src/lib.rs index f6efcc3db2162..eae8cae668f7e 100644 --- a/testsuite/testcases/src/lib.rs +++ b/testsuite/testcases/src/lib.rs @@ -17,6 +17,7 @@ pub mod reconfiguration_test; pub mod state_sync_performance; pub mod three_region_simulation_test; pub mod twin_validator_test; +pub mod validator_join_leave_test; pub mod validator_reboot_stress_test; use anyhow::{anyhow, ensure}; @@ -116,7 +117,7 @@ pub trait NetworkLoadTest: Test { fn setup(&self, _ctx: &mut NetworkContext) -> Result<LoadDestination> { Ok(LoadDestination::AllNodes) } - // Load is started before this funciton is called, and stops after this function returns. + // Load is started before this function is called, and stops after this function returns. // Expected duration is passed into this function, expecting this function to take that much // time to finish. How long this function takes will dictate how long the actual test lasts.
fn test(&self, _swarm: &mut dyn Swarm, duration: Duration) -> Result<()> { diff --git a/testsuite/testcases/src/validator_join_leave_test.rs b/testsuite/testcases/src/validator_join_leave_test.rs new file mode 100644 index 0000000000000..6ebac27b329b2 --- /dev/null +++ b/testsuite/testcases/src/validator_join_leave_test.rs @@ -0,0 +1,237 @@ +// Copyright (c) Aptos +// SPDX-License-Identifier: Apache-2.0 + +use crate::{LoadDestination, NetworkLoadTest}; +use aptos::account::create::DEFAULT_FUNDED_COINS; +use aptos_logger::info; +use aptos_sdk::crypto::ed25519::Ed25519PrivateKey; +use aptos_sdk::crypto::PrivateKey; +use forge::{ + reconfig, NetworkContext, NetworkTest, NodeExt, Result, Swarm, SwarmExt, Test, FORGE_KEY_SEED, +}; + +use aptos_keygen::KeyGen; + +use aptos::test::CliTestFramework; +use aptos_types::account_address::AccountAddress; +use aptos_types::transaction::authenticator::AuthenticationKey; +use std::time::Duration; +use tokio::runtime::Runtime; + +const MAX_NODE_LAG_SECS: u64 = 360; + +pub struct ValidatorJoinLeaveTest; + +impl Test for ValidatorJoinLeaveTest { + fn name(&self) -> &'static str { + "validator join and leave sets" + } +} + +impl NetworkLoadTest for ValidatorJoinLeaveTest { + fn setup(&self, _ctx: &mut NetworkContext) -> Result<LoadDestination> { + Ok(LoadDestination::AllValidators) + } + + fn test(&self, swarm: &mut dyn Swarm, duration: Duration) -> Result<()> { + // Verify we have at least 7 validators (i.e., 3f+1, where f is 2) + // so we can lose 2 validators but still make progress. + let all_validators = swarm.validators().map(|v| v.peer_id()).collect::<Vec<_>>(); + let num_validators = all_validators.len(); + if num_validators < 7 { + return Err(anyhow::format_err!( + "ValidatorSet leaving and rejoining test require at least 7 validators!
Given: {:?}.", + num_validators + )); + } + + let faucet_endpoint: reqwest::Url = "http://localhost:8081".parse().unwrap(); + // Connect the operator tool to the node's JSON RPC API + let rest_client = swarm.validators().next().unwrap().rest_client(); + let transaction_factory = swarm.chain_info().transaction_factory(); + let runtime = Runtime::new().unwrap(); + + let mut cli = runtime.block_on(async { + CliTestFramework::new( + swarm.validators().next().unwrap().rest_api_endpoint(), + faucet_endpoint, + /*num_cli_accounts=*/ 0, + ) + .await + }); + + let mut public_info = swarm.chain_info().into_aptos_public_info(); + + let mut validator_cli_indices = Vec::new(); + + let starting_seed_in_decimal = i64::from_str_radix(FORGE_KEY_SEED, 16)?; + + for i in 0..num_validators { + // Initialize keyGen to get validator private keys. We use the same seed in the test + // driver as in the genesis script so that the validator keys are deterministic. + let mut seed_slice = [0u8; 32]; + let seed_in_decimal = starting_seed_in_decimal + (i as i64); + let seed_in_hex_string = format!("{seed_in_decimal:0>64x}"); + + hex::decode_to_slice(seed_in_hex_string, &mut seed_slice)?; + + let mut keygen = KeyGen::from_seed(seed_slice); + + let (validator_cli_index, _keys, account_balance) = runtime.block_on(async { + let (validator_cli_index, keys) = + init_validator_account(&mut cli, &mut keygen).await; + + let auth_key = AuthenticationKey::ed25519(&keys.account_private_key.public_key()); + let validator_account_address = AccountAddress::new(*auth_key.derived_address()); + + public_info + .mint(validator_account_address, DEFAULT_FUNDED_COINS) + .await + .unwrap(); + + let account_balance = public_info + .get_balance(validator_account_address) + .await + .unwrap(); + + (validator_cli_index, keys, account_balance) + }); + assert_eq!(account_balance, DEFAULT_FUNDED_COINS); + validator_cli_indices.push(validator_cli_index); + + assert_eq!( + runtime.block_on(get_validator_state(&cli,
validator_cli_index)), + ValidatorState::ACTIVE + ); + } + + // Log the test setup + info!( + "Running validator join and leave test {:?} with {:?} validators.", + self.name(), + num_validators, + ); + + // Wait for all nodes to synchronize and stabilize. + info!("Waiting for the validators to be synchronized."); + runtime.block_on(async { + swarm + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await + })?; + + // Wait for 1/3 of the test duration. + std::thread::sleep(duration / 3); + + runtime.block_on(async { + // 1/3 validators leave the validator set. + info!("Make the last 1/3 validators leave the validator set!"); + for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { + cli.leave_validator_set(*operator_index, None) + .await + .unwrap(); + + reconfig( + &rest_client, + &transaction_factory, + swarm.chain_info().root_account(), + ) + .await; + } + + reconfig( + &rest_client, + &transaction_factory, + swarm.chain_info().root_account(), + ) + .await; + }); + + // Wait for 1/3 of the test duration. + std::thread::sleep(duration / 3); + + runtime.block_on(async { + // Rejoining validator set. + info!("Make the last 1/3 validators rejoin the validator set!"); + for operator_index in validator_cli_indices.iter().rev().take(num_validators / 3) { + cli.join_validator_set(*operator_index, None).await.unwrap(); + + reconfig( + &rest_client, + &transaction_factory, + swarm.chain_info().root_account(), + ) + .await; + } + + reconfig( + &rest_client, + &transaction_factory, + swarm.chain_info().root_account(), + ) + .await; + }); + + // Wait for all nodes to synchronize and stabilize. 
+ info!("Waiting for the validators to be synchronized."); + runtime.block_on(async { + swarm + .wait_for_all_nodes_to_catchup(Duration::from_secs(MAX_NODE_LAG_SECS)) + .await + })?; + + Ok(()) + } +} + +impl NetworkTest for ValidatorJoinLeaveTest { + fn run<'t>(&self, ctx: &mut NetworkContext<'t>) -> Result<()> { + <dyn NetworkLoadTest>::run(self, ctx) + } +} + +#[derive(Debug, PartialEq, Eq)] +enum ValidatorState { + ACTIVE, + JOINING, + LEAVING, + NONE, +} + +struct ValidatorNodeKeys { + account_private_key: Ed25519PrivateKey, +} + +impl ValidatorNodeKeys { + pub fn new(keygen: &mut KeyGen) -> Self { + Self { + account_private_key: keygen.generate_ed25519_private_key(), + } + } +} + +async fn init_validator_account( + cli: &mut CliTestFramework, + keygen: &mut KeyGen, +) -> (usize, ValidatorNodeKeys) { + let validator_node_keys = ValidatorNodeKeys::new(keygen); + let validator_cli_index = + cli.add_account_to_cli(validator_node_keys.account_private_key.clone()); + (validator_cli_index, validator_node_keys) +} + +async fn get_validator_state(cli: &CliTestFramework, pool_index: usize) -> ValidatorState { + let validator_set = cli.show_validator_set().await.unwrap(); + let pool_address = cli.account_id(pool_index); + + for (state, list) in [ + (ValidatorState::ACTIVE, &validator_set.active_validators), + (ValidatorState::JOINING, &validator_set.pending_active), + (ValidatorState::LEAVING, &validator_set.pending_inactive), + ] { + if list.iter().any(|info| info.account_address == pool_address) { + return state; + } + } + ValidatorState::NONE +}