Skip to content

Commit

Permalink
Allow reducing HTTP's pool idle timeout in watchtower
Browse files Browse the repository at this point in the history
Setting the pool idle timeout to a value smaller than watchtower's poll
interval can fix the following error:

	[2022-08-25T04:03:22.811160892Z INFO  solana_watchtower] Failure 1 of 3: solana-watchtower testnet: Error: rpc-error: error sending request for url (https://api.testnet.solana.com/): connection closed before message completed

It looks like this happens because either RPC servers or ISPs drop HTTP
connections without properly notifying the client in some cases.

Similar issue: hyperium/hyper#2136.
  • Loading branch information
im-0 committed Aug 26, 2022
1 parent 878c8af commit df4c46c
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
18 changes: 17 additions & 1 deletion rpc-client/src/http_sender.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use {
/// Collects the settings used when building the HTTP sender.
///
/// The request timeout and pool idle timeout are applied to the underlying
/// `reqwest` client builder when the sender is built.
pub struct HttpSenderBuilder {
/// Target URL, stored as an owned string by `new`.
url: String,
/// Request timeout passed to the `reqwest` client builder; `new` sets it to 30 seconds.
timeout: Duration,
/// Idle timeout for pooled keep-alive sockets, passed to the `reqwest` client builder.
/// `None` disables the timeout; `new` sets it to 90 seconds.
pool_idle_timeout: Option<Duration>,
}

impl HttpSenderBuilder {
Expand All @@ -42,6 +43,7 @@ impl HttpSenderBuilder {
Self {
url: url.to_string(),
timeout: Duration::from_secs(30),
pool_idle_timeout: Some(Duration::from_secs(90)),
}
}

Expand All @@ -58,7 +60,8 @@ impl HttpSenderBuilder {

let reqwest_builder = reqwest::Client::builder()
.default_headers(default_headers)
.timeout(self.timeout);
.timeout(self.timeout)
.pool_idle_timeout(self.pool_idle_timeout);

let client = Arc::new(reqwest_builder.build().expect("build rpc client"));

Expand All @@ -75,6 +78,19 @@ impl HttpSenderBuilder {
self.timeout = timeout;
self
}

/// Configure how long an idle keep-alive socket may stay in the connection pool.
///
/// Accepts either a `Duration` or an `Option<Duration>`; passing `None`
/// turns the idle timeout off entirely.
///
/// When this method is not called, the builder uses 90 seconds.
pub fn pool_idle_timeout<D>(self, timeout: D) -> Self
where
    D: Into<Option<Duration>>,
{
    // Rebind the consumed builder mutably, update the one field, and hand it back.
    let mut builder = self;
    let idle_timeout: Option<Duration> = timeout.into();
    builder.pool_idle_timeout = idle_timeout;
    builder
}
}

pub struct HttpSender {
Expand Down
17 changes: 15 additions & 2 deletions watchtower/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ struct Config {
interval: Duration,
json_rpc_url: String,
rpc_timeout: Duration,
rpc_pool_idle_timeout: Duration,
minimum_validator_identity_balance: u64,
monitor_active_stake: bool,
unhealthy_threshold: usize,
Expand Down Expand Up @@ -93,6 +94,14 @@ fn get_config() -> Config {
.default_value("30")
.help("Timeout value for RPC requests"),
)
.arg(
Arg::with_name("rpc_pool_idle_timeout")
.long("rpc-pool-idle-timeout")
.value_name("SECONDS")
.takes_value(true)
.default_value("90")
.help("Timeout for idle HTTP RPC sockets being kept-alive"),
)
.arg(
Arg::with_name("interval")
.long("interval")
Expand Down Expand Up @@ -175,6 +184,8 @@ fn get_config() -> Config {
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url.clone());
let rpc_timeout = value_t_or_exit!(matches, "rpc_timeout", u64);
let rpc_timeout = Duration::from_secs(rpc_timeout);
let rpc_pool_idle_timeout = value_t_or_exit!(matches, "rpc_pool_idle_timeout", u64);
let rpc_pool_idle_timeout = Duration::from_secs(rpc_pool_idle_timeout);
let validator_identity_pubkeys: Vec<_> = pubkeys_of(&matches, "validator_identities")
.unwrap_or_default()
.into_iter()
Expand All @@ -191,6 +202,7 @@ fn get_config() -> Config {
interval,
json_rpc_url,
rpc_timeout,
rpc_pool_idle_timeout,
minimum_validator_identity_balance,
monitor_active_stake,
unhealthy_threshold,
Expand Down Expand Up @@ -236,8 +248,9 @@ fn main() -> Result<(), Box<dyn error::Error>> {

let config = get_config();

let http_sender_builder =
HttpSenderBuilder::new(config.json_rpc_url.clone()).timeout(config.rpc_timeout);
let http_sender_builder = HttpSenderBuilder::new(config.json_rpc_url.clone())
.timeout(config.rpc_timeout)
.pool_idle_timeout(config.rpc_pool_idle_timeout);

let rpc_client = RpcClient::new_sender(http_sender_builder.build(), RpcClientConfig::default());
let notifier = Notifier::default();
Expand Down

0 comments on commit df4c46c

Please sign in to comment.