Skip to content

Commit 1edecd1

Browse files
authored
fix(orchestrator): properly log some messages (#5992)
While debugging orchestrator logs I noticed that some logs do not appear, not only in Kibana but also in systemd journal. Changes: 1. Pass `ReplicaLogger` instead of `slog::Logger` to some utility functions. For some reason (I didn't investigate) `slog::log!(replica_logger.inner.root)` always logs an empty message while `log!(replica_logger)` properly logs a message (see [Kibana logs](https://kibana.testnet.dfinity.network/app/discover#/?_g=(filters:!(),refreshInterval:(pause:!t,value:60000),time:(from:now-1w,to:now))&_a=(columns:!(host.name,message,level,crate_,module),filters:!(('$state':(store:appState),meta:(alias:!n,disabled:!f,index:'535335e8-7195-4af7-95ab-e59ab3bb8056',key:module,negate:!f,params:!(process_manager,ic_http_endpoints_async_utils),type:phrases,value:!(process_manager,ic_http_endpoints_async_utils)),query:(bool:(minimum_should_match:1,should:!((match_phrase:(module:process_manager)),(match_phrase:(module:ic_http_endpoints_async_utils))))))),grid:(columns:(host.name:(width:359))),hideChart:!f,index:'535335e8-7195-4af7-95ab-e59ab3bb8056',interval:auto,query:(language:kuery,query:''),sort:!(!('@timestamp',desc)))) and note that logs from `process_manager` and `ic_http_endpoints_async_utils` appear in Kibana only for system tests running on this branch) 2. Move creation of `ReplicaLogger` to the `main` function of `Orchestrator`. Currently, the messages logged after the `Orchestrator` struct is dropped might not be printed because the logs are flushed when the [_async_drop_guard](https://docs.rs/slog-async/latest/slog_async/struct.AsyncGuard.html) is dropped, which happens in the `Orchestrator` struct's destructor
1 parent 36cffa3 commit 1edecd1

File tree

12 files changed

+41
-34
lines changed

12 files changed

+41
-34
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rs/bitcoin/adapter/bin/ic-btc-adapter/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,5 @@ pub async fn main() {
5050
&tokio::runtime::Handle::current(),
5151
config,
5252
);
53-
shutdown_signal(log.inner_logger.root.clone()).await;
53+
shutdown_signal(log.clone()).await;
5454
}

rs/http_endpoints/async_utils/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ rust_library(
99
version = "0.9.0",
1010
deps = [
1111
# Keep sorted.
12+
"//rs/monitoring/logger",
1213
"@crate_index//:anyhow",
1314
"@crate_index//:async-stream",
1415
"@crate_index//:byte-unit",

rs/http_endpoints/async_utils/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ bytes = { workspace = true }
1414
futures = { workspace = true }
1515
futures-util = { workspace = true }
1616
hyper = { workspace = true }
17+
ic-logger = { path = "../../monitoring/logger" }
1718
slog = { workspace = true }
1819
tokio = { workspace = true }
1920
tonic = { workspace = true }

rs/http_endpoints/async_utils/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
use slog::{info, Logger};
2-
31
mod hyper;
42
mod join_map;
53
mod unix;
64

5+
use ic_logger::{info, ReplicaLogger};
6+
77
pub use self::{
88
hyper::ExecuteOnTokioRuntime,
99
join_map::JoinMap,
@@ -22,7 +22,7 @@ pub fn abort_on_panic() {
2222
/// Returns a `Future` that completes when the service should gracefully
2323
/// shutdown. Completion happens if either of `SIGINT` or `SIGTERM` are
2424
/// received.
25-
pub async fn shutdown_signal(log: Logger) {
25+
pub async fn shutdown_signal(log: ReplicaLogger) {
2626
use tokio::signal::unix::{signal, SignalKind};
2727
let mut sig_int =
2828
signal(SignalKind::interrupt()).expect("failed to install SIGINT signal handler");

rs/https_outcalls/adapter/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,5 +52,5 @@ pub async fn main() {
5252
&tokio::runtime::Handle::current(),
5353
config,
5454
);
55-
shutdown_signal(logger.inner_logger.root.clone()).await;
55+
shutdown_signal(logger.clone()).await;
5656
}

rs/orchestrator/src/args.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ impl OrchestratorArgs {
8484

8585
/// Parse `self.replica_config_file` and persist in
8686
/// [`TempDir`][tempfile::TempDir] "ic_config"
87-
pub(crate) fn get_ic_config(&self) -> Config {
87+
pub fn get_ic_config(&self) -> Config {
8888
let tmpdir = tempfile::Builder::new()
8989
.prefix("ic_config")
9090
.tempdir()

rs/orchestrator/src/boundary_node.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,7 @@ impl BoundaryNodeManager {
6767
Self {
6868
registry,
6969
_metrics: metrics,
70-
process: Arc::new(Mutex::new(ProcessManager::new(
71-
logger.clone().inner_logger.root,
72-
))),
70+
process: Arc::new(Mutex::new(ProcessManager::new(logger.clone()))),
7371
ic_binary_dir,
7472
crypto_config,
7573
version,

rs/orchestrator/src/main.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,21 @@
11
use clap::Parser;
22
use ic_http_endpoints_async_utils::shutdown_signal;
3-
use ic_logger::{info, warn};
3+
use ic_logger::{info, new_replica_logger_from_config, warn};
44
use orchestrator::{args::OrchestratorArgs, orchestrator::Orchestrator};
55

66
#[tokio::main]
77
async fn main() {
88
let args = OrchestratorArgs::parse();
9+
let config = args.get_ic_config();
10+
let (logger, _async_log_guard) = new_replica_logger_from_config(&config.orchestrator_logger);
911

1012
let (exit_sender, exit_signal) = tokio::sync::watch::channel(false);
1113

12-
let mut orchestrator = Orchestrator::new(args)
14+
let mut orchestrator = Orchestrator::new(args, &config, logger.clone())
1315
.await
1416
.expect("Failed to start orchestrator");
15-
let logger = orchestrator.logger.clone();
1617
let join_handle = tokio::spawn(async move { orchestrator.start_tasks(exit_signal).await });
17-
shutdown_signal(logger.inner_logger.root.clone()).await;
18+
shutdown_signal(logger.clone()).await;
1819

1920
exit_sender.send(true).expect("Failed to send exit signal");
2021

rs/orchestrator/src/orchestrator.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,19 @@ use crate::{
1515
};
1616
use backoff::ExponentialBackoffBuilder;
1717
use get_if_addrs::get_if_addrs;
18-
use ic_config::metrics::{Config as MetricsConfig, Exporter};
18+
use ic_config::{
19+
metrics::{Config as MetricsConfig, Exporter},
20+
Config,
21+
};
1922
use ic_crypto::CryptoComponent;
2023
use ic_crypto_node_key_generation::{generate_node_keys_once, NodeKeyGenerationError};
2124
use ic_http_endpoints_metrics::MetricsHttpEndpoint;
2225
use ic_image_upgrader::ImageUpgrader;
23-
use ic_logger::{error, info, new_replica_logger_from_config, warn, ReplicaLogger};
26+
use ic_logger::{error, info, warn, ReplicaLogger};
2427
use ic_metrics::MetricsRegistry;
2528
use ic_registry_replicator::RegistryReplicator;
2629
use ic_sys::utility_command::UtilityCommand;
2730
use ic_types::{hostos_version::HostosVersion, ReplicaVersion, SubnetId};
28-
use slog_async::AsyncGuard;
2931
use std::{
3032
collections::HashMap,
3133
convert::TryFrom,
@@ -42,7 +44,6 @@ const CHECK_INTERVAL_SECS: Duration = Duration::from_secs(10);
4244

4345
pub struct Orchestrator {
4446
pub logger: ReplicaLogger,
45-
_async_log_guard: AsyncGuard,
4647
_metrics_runtime: MetricsHttpEndpoint,
4748
upgrade: Option<Upgrade>,
4849
hostos_upgrade: Option<HostosUpgrader>,
@@ -83,10 +84,13 @@ fn load_version_from_file(logger: &ReplicaLogger, path: &Path) -> Result<Replica
8384
}
8485

8586
impl Orchestrator {
86-
pub async fn new(args: OrchestratorArgs) -> Result<Self, OrchestratorInstantiationError> {
87+
pub async fn new(
88+
args: OrchestratorArgs,
89+
config: &Config,
90+
logger: ReplicaLogger,
91+
) -> Result<Self, OrchestratorInstantiationError> {
8792
args.create_dirs();
8893
let metrics_addr = args.get_metrics_addr();
89-
let config = args.get_ic_config();
9094
let crypto_config = config.crypto.clone();
9195
let node_id = tokio::task::spawn_blocking(move || {
9296
generate_node_keys_once(&crypto_config, Some(tokio::runtime::Handle::current()))
@@ -100,8 +104,6 @@ impl Orchestrator {
100104
.await
101105
.unwrap()?;
102106

103-
let (logger, _async_log_guard) =
104-
new_replica_logger_from_config(&config.orchestrator_logger);
105107
let metrics_registry = MetricsRegistry::global();
106108
let replica_version = load_version_from_file(&logger, &args.version_file)
107109
.map_err(|()| OrchestratorInstantiationError::VersionFileError)?;
@@ -148,11 +150,11 @@ impl Orchestrator {
148150
let registry_replicator = Arc::new(RegistryReplicator::new_from_config(
149151
logger.clone(),
150152
Some(node_id),
151-
&config,
153+
config,
152154
));
153155

154156
let (nns_urls, nns_pub_key) =
155-
registry_replicator.parse_registry_access_info_from_config(&config);
157+
registry_replicator.parse_registry_access_info_from_config(config);
156158

157159
match registry_replicator
158160
.start_polling(nns_urls, nns_pub_key)
@@ -213,7 +215,7 @@ impl Orchestrator {
213215
registry_local_store.clone(),
214216
);
215217

216-
let replica_process = Arc::new(Mutex::new(ProcessManager::new(slog_logger.clone())));
218+
let replica_process = Arc::new(Mutex::new(ProcessManager::new(logger.clone())));
217219
let ic_binary_directory = args
218220
.ic_binary_directory
219221
.as_ref()
@@ -330,7 +332,6 @@ impl Orchestrator {
330332

331333
Ok(Self {
332334
logger,
333-
_async_log_guard,
334335
_metrics_runtime,
335336
upgrade,
336337
hostos_upgrade,

0 commit comments

Comments
 (0)