Skip to content

Commit ea30a2f

Browse files
committed
fix(secrets): stop vault refresh task leaks and handle token expiry
1 parent 1ed17c5 commit ea30a2f

File tree

4 files changed

+65
-9
lines changed

4 files changed

+65
-9
lines changed

voice/engine/src/server.rs

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,29 @@ impl RegisteredSession {
185185
}
186186
}
187187

188+
impl Drop for RegisteredSession {
189+
fn drop(&mut self) {
190+
// Abort the background secret refresh task when the session is dropped.
191+
//
192+
// This covers all early-exit paths where `run_session_with_transport` is
193+
// never reached (and thus never calls `h.abort()`):
194+
// - TTL cleanup task evicts unconnected sessions from the DashMap
195+
// - WebRTC ICE connection timeout / failure
196+
// - Telephony session_id mismatch (start-message timeout → random UUID)
197+
//
198+
// Note: `take_refresh_handle` removes the handle from the inner Option,
199+
// so for the normal path — where the caller already took the handle and
200+
// called `h.abort()` inside `run_session_with_transport` — this no-ops.
201+
if let Some(arc) = &self.secret_refresh_handle {
202+
if let Ok(mut guard) = arc.lock() {
203+
if let Some(h) = guard.take() {
204+
h.abort();
205+
}
206+
}
207+
}
208+
}
209+
}
210+
188211
impl std::fmt::Debug for RegisteredSession {
189212
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
190213
f.debug_struct("RegisteredSession")
@@ -545,7 +568,7 @@ async fn handle_registered_socket(socket: WebSocket, session_id: String, state:
545568
run_voice_session(
546569
session_id,
547570
socket,
548-
registered.config,
571+
registered.config.clone(),
549572
&registered.providers,
550573
secrets,
551574
refresh_handle,
@@ -1100,7 +1123,7 @@ async fn rtc_offer_registered(
11001123
rtc_create_session(
11011124
session_id,
11021125
body,
1103-
registered.config,
1126+
registered.config.clone(),
11041127
&registered.providers,
11051128
secrets,
11061129
refresh_handle,
@@ -1572,8 +1595,8 @@ async fn handle_telephony_session(
15721595
let from_number = reg.from_number.clone();
15731596
(
15741597
effective_session_id.clone(),
1575-
reg.config,
1576-
reg.providers,
1598+
reg.config.clone(),
1599+
reg.providers.clone(),
15771600
secrets,
15781601
refresh_handle,
15791602
reg_tracer,

voice/server/src/secrets.rs

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,27 @@
22
//!
33
//! Owns the lifecycle of per-session secrets:
44
//! 1. Initial resolution at session registration time
5-
//! 2. Background refresh task that re-queries the vault every 90 seconds
5+
//! 2. Background refresh task that re-queries the vault every 90 seconds,
6+
//! automatically renewing the scoped vault token if it expires.
67
//!
78
//! voice-engine receives a pre-populated `SharedSecretMap` and never touches
89
//! the vault directly. This keeps voice-engine free of infrastructure concerns.
910
1011
use std::sync::Arc;
12+
use std::time::Duration;
1113

1214
use agent_kit::agent_backends::{SecretMap, SharedSecretMap};
1315
use tracing::warn;
1416

17+
use crate::cred::VaultHandle;
1518
use crate::vault_client;
1619

20+
/// Scoped vault token TTL issued to each session.
21+
///
22+
/// If a session runs longer than 1 hour, the token is automatically renewed
23+
/// by the background refresh task.
24+
pub const VAULT_TOKEN_TTL: Duration = Duration::from_secs(3600);
25+
1726
/// Cached vault address from `VAULT_ADDR` environment variable.
1827
///
1928
/// Returns `None` if `VAULT_ADDR` is not set. The value is read once and
@@ -67,14 +76,16 @@ const SECRET_REFRESH_INTERVAL_SECS: u64 = 90;
6776
/// and updates the shared secret map.
6877
///
6978
/// Returns a `JoinHandle` — the caller should abort it when the session ends.
70-
/// If `vault_token` is `None`, no task is spawned (returns `None`).
79+
/// If `vault` or `vault_token` is `None`, no task is spawned (returns `None`).
7180
pub fn spawn_secret_refresh_task(
7281
agent_id: String,
82+
vault: Option<Arc<VaultHandle>>,
7383
vault_token: Option<String>,
7484
secrets: SharedSecretMap,
7585
) -> Option<tokio::task::JoinHandle<()>> {
7686
let addr = vault_addr()?.to_string();
77-
let token = vault_token?;
87+
let vault = vault?;
88+
let mut token = vault_token?;
7889

7990
let Ok(agent_uuid) = uuid::Uuid::parse_str(&agent_id) else {
8091
return None;
@@ -112,6 +123,16 @@ pub fn spawn_secret_refresh_task(
112123
}
113124
}
114125
}
126+
Err(vault_client::VaultResolveError::TokenExpired) => {
127+
// The scoped token expired — mint a fresh one and retry
128+
// immediately on the next tick rather than stopping the task.
129+
// This keeps secrets refreshing for arbitrarily long sessions.
130+
tracing::info!(
131+
agent_id = %agent_id,
132+
"Vault scoped token expired — renewing for session"
133+
);
134+
token = vault.create_scoped_token(agent_uuid, VAULT_TOKEN_TTL);
135+
}
115136
Err(e) => {
116137
tracing::warn!(
117138
agent_id = %agent_id,

voice/server/src/telephony.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
//! are NOT duplicated here.
1313
1414
use std::sync::Arc;
15-
use std::time::{Duration, Instant};
15+
use std::time::Instant;
1616

1717
use crate::cred::{EncryptionEngine, VaultHandle};
1818
use crate::observability;
@@ -65,7 +65,7 @@ impl AppState {
6565
pub fn vault_token_for(&self, agent_uuid: Uuid) -> Option<String> {
6666
self.vault
6767
.as_ref()
68-
.map(|v| v.create_scoped_token(agent_uuid, Duration::from_secs(3600)))
68+
.map(|v| v.create_scoped_token(agent_uuid, crate::secrets::VAULT_TOKEN_TTL))
6969
}
7070
}
7171

@@ -498,6 +498,7 @@ pub async fn register_session(
498498
crate::secrets::resolve_vault_secrets(&agent_id_for_secrets, vault_token.as_deref()).await;
499499
let refresh_handle = crate::secrets::spawn_secret_refresh_task(
500500
agent_id_for_secrets,
501+
state.vault.clone(),
501502
vault_token_for_refresh,
502503
secrets.clone(),
503504
);

voice/server/src/vault_client.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ pub async fn resolve_secrets(
5353
);
5454
return Ok(SecretMap::new());
5555
}
56+
Err(vaultrs::error::ClientError::APIError { code: 403, .. }) => {
57+
// Scoped token has expired.
58+
warn!(
59+
agent_id = %agent_id,
60+
"Vault scoped token expired (403)"
61+
);
62+
return Err(VaultResolveError::TokenExpired);
63+
}
5664
Err(e) => {
5765
warn!(
5866
agent_id = %agent_id,
@@ -85,4 +93,7 @@ pub enum VaultResolveError {
8593
ClientConfig(String),
8694
#[error("failed to read secrets from vault: {0}")]
8795
ReadFailed(String),
96+
/// The scoped vault token has expired (HTTP 403).
97+
#[error("vault scoped token expired")]
98+
TokenExpired,
8899
}

0 commit comments

Comments
 (0)