Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ chacha20poly1305 = "0.10"
chrono = { version = "0.4", features = ["serde"] }
hex = "0.4"
futures-util = "0.3"
hkdf = "0.12"
hickory-resolver = { version = "0.25", default-features = false, features = ["tokio"] }
hmac = "0.12"
jsonwebtoken = "9"
Expand Down
91 changes: 90 additions & 1 deletion src/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,14 @@ struct St {
/// CP keys off this to look up the tunnel_id.
agent_id: String,
started: Instant,
/// Current Intel-signed JWT. Refreshed by a background task.
/// Current Intel-signed JWT minted over a freshness nonce. Used by the CP's
/// scrape-and-verify loop. Refreshed by a background task.
ita_token: Arc<RwLock<String>>,
/// Intel-signed JWT minted over the *Noise* quote (binds the Noise pubkey in
/// `report_data`). Served as `noise.ita_token` so clients can verify the
/// agent's attestation against Intel's public JWKS without an ITA account of
/// their own — they verify, they don't mint. Refreshed alongside `ita_token`.
noise_ita_token: Arc<RwLock<String>>,
/// Live set of per-workload ingress rules this agent has asked
/// the CP to publish. Seeded from boot `cfg.extra_ingress`;
/// appended each time a POSTed workload declares `expose`. The
Expand Down Expand Up @@ -174,6 +180,34 @@ pub async fn run() -> Result<()> {
.map_err(|e| Error::Internal(format!("noise keypair: {e}")))?,
);
eprintln!("agent: noise_pubkey={}", hex::encode(attestor.public_key()));

// Mint an ITA token over the Noise quote so clients can verify the agent's
// attestation against Intel's public JWKS without minting (and thus without
// an Intel account). Stable per boot; refreshed before expiry like the
// registration token.
let noise_ita_token = Arc::new(RwLock::new(
mint_noise_ita(&cfg, &attestor).await.unwrap_or_else(|e| {
eprintln!("agent: initial noise ITA mint failed ({e}); serving empty until refresh");
String::new()
}),
));
{
let cfg = cfg.clone();
let attestor = attestor.clone();
let token = noise_ita_token.clone();
tokio::spawn(async move {
loop {
tokio::time::sleep(ITA_REFRESH).await;
match mint_noise_ita(&cfg, &attestor).await {
Ok(t) => *token.write().await = t,
Err(e) => {
eprintln!("agent: noise ITA refresh failed (keeping stale token): {e}")
}
}
}
});
}

let ee_token = std::env::var("EE_TOKEN").ok();
let upstream = Arc::new(noise_gateway::upstream::EeAgent::new(
std::path::PathBuf::from(noise_gateway::upstream::DEFAULT_EE_AGENT_SOCK),
Expand Down Expand Up @@ -212,6 +246,7 @@ pub async fn run() -> Result<()> {
agent_id: b.agent_id,
started: Instant::now(),
ita_token,
noise_ita_token,
extras: Arc::new(RwLock::new(cfg.extra_ingress.clone())),
gh,
attest: attestor,
Expand All @@ -223,6 +258,23 @@ pub async fn run() -> Result<()> {
http: crate::system_http_client(),
devices,
};
// Seed sessiond's E2E history recipient set in the background, retrying
// until sessiond is up. Without this, sessiond starts with no recipients
// and (correctly) refuses to persist history until a device mutation pushes
// the set. Re-pushed on every later enroll/revoke from their handlers.
{
let seed_state = state.clone();
tokio::spawn(async move {
for _ in 0..60 {
if push_history_recipients(&seed_state).await.is_ok() {
return;
}
tokio::time::sleep(Duration::from_secs(2)).await;
}
eprintln!("agent: gave up seeding sessiond history recipients after retries");
});
}

let api_state = state.clone();
let api_ng_state = ng_state.clone();

Expand Down Expand Up @@ -487,6 +539,18 @@ async fn mint_ita(cfg: &Cfg, ee: &Ee) -> Result<String> {
ita::mint(&cfg.ita.base_url, &cfg.ita.api_key, &quote_b64).await
}

/// Produce the ITA token that `/health` publishes as `noise.ita_token`.
///
/// In every mode except [`ItaMode::Local`], the attestor's Noise quote is
/// base64-encoded (standard alphabet) and handed to `ita::mint` together with
/// the configured base URL and API key, so the returned token is an appraisal
/// of that quote. In local mode no quote is submitted; the token comes from
/// `ita::mint_local` using the configured issuer, API key and VM name.
///
/// # Errors
/// Returns whatever error the selected `ita` mint call reports.
async fn mint_noise_ita(cfg: &Cfg, attest: &noise_gateway::attest::Attestor) -> Result<String> {
    use base64::engine::general_purpose::STANDARD;
    use base64::Engine as _;

    let ita_cfg = &cfg.ita;
    if ita_cfg.mode != ItaMode::Local {
        // Remote appraisal path: ship the Noise quote itself.
        let encoded_quote = STANDARD.encode(attest.quote());
        return ita::mint(&ita_cfg.base_url, &ita_cfg.api_key, &encoded_quote).await;
    }
    // Local mode: mint locally, no quote involved.
    ita::mint_local(&ita_cfg.issuer, &ita_cfg.api_key, &cfg.common.vm_name)
}

fn spawn_cloudflared(token: String) {
tokio::spawn(async move {
eprintln!("agent: spawning cloudflared");
Expand Down Expand Up @@ -546,6 +610,7 @@ async fn health(State(s): State<St>) -> Json<serde_json::Value> {
.unwrap_or_default();
let m = metrics::collect().await;
let ita_token = s.ita_token.read().await.clone();
let noise_ita_token = s.noise_ita_token.read().await.clone();
let agent_owner = s.agent_owner.read().await.clone();
let oracles = s.oracles.read().await.clone();
let taint_reasons = s.taint.snapshot().await;
Expand Down Expand Up @@ -630,6 +695,10 @@ async fn health(State(s): State<St>) -> Json<serde_json::Value> {
"noise": {
"quote_b64": base64::engine::general_purpose::STANDARD.encode(s.attest.quote()),
"pubkey_hex": hex::encode(s.attest.public_key()),
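// Appraisal token for the Noise quote. Clients verify this against
// Intel's public JWKS (no account) instead of minting their own.
// Empty only while no mint has succeeded yet (the boot-time mint failed
// and no later refresh has replaced it). In local ITA mode this is the
// token returned by `ita::mint_local`, not a remote appraisal.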
"ita_token": noise_ita_token,
},
}))
}
Expand Down Expand Up @@ -970,6 +1039,9 @@ async fn create_device(
.upsert(device.clone())
.await
.map_err(|e| Error::Internal(format!("devices upsert: {e}")))?;
if let Err(e) = push_history_recipients(&s).await {
eprintln!("agent: failed to push history recipients after enroll: {e}");
}
Ok((axum::http::StatusCode::CREATED, Json(device)))
}

Expand All @@ -991,6 +1063,9 @@ async fn revoke_device(
if !ok {
return Err(Error::NotFound);
}
if let Err(e) = push_history_recipients(&s).await {
eprintln!("agent: failed to push history recipients after revoke: {e}");
}
Ok(Json(serde_json::json!({
"revoked": pubkey,
"at_ms": now,
Expand Down Expand Up @@ -1144,6 +1219,20 @@ async fn attach_to_sessiond(
Ok(())
}

/// Send `dd-sessiond` the pubkeys of all devices that are not revoked.
///
/// The set is read from `s.devices.live_pubkeys()` and POSTed to
/// `/api/recipients` as `{ "pubkeys": [...] }`. Used by the startup seeding
/// task (which retries while sessiond is still coming up) and by the device
/// enroll / revoke handlers after they mutate the store.
///
/// # Errors
/// Propagates any error from the sessiond POST; the response payload itself
/// is discarded.
async fn push_history_recipients(s: &St) -> Result<()> {
    let recipients = s.devices.live_pubkeys().await;
    let payload = serde_json::json!({ "pubkeys": recipients });
    sessiond_post_empty_json(s, "/api/recipients", &payload).await?;
    Ok(())
}

async fn sessiond_get<T: DeserializeOwned>(s: &St, path: &str) -> Result<T> {
let url = format!("{}{}", s.sessiond_http_url.trim_end_matches('/'), path);
let resp = s.http.get(url).send().await?;
Expand Down
10 changes: 10 additions & 0 deletions src/collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ pub async fn run(
cp_hostname: String,
ee: Arc<Ee>,
verifier: Arc<ita::Verifier>,
expected: Arc<ita::ExpectedMeasurements>,
wake: Arc<Notify>,
scrape_interval: Duration,
discovery_interval: Duration,
Expand Down Expand Up @@ -270,6 +271,7 @@ pub async fn run(
&prefix,
&ee,
&verifier,
&expected,
&env_label,
&cp_hostname,
discover,
Expand All @@ -289,6 +291,7 @@ async fn tick(
prefix: &str,
ee: &Arc<Ee>,
verifier: &Arc<ita::Verifier>,
expected: &Arc<ita::ExpectedMeasurements>,
env_label: &str,
cp_hostname: &str,
discover: bool,
Expand Down Expand Up @@ -375,6 +378,13 @@ async fn tick(
continue;
}
};
if let Err(reason) = expected.check(&claims) {
if expected.enforce {
eprintln!("cp: collector: {name} measurement mismatch — dropping: {reason}");
continue;
}
eprintln!("cp: collector: {name} measurement mismatch (not enforced): {reason}");
}
// Store key is the tunnel name (authoritative on the CP side),
// NOT the agent's self-reported agent_id.
let mut s = store.lock().await;
Expand Down
27 changes: 27 additions & 0 deletions src/cp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ struct St {
collector_wake: Arc<Notify>,
started: Instant,
verifier: Arc<ita::Verifier>,
/// Expected enclave measurement allowlist (MRTD/TCB). Pins the fleet to
/// known-good code; unpinned = observe-only.
expected: Arc<ita::ExpectedMeasurements>,
/// The CP's own ITA token. Refreshed by a background task.
cp_ita_token: Arc<RwLock<String>>,
/// GH OIDC verifier for `/api/agents` callers (CI, humans). Same
Expand Down Expand Up @@ -289,6 +292,18 @@ pub async fn run() -> Result<()> {
// Start the collector with the verifier. It re-verifies each
// scraped agent's ita_token, so expired / revoked / unsigned
// agents drop off the dashboard automatically.
let expected = Arc::new(ita::ExpectedMeasurements::from_env());
if expected.is_pinned() {
eprintln!(
"cp: measurement pinning ON ({} mrtd(s), tcb={}, enforce={})",
expected.mrtds.len(),
expected.tcb_status.as_deref().unwrap_or("any"),
expected.enforce
);
} else {
eprintln!("cp: measurement pinning OFF (DD_EXPECTED_MRTD unset) — observe only");
}

let collector_wake = Arc::new(Notify::new());
tokio::spawn(collector::run(
store.clone(),
Expand All @@ -297,6 +312,7 @@ pub async fn run() -> Result<()> {
cfg.hostname.clone(),
ee.clone(),
verifier.clone(),
expected.clone(),
collector_wake.clone(),
Duration::from_secs(cfg.scrape_interval_secs),
Duration::from_secs(cfg.discovery_interval_secs),
Expand All @@ -313,6 +329,7 @@ pub async fn run() -> Result<()> {
collector_wake,
started: Instant::now(),
verifier,
expected,
cp_ita_token,
gh,
};
Expand Down Expand Up @@ -495,6 +512,16 @@ async fn register(
ita_claims.mrtd.as_deref().unwrap_or("?"),
ita_claims.tcb_status.as_deref().unwrap_or("?")
);
if let Err(reason) = s.expected.check(&ita_claims) {
if s.expected.enforce {
eprintln!("cp: REJECT register {}: {reason}", req.vm_name);
return Err(Error::Unauthorized);
}
eprintln!(
"cp: WARN register {} measurement mismatch (not enforced): {reason}",
req.vm_name
);
}

let http = cf::http_client();
let name = cf::agent_tunnel_name(&s.cfg.common.env_label);
Expand Down
13 changes: 13 additions & 0 deletions src/devices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,19 @@ impl Store {
self.inner.read().await.values().cloned().collect()
}

/// Collect the `pubkey` of every stored device whose `revoked_at_ms` is
/// `None`.
///
/// This is the recipient set the agent pushes to `dd-sessiond` for sealing
/// persisted history. The strings are returned exactly as stored on each
/// device (hex, per the store's convention), in the map's iteration order.
pub async fn live_pubkeys(&self) -> Vec<String> {
    let devices = self.inner.read().await;
    let mut live = Vec::new();
    for device in devices.values() {
        // Revoked devices carry a timestamp; only untouched ones are live.
        if device.revoked_at_ms.is_none() {
            live.push(device.pubkey.clone());
        }
    }
    live
}

pub async fn upsert(&self, device: Device) -> anyhow::Result<()> {
{
let mut w = self.inner.write().await;
Expand Down
Loading
Loading