From 88f6400a71159b9e7e8e22ecc7a16a6143928421 Mon Sep 17 00:00:00 2001 From: Zious Date: Sun, 12 Apr 2026 21:34:30 -0400 Subject: [PATCH 1/5] feat(tls): flag SNI hostnames containing ASCII control bytes (C0+DEL) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #54. The TLS analyzer previously silently accepted SNI hostnames containing bytes in 0x00-0x1F or 0x7F (e.g., "foo\x1b[31m.example") into the pure-ASCII Ascii(String) variant — `char::is_ascii()` returns true for every C0 byte and DEL, so no finding was emitted and the raw bytes were inserted into sni_counts unchanged. RFC 6066 §3 requires ASCII, and the DNS preferred hostname syntax (RFC 952 / RFC 1123, preserved under RFC 5890 A-label construction) restricts to LDH — no control codes. An SNI with embedded control bytes is simultaneously a protocol violation, a potential terminal-injection vector, and a plausible log-poisoning / evasion / covert-channel signal. Implementation mirrors the existing NonAsciiUtf8 pattern: * New SniValue::AsciiWithControl { hostname, hex } variant * Detection range b < 0x20 || b == 0x7f (full C0 + DEL — narrower ranges like ESC+DEL alone would miss BEL/CR/LF/tab variants) * Anomaly / Inconclusive / Low finding with hex evidence * Per ADR 0003, sni_counts and finding summary store raw bytes; terminal reporter escapes C0+DEL+C1+backslash at render time 10 new tests pin: primary ESC-injection shape, BEL/DEL/tab/CR/LF variants, regressions (printable ASCII + Punycode A-label), mutual exclusivity with NonAsciiUtf8, C0 boundary pair (0x00, 0x1F, 0x20), and single-finding-per-hostname cardinality with multi-byte hex evidence preservation. --- src/analyzer/tls.rs | 69 +++++++- tests/tls_analyzer_tests.rs | 316 ++++++++++++++++++++++++++++++++++++ 2 files changed, 377 insertions(+), 8 deletions(-) diff --git a/src/analyzer/tls.rs b/src/analyzer/tls.rs index 3eecad4..0113585 100644 --- a/src/analyzer/tls.rs +++ b/src/analyzer/tls.rs @@ -149,9 +149,17 @@ fn compute_ja3s(version: u16, cipher: TlsCipherSuiteID, extensions: &[TlsExtensi /// /// RFC 6066 §3 specifies that the `HostName` field is "represented as a byte /// string using ASCII encoding". Internationalized names use A-labels (RFC 5890, -/// `xn--…` Punycode form), which are also ASCII. Two RFC violations are tracked +/// `xn--…` Punycode form), which are also ASCII. Three RFC violations are tracked /// separately: /// +/// - `AsciiWithControl` — bytes are pure ASCII but contain at least one C0 control +/// byte (0x00–0x1F) or DEL (0x7F). RFC 6066 §3 requires ASCII; the DNS preferred +/// hostname syntax (RFC 952 / RFC 1123, preserved under RFC 5890 A-label +/// construction) restricts to letters, digits, and hyphens — no whitespace, +/// no control codes. An SNI like `"foo\x1b[31m.example"` is +/// simultaneously a protocol violation, a potential terminal-injection vector +/// (rendered via the terminal reporter, which escapes at display time per +/// ADR 0003), and a plausible evasion / log-poisoning / covert-channel signal. /// - `NonAsciiUtf8` — bytes decode as valid UTF-8 but contain non-ASCII codepoints /// (e.g. a raw U-label "café.example"). RFC 6066 says these MUST be Punycode-encoded /// before transmission. Major TLS clients (rustls, Chrome/BoringSSL, Firefox/NSS, @@ -160,12 +168,17 @@ fn compute_ja3s(version: u16, cipher: TlsCipherSuiteID, extensions: &[TlsExtensi /// or an attacker tool. /// - `NonUtf8` — bytes can't decode as UTF-8 at all. Strictly malformed. /// -/// Both are surfaced as Anomaly findings; usually a client bug, but worth forensic -/// review. +/// All three are surfaced as Anomaly findings; usually a client bug or adversarial +/// input, but worth forensic review. enum SniValue { - /// Hostname is pure ASCII — RFC 6066 compliant. May be a literal hostname or - /// an A-label (Punycode) form like `xn--caf-dma.example`. + /// Hostname is pure printable ASCII (0x20–0x7E) — RFC 6066 compliant. May be + /// a literal hostname or an A-label (Punycode) form like `xn--caf-dma.example`. Ascii(String), + /// Hostname is pure ASCII but contains at least one C0 control byte + /// (0x00–0x1F) or DEL (0x7F). `hostname` is the raw String (safe because + /// pure ASCII is always valid UTF-8); `hex` is the lossless lowercase hex + /// of the raw bytes for forensic evidence. + AsciiWithControl { hostname: String, hex: String }, /// Hostname decodes as valid UTF-8 but contains at least one byte ≥ 0x80. /// `hostname` is the decoded String (always valid UTF-8); `hex` is the lossless /// lowercase hex of the raw bytes for forensic evidence. @@ -175,6 +188,17 @@ enum SniValue { NonUtf8 { lossy: String, hex: String }, } +/// Returns true if any byte is a C0 control (0x00–0x1F) or DEL (0x7F). +/// +/// Callers must ensure `s` is pure ASCII before calling — the byte-level check +/// is only meaningful for ASCII strings. For non-ASCII UTF-8 it would produce +/// false negatives on multi-byte codepoints (whose continuation bytes are all +/// ≥ 0x80) and is redundant since non-ASCII already signals a protocol violation +/// via `NonAsciiUtf8`. +fn contains_c0_or_del(s: &str) -> bool { + s.bytes().any(|b| b < 0x20 || b == 0x7f) +} + /// Extract SNI hostname from the parsed extension list. /// /// Returns `None` if no SNI extension is present or the extension's hostname @@ -187,7 +211,11 @@ fn extract_sni(extensions: &[TlsExtension<'_>]) -> Option { && let Some((_, hostname)) = list.first() { return Some(match std::str::from_utf8(hostname) { - Ok(s) if s.is_ascii() => SniValue::Ascii(s.to_string()), + Ok(s) if s.is_ascii() && !contains_c0_or_del(s) => SniValue::Ascii(s.to_string()), + Ok(s) if s.is_ascii() => SniValue::AsciiWithControl { + hostname: s.to_string(), + hex: bytes_to_hex(hostname), + }, Ok(s) => SniValue::NonAsciiUtf8 { hostname: s.to_string(), hex: bytes_to_hex(hostname), @@ -324,10 +352,13 @@ impl TlsAnalyzer { // Choose a map key that preserves uniqueness for non-UTF-8 cases. // Using `lossy` as a key would collapse distinct byte sequences whose // U+FFFD replacements happen to align — bad for forensic counting. - // For Ascii and NonAsciiUtf8 the hostname is valid UTF-8 with no - // collision risk, so use it directly. + // For Ascii, AsciiWithControl, and NonAsciiUtf8 the hostname is valid + // UTF-8 with no collision risk, so use it directly. Terminal-safe + // display of embedded control bytes is handled by the terminal + // reporter per ADR 0003. let key = match &sni { SniValue::Ascii(s) => s.clone(), + SniValue::AsciiWithControl { hostname, .. } => hostname.clone(), SniValue::NonAsciiUtf8 { hostname, .. } => hostname.clone(), SniValue::NonUtf8 { hex, .. } => format!(""), }; @@ -335,6 +366,28 @@ impl TlsAnalyzer { match sni { SniValue::Ascii(_) => {} // RFC-compliant, no finding + SniValue::AsciiWithControl { hostname, hex } => { + self.all_findings.push(Finding { + category: ThreatCategory::Anomaly, + verdict: Verdict::Inconclusive, + confidence: Confidence::Low, + // Raw hostname interpolation — the data layer stores raw + // bytes per ADR 0003 (including embedded C0/DEL control + // codes). The terminal reporter escapes them at render + // time to prevent terminal-injection; JSON output is + // already safe via serde_json's RFC 8259 escaping. + summary: format!( + "TLS SNI contains ASCII control characters \ + (RFC 6066 §3 requires ASCII; DNS preferred hostname \ + syntax per RFC 952 / RFC 1123 restricts to letters, \ + digits, and hyphens): {hostname}" + ), + evidence: vec![format!("hex: {hex}")], + mitre_technique: None, + source_ip: None, + timestamp: None, + }); + } SniValue::NonAsciiUtf8 { hostname, hex } => { self.all_findings.push(Finding { category: ThreatCategory::Anomaly, diff --git a/tests/tls_analyzer_tests.rs b/tests/tls_analyzer_tests.rs index 24ed43b..01a7050 100644 --- a/tests/tls_analyzer_tests.rs +++ b/tests/tls_analyzer_tests.rs @@ -1076,3 +1076,319 @@ fn test_trailing_bytes_in_server_name_list() { assert_eq!(analyzer.parse_error_count(), 0); assert_eq!(analyzer.handshake_count(), 1); } + +// ── issue #54: SNI containing ASCII control bytes (C0 / DEL) ───────────────── +// +// RFC 6066 §3 requires HostName to be ASCII; the DNS preferred hostname syntax +// (RFC 952 / RFC 1123, preserved under RFC 5890 A-label construction) restricts +// to letters, digits, and hyphens. No legitimate TLS client emits C0 (0x00–0x1F) +// or DEL (0x7F) in SNI, so the full C0+DEL range is the correct detection scope +// (narrower ranges like ESC+DEL alone would miss BEL / CR / LF / tab variants +// used for log-injection, terminal escape, or covert signalling). +// +// Tests below pin each control-byte case to the `AsciiWithControl` variant: +// finding fires with Anomaly / Inconclusive / Low (matching other SNI anomalies), +// sni_counts stores the *raw* hostname (per ADR 0003 — reporter escapes for +// terminal display), and mutually exclusive with the NonAsciiUtf8 path. + +/// Helper: build a ClientHello with an SNI containing arbitrary ASCII bytes. +/// Wraps `build_client_hello_raw_sni` — the latter already accepts `&[u8]`, +/// but this name documents intent at the call site. +fn build_client_hello_ascii_bytes(sni_bytes: &[u8], cipher_ids: &[u16]) -> Vec { + build_client_hello_raw_sni(sni_bytes, cipher_ids) +} + +/// Helper: count findings whose summary mentions the ASCII control-byte anomaly. +fn count_control_findings(analyzer: &TlsAnalyzer) -> usize { + analyzer + .findings() + .iter() + .filter(|f| f.summary.contains("ASCII control characters")) + .count() +} + +#[test] +fn test_ascii_sni_with_esc_emits_control_finding_and_counts_under_raw_key() { + // Primary test: a classic ANSI escape sequence embedded in the SNI. This + // is the terminal-injection shape the ADR 0003 pipeline is designed to + // contain — the analyzer preserves the raw bytes, the reporter escapes + // them at render. Here we assert only the analyzer-layer contract. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + // "foo\x1b[31m.example" — ESC + CSI 31m (red) sequence. + let sni: &[u8] = b"foo\x1b[31m.example"; + let record = build_client_hello_ascii_bytes(sni, &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!(analyzer.parse_error_count(), 0); + assert_eq!(analyzer.handshake_count(), 1); + + // Exactly one control-byte finding. + assert_eq!( + count_control_findings(&analyzer), + 1, + "expected exactly one ASCII control finding, got: {:?}", + analyzer.findings() + ); + let f = analyzer + .findings() + .into_iter() + .find(|f| f.summary.contains("ASCII control characters")) + .expect("expected control finding"); + assert_eq!(f.category, wirerust::findings::ThreatCategory::Anomaly); + assert_eq!(f.verdict, wirerust::findings::Verdict::Inconclusive); + assert_eq!(f.confidence, wirerust::findings::Confidence::Low); + assert!( + f.summary.contains("RFC 6066"), + "summary should cite RFC 6066, got: {}", + f.summary + ); + // Per ADR 0003: analyzer finding summary stores the raw hostname + // (including the ESC byte). Terminal escaping is the reporter's job. + assert!( + f.summary.as_bytes().contains(&0x1b), + "summary must preserve raw ESC byte per ADR 0003, got hex: {:?}", + f.summary.as_bytes() + ); + // Hex evidence is the lossless byte-form of the raw SNI. + assert!( + f.evidence + .iter() + .any(|e| e.contains("666f6f1b5b33316d2e6578616d706c65")), + "expected hex evidence to contain raw byte sequence, got: {:?}", + f.evidence + ); + + // sni_counts stores the raw hostname string (matches the NonAsciiUtf8 + // precedent — distinct ASCII-with-control strings produce distinct keys + // deterministically, no tagging is needed). + assert_eq!( + *analyzer + .sni_counts() + .get(std::str::from_utf8(sni).unwrap()) + .unwrap_or(&0), + 1, + "sni_counts must key on raw hostname, got keys: {:?}", + analyzer.sni_counts().keys().collect::>() + ); +} + +#[test] +fn test_ascii_sni_with_bel_emits_control_finding() { + // BEL (0x07) is the canonical "audible" control — not a terminal-escape + // vector per se, but still a protocol violation and a plausible covert + // signal. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello_ascii_bytes(b"ring\x07bell.example", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!(count_control_findings(&analyzer), 1); + assert_eq!( + *analyzer + .sni_counts() + .get("ring\x07bell.example") + .unwrap_or(&0), + 1 + ); +} + +#[test] +fn test_ascii_sni_with_del_emits_control_finding() { + // DEL (0x7F) sits at the very top of the 7-bit ASCII range. `char::is_ascii()` + // returns true — which is why the pre-fix analyzer silently accepted it. + // DEL is non-printable and non-LDH. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello_ascii_bytes(b"host\x7fname.example", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!(count_control_findings(&analyzer), 1); +} + +#[test] +fn test_ascii_sni_with_tab_emits_control_finding() { + // Tab (0x09) is C0 whitespace. Issue #54 flagged tab/CR/LF as an open + // question; RFC 6066 §3 + DNS preferred-hostname syntax (RFC 952/1123) + // disallow whitespace in SNI. Include in detection range. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello_ascii_bytes(b"left\tright.example", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!(count_control_findings(&analyzer), 1); +} + +#[test] +fn test_ascii_sni_with_cr_and_lf_emits_control_finding() { + // CR (0x0D) and LF (0x0A) are the classic log-injection vector — a + // logger that prints the SNI unescaped would see spurious new log lines. + // Both are C0 and must trip the finding. + + // CR alone. + let mut a1 = TlsAnalyzer::new(); + let fk = test_flow_key(); + a1.on_data( + &fk, + Direction::ClientToServer, + &build_client_hello_ascii_bytes(b"a\rb.example", &[0x1301]), + 0, + ); + assert_eq!(count_control_findings(&a1), 1, "CR must trip finding"); + + // LF alone. + let mut a2 = TlsAnalyzer::new(); + a2.on_data( + &fk, + Direction::ClientToServer, + &build_client_hello_ascii_bytes(b"a\nb.example", &[0x1301]), + 0, + ); + assert_eq!(count_control_findings(&a2), 1, "LF must trip finding"); + + // CRLF combined. + let mut a3 = TlsAnalyzer::new(); + a3.on_data( + &fk, + Direction::ClientToServer, + &build_client_hello_ascii_bytes(b"a\r\nb.example", &[0x1301]), + 0, + ); + assert_eq!(count_control_findings(&a3), 1, "CRLF must trip finding"); +} + +#[test] +fn test_printable_ascii_sni_emits_no_control_finding() { + // Regression guard: a plain printable-ASCII hostname must not trigger + // the new finding. A normal hostname keeps the intent clear. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello("www.example.com", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!(count_control_findings(&analyzer), 0); + assert_eq!( + analyzer + .findings() + .iter() + .filter(|f| f.summary.contains("SNI")) + .count(), + 0, + "printable ASCII must produce no SNI findings at all" + ); +} + +#[test] +fn test_punycode_a_label_emits_no_control_finding() { + // Regression guard: `xn--caf-dma.example` is the RFC-compliant A-label + // encoding of `café.example`. It's pure printable ASCII and must NOT + // trigger either the non-ASCII finding (covered elsewhere) or the new + // control-byte finding. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello("xn--caf-dma.example", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!(count_control_findings(&analyzer), 0); +} + +#[test] +fn test_non_ascii_sni_does_not_emit_control_finding() { + // Mutual exclusivity: a non-ASCII UTF-8 SNI (Cyrillic) falls into the + // `NonAsciiUtf8` branch, NOT `AsciiWithControl`. The `s.is_ascii()` + // guard on both control-byte arms prevents the non-ASCII path from + // ever landing in the new variant. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello("пример.example", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!( + count_control_findings(&analyzer), + 0, + "non-ASCII SNI must land in NonAsciiUtf8, not AsciiWithControl" + ); + // And the non-ASCII finding still fires. + assert_eq!( + analyzer + .findings() + .iter() + .filter(|f| f.summary.contains("non-ASCII characters")) + .count(), + 1 + ); +} + +#[test] +fn test_ascii_control_boundary_bytes() { + // Boundary pins on both ends of the C0+DEL detection range: + // - 0x00 (NUL, start of C0) MUST trip — most dangerous injection byte + // (C-string truncation, log-parser splitting). + // - 0x1F (Unit Separator, end of C0) MUST trip. + // - 0x20 (space, first printable) MUST NOT trip — space is disallowed + // by DNS preferred hostname syntax (RFC 952/1123), but it's not a + // control byte, so this issue deliberately leaves it alone. Broader + // LDH compliance is out-of-scope per issue #54's "Out of scope". + // - 0x7F (DEL) is the other non-C0 half of the detection range and is + // covered separately in `test_ascii_sni_with_del_emits_control_finding`. + let fk = test_flow_key(); + + for (label, byte, expect_trip) in [ + ("0x00 (NUL, start of C0)", 0x00u8, true), + ("0x1F (end of C0)", 0x1fu8, true), + ("0x20 (space, first printable)", 0x20u8, false), + ] { + let sni = vec![b'a', byte, b'b', b'.', b'e', b'x', b'a', b'm', b'p', b'l', b'e']; + let mut analyzer = TlsAnalyzer::new(); + analyzer.on_data( + &fk, + Direction::ClientToServer, + &build_client_hello_ascii_bytes(&sni, &[0x1301]), + 0, + ); + let expected = if expect_trip { 1 } else { 0 }; + assert_eq!( + count_control_findings(&analyzer), + expected, + "{label}: expected {expected} control finding(s)", + ); + } +} + +#[test] +fn test_multiple_control_bytes_in_sni_produces_single_finding() { + // If multiple C0/DEL bytes appear in the same SNI, the analyzer emits + // ONE finding for the hostname (not one per byte). Evidence still + // captures the full raw bytes via the hex field. + let mut analyzer = TlsAnalyzer::new(); + let fk = test_flow_key(); + + let record = build_client_hello_ascii_bytes(b"a\x07b\x1bc\x7fd.example", &[0x1301]); + analyzer.on_data(&fk, Direction::ClientToServer, &record, 0); + + assert_eq!( + count_control_findings(&analyzer), + 1, + "multiple control bytes in one SNI must produce one finding, not N" + ); + // Hex evidence captures all three bytes losslessly. + let f = analyzer + .findings() + .into_iter() + .find(|f| f.summary.contains("ASCII control characters")) + .unwrap(); + assert!( + f.evidence + .iter() + .any(|e| e.contains("07") && e.contains("1b") && e.contains("7f")), + "hex evidence must preserve all control bytes, got: {:?}", + f.evidence + ); +} From 4f69461eada262b9dd1adc9e1cc3902471756114 Mon Sep 17 00:00:00 2001 From: Zious Date: Sun, 12 Apr 2026 21:37:44 -0400 Subject: [PATCH 2/5] review: tighten RFC-inheritance wording and soften universal-client claim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses round 2 comment-analyzer nits: - "preserved under RFC 5890 A-label construction" → "inherited by ..." (RFC 952/1123 *originated* LDH; RFC 5890 inherits rather than preserves it). - "No legitimate TLS client emits..." → "No conforming TLS client should emit..." (the narrower conditional claim is robust to a single buggy stack in the wild). --- src/analyzer/tls.rs | 2 +- tests/tls_analyzer_tests.rs | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/analyzer/tls.rs b/src/analyzer/tls.rs index 0113585..9b1e6a9 100644 --- a/src/analyzer/tls.rs +++ b/src/analyzer/tls.rs @@ -154,7 +154,7 @@ fn compute_ja3s(version: u16, cipher: TlsCipherSuiteID, extensions: &[TlsExtensi /// /// - `AsciiWithControl` — bytes are pure ASCII but contain at least one C0 control /// byte (0x00–0x1F) or DEL (0x7F). RFC 6066 §3 requires ASCII; the DNS preferred -/// hostname syntax (RFC 952 / RFC 1123, preserved under RFC 5890 A-label +/// hostname syntax (RFC 952 / RFC 1123, inherited by RFC 5890 A-label /// construction) restricts to letters, digits, and hyphens — no whitespace, /// no control codes. An SNI like `"foo\x1b[31m.example"` is /// simultaneously a protocol violation, a potential terminal-injection vector diff --git a/tests/tls_analyzer_tests.rs b/tests/tls_analyzer_tests.rs index 01a7050..cd2cbc1 100644 --- a/tests/tls_analyzer_tests.rs +++ b/tests/tls_analyzer_tests.rs @@ -1080,9 +1080,10 @@ fn test_trailing_bytes_in_server_name_list() { // ── issue #54: SNI containing ASCII control bytes (C0 / DEL) ───────────────── // // RFC 6066 §3 requires HostName to be ASCII; the DNS preferred hostname syntax -// (RFC 952 / RFC 1123, preserved under RFC 5890 A-label construction) restricts -// to letters, digits, and hyphens. No legitimate TLS client emits C0 (0x00–0x1F) -// or DEL (0x7F) in SNI, so the full C0+DEL range is the correct detection scope +// (RFC 952 / RFC 1123, inherited by RFC 5890 A-label construction) restricts +// to letters, digits, and hyphens. No conforming TLS client should emit C0 +// (0x00–0x1F) or DEL (0x7F) in SNI, so the full C0+DEL range is the correct +// detection scope // (narrower ranges like ESC+DEL alone would miss BEL / CR / LF / tab variants // used for log-injection, terminal escape, or covert signalling). // From f76b7b2675c1e604a6016de623776d0c361d2efc Mon Sep 17 00:00:00 2001 From: Zious Date: Sun, 12 Apr 2026 21:40:37 -0400 Subject: [PATCH 3/5] style: apply rustfmt --- tests/tls_analyzer_tests.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/tls_analyzer_tests.rs b/tests/tls_analyzer_tests.rs index cd2cbc1..41388b7 100644 --- a/tests/tls_analyzer_tests.rs +++ b/tests/tls_analyzer_tests.rs @@ -1346,7 +1346,9 @@ fn test_ascii_control_boundary_bytes() { ("0x1F (end of C0)", 0x1fu8, true), ("0x20 (space, first printable)", 0x20u8, false), ] { - let sni = vec![b'a', byte, b'b', b'.', b'e', b'x', b'a', b'm', b'p', b'l', b'e']; + let sni = vec![ + b'a', byte, b'b', b'.', b'e', b'x', b'a', b'm', b'p', b'l', b'e', + ]; let mut analyzer = TlsAnalyzer::new(); analyzer.on_data( &fk, From 9da26bc585960e7b874f38090cb37f4b9444643c Mon Sep 17 00:00:00 2001 From: Zious Date: Sun, 12 Apr 2026 21:45:36 -0400 Subject: [PATCH 4/5] review: tighten Ascii variant docs and enforce contains_c0_or_del precondition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Copilot review round 1: - SniValue::Ascii doc narrowed too far ("pure printable ASCII 0x20–0x7E — RFC 6066 compliant"). Empty SNI (""), space, and other non-LDH printable ASCII all land here too. Reworded to describe what the arm actually does (no C0/DEL detected) and point readers at broader LDH compliance as out-of-scope per issue #54. - `Ascii(_) => {}` match-arm comment now says "No C0/DEL detected; no finding emitted at this layer" instead of the misleading "RFC-compliant". - contains_c0_or_del now carries a debug_assert on its ASCII precondition so future misuse (passing non-ASCII UTF-8, where continuation bytes ≥ 0x80 would produce a false negative) panics in debug builds. --- src/analyzer/tls.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/analyzer/tls.rs b/src/analyzer/tls.rs index 9b1e6a9..a6da174 100644 --- a/src/analyzer/tls.rs +++ b/src/analyzer/tls.rs @@ -171,8 +171,14 @@ fn compute_ja3s(version: u16, cipher: TlsCipherSuiteID, extensions: &[TlsExtensi /// All three are surfaced as Anomaly findings; usually a client bug or adversarial /// input, but worth forensic review. enum SniValue { - /// Hostname is pure printable ASCII (0x20–0x7E) — RFC 6066 compliant. May be - /// a literal hostname or an A-label (Punycode) form like `xn--caf-dma.example`. + /// Hostname is pure ASCII with no C0 control bytes (0x00–0x1F) and no DEL + /// (0x7F), so this classifier emits no finding. This arm is a "nothing to + /// flag at the byte-control level" bucket, not a full RFC 6066 / + /// DNS-preferred-hostname compliance check — empty hostnames (`""`), + /// spaces, and other non-LDH printable ASCII still land here. May be a + /// literal hostname or an A-label (Punycode) form like + /// `xn--caf-dma.example`. Broader LDH compliance is out of scope (see + /// issue #54's "Out of scope" section). Ascii(String), /// Hostname is pure ASCII but contains at least one C0 control byte /// (0x00–0x1F) or DEL (0x7F). `hostname` is the raw String (safe because @@ -196,6 +202,11 @@ enum SniValue { /// ≥ 0x80) and is redundant since non-ASCII already signals a protocol violation /// via `NonAsciiUtf8`. fn contains_c0_or_del(s: &str) -> bool { + debug_assert!( + s.is_ascii(), + "contains_c0_or_del requires ASCII input; non-ASCII UTF-8 \ + continuation bytes are ≥ 0x80 and would produce a false negative" + ); s.bytes().any(|b| b < 0x20 || b == 0x7f) } @@ -365,7 +376,7 @@ impl TlsAnalyzer { Self::increment(&mut self.sni_counts, key, MAX_MAP_ENTRIES); match sni { - SniValue::Ascii(_) => {} // RFC-compliant, no finding + SniValue::Ascii(_) => {} // No C0/DEL detected; no finding emitted at this layer. SniValue::AsciiWithControl { hostname, hex } => { self.all_findings.push(Finding { category: ThreatCategory::Anomaly, From 37bb466767b4f9fc3b9e1ba16ffc7e49ac5b7215 Mon Sep 17 00:00:00 2001 From: Zious Date: Sun, 12 Apr 2026 22:10:09 -0400 Subject: [PATCH 5/5] review: use Debug format for assertion diagnostic that may contain raw ESC The assertion "summary should cite RFC 6066" previously formatted f.summary with `{}` (Display). That summary is constructed from a hostname deliberately containing an ESC byte (the terminal-injection test shape), so a failing assertion would write raw control bytes to CI logs and potentially corrupt the log viewer. Debug formatting (`{:?}`) escapes the control bytes via escape_default so log output stays terminal-safe even on regression. --- tests/tls_analyzer_tests.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tls_analyzer_tests.rs b/tests/tls_analyzer_tests.rs index 41388b7..3cd1d8b 100644 --- a/tests/tls_analyzer_tests.rs +++ b/tests/tls_analyzer_tests.rs @@ -1142,7 +1142,10 @@ fn test_ascii_sni_with_esc_emits_control_finding_and_counts_under_raw_key() { assert_eq!(f.confidence, wirerust::findings::Confidence::Low); assert!( f.summary.contains("RFC 6066"), - "summary should cite RFC 6066, got: {}", + // {:?} uses Debug formatting which escapes control bytes — prevents + // a failing assertion from corrupting CI logs with the raw ESC / ANSI + // sequence that the test deliberately constructs. + "summary should cite RFC 6066, got: {:?}", f.summary ); // Per ADR 0003: analyzer finding summary stores the raw hostname