Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 55 additions & 3 deletions src/analyzer/tls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,13 +145,38 @@ fn compute_ja3s(version: u16, cipher: TlsCipherSuiteID, extensions: &[TlsExtensi
bytes_to_hex(Md5::digest(ja3s_str.as_bytes()).as_slice())
}

/// Result of decoding a TLS SNI hostname.
///
/// RFC 6066 §3 specifies that the `HostName` field is "represented as a byte
/// string using ASCII encoding". Internationalized names use A-labels (RFC 5890),
/// which are also ASCII. A `NonUtf8` SNI is therefore a protocol violation —
/// usually a buggy TLS client, but worth surfacing for forensic review.
///
/// The derives keep the value printable with `{:?}` in diagnostics and directly
/// comparable with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum SniValue {
    /// Hostname decoded cleanly as UTF-8 (which includes the RFC-compliant ASCII case).
    Utf8(String),
    /// Hostname bytes failed UTF-8 decoding.
    NonUtf8 {
        /// U+FFFD-replaced rendering of the raw bytes, intended for human display.
        lossy: String,
        /// Lossless lowercase hex encoding of the raw bytes.
        hex: String,
    },
}

/// Extract SNI hostname from the parsed extension list.
fn extract_sni(extensions: &[TlsExtension<'_>]) -> Option<String> {
///
/// Returns `None` if no SNI extension is present or the extension's hostname
/// list is empty. Otherwise returns an `SniValue` describing the first hostname
/// (we ignore additional entries — multi-name SNI is rare and treating only the
/// first matches the prior behavior).
fn extract_sni(extensions: &[TlsExtension<'_>]) -> Option<SniValue> {
for ext in extensions {
if let TlsExtension::SNI(list) = ext
&& let Some((_, hostname)) = list.first()
{
return String::from_utf8(hostname.to_vec()).ok();
return Some(match std::str::from_utf8(hostname) {
Ok(s) => SniValue::Utf8(s.to_string()),
Err(_) => SniValue::NonUtf8 {
lossy: String::from_utf8_lossy(hostname).into_owned(),
hex: bytes_to_hex(hostname),
},
});
}
}
None
Expand Down Expand Up @@ -276,7 +301,34 @@ impl TlsAnalyzer {

// SNI
if let Some(sni) = extract_sni(&exts) {
Self::increment(&mut self.sni_counts, sni, MAX_MAP_ENTRIES);
// Choose a map key that preserves uniqueness for non-UTF-8 cases.
// Using `lossy` as a key would collapse distinct byte sequences whose
// U+FFFD replacements happen to align — bad for forensic counting.
let key = match &sni {
SniValue::Utf8(s) => s.clone(),
SniValue::NonUtf8 { hex, .. } => format!("<non-utf8:{hex}>"),
};
Self::increment(&mut self.sni_counts, key, MAX_MAP_ENTRIES);

if let SniValue::NonUtf8 { lossy, hex } = sni {
self.all_findings.push(Finding {
category: ThreatCategory::Anomaly,
verdict: Verdict::Inconclusive,
confidence: Confidence::Low,
// Use Debug formatter ({:?}) to escape control bytes (e.g. ESC 0x1b)
// that String::from_utf8_lossy preserves but the analyst's terminal
// would interpret as ANSI control sequences. Without this an attacker
// could craft a malformed SNI like b"\x1b[31m..." that recolors or
// overwrites the rendered finding line.
summary: format!(
"TLS SNI contains non-UTF-8 bytes (RFC 6066 violation): {lossy:?}"
),
evidence: vec![format!("hex: {hex}")],
mitre_technique: None,
source_ip: None,
timestamp: None,
});
}
}

// JA3
Expand Down
117 changes: 116 additions & 1 deletion tests/tls_analyzer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@ fn test_flow_key() -> FlowKey {

/// Build a minimal TLS ClientHello record carrying `sni` as the SNI hostname
/// and the cipher suites given in `cipher_ids`.
///
/// Convenience wrapper for the textual-hostname case: the string's bytes are
/// handed unchanged to `build_client_hello_raw_sni`.
fn build_client_hello(sni: &str, cipher_ids: &[u16]) -> Vec<u8> {
    let hostname_bytes = sni.as_bytes();
    build_client_hello_raw_sni(hostname_bytes, cipher_ids)
}

/// Build a minimal TLS ClientHello record with arbitrary raw bytes for the SNI hostname.
/// Used for tests that exercise non-UTF-8 / malformed SNI handling.
fn build_client_hello_raw_sni(sni_bytes: &[u8], cipher_ids: &[u16]) -> Vec<u8> {
let mut extensions = Vec::new();

// SNI extension (type 0x0000)
let sni_bytes = sni.as_bytes();
let sni_list_len = (3 + sni_bytes.len()) as u16;
let sni_ext_len = 2 + sni_list_len;
extensions.extend_from_slice(&[0x00, 0x00]); // extension type: server_name
Expand Down Expand Up @@ -288,3 +293,113 @@ fn test_summarize_output() {
assert!(detail.contains_key("cipher_suites"));
assert_eq!(detail["parse_errors"], 0);
}

#[test]
fn test_non_utf8_sni_emits_finding_and_counts_under_hex_key() {
    let mut analyzer = TlsAnalyzer::new();
    let fk = test_flow_key();

    // 0xFF and 0xFE never occur in well-formed UTF-8, so from_utf8 is guaranteed
    // to fail. The trailing ASCII keeps the lossy form recognizable.
    let raw_sni: &[u8] = &[0xff, 0xfe, b'a', b'.', b'c', b'o', b'm'];
    let record = build_client_hello_raw_sni(raw_sni, &[0x1301]);
    analyzer.on_data(&fk, Direction::ClientToServer, &record, 0);

    // Parse error counter must NOT be incremented — the record itself parsed,
    // only the SNI hostname bytes failed UTF-8 decoding.
    assert_eq!(analyzer.parse_error_count(), 0);

    // sni_counts should be keyed on a tagged hex form, not on a lossy string.
    // This guarantees distinct byte sequences don't collide.
    //
    // Compare the Option itself rather than unwrapping: an unwrap would panic
    // on a missing key before assert_eq! could print the explanatory message.
    let expected_key = "<non-utf8:fffe612e636f6d>";
    assert_eq!(
        analyzer.sni_counts().get(expected_key).copied(),
        Some(1),
        "expected sni_counts to contain hex-tagged key {expected_key}"
    );

    // Exactly one of the recorded findings must be the non-UTF-8 SNI anomaly
    // (other, unrelated findings are not constrained by this test).
    let findings = analyzer.findings();
    let non_utf8_findings: Vec<_> = findings
        .iter()
        .filter(|f| f.summary.contains("non-UTF-8 bytes"))
        .collect();
    assert_eq!(
        non_utf8_findings.len(),
        1,
        "expected exactly one non-UTF-8 SNI finding, got: {findings:?}"
    );

    // Indexing is safe here: the length was asserted to be 1 just above.
    let f = non_utf8_findings[0];
    assert_eq!(f.category, wirerust::findings::ThreatCategory::Anomaly);
    assert_eq!(f.verdict, wirerust::findings::Verdict::Inconclusive);
    assert_eq!(f.confidence, wirerust::findings::Confidence::Low);
    assert!(f.summary.contains("RFC 6066"));

    // Hex evidence is the lossless representation of the raw bytes.
    assert!(
        f.evidence.iter().any(|e| e.contains("fffe612e636f6d")),
        "expected hex evidence to contain raw byte sequence, got: {:?}",
        f.evidence
    );
}

#[test]
fn test_ascii_sni_does_not_emit_non_utf8_finding() {
    // Regression guard: an ordinary ASCII hostname is counted under its own
    // name and never produces the non-UTF-8 SNI finding.
    let mut tls = TlsAnalyzer::new();
    let flow = test_flow_key();

    let hello = build_client_hello("example.com", &[0x1301]);
    tls.on_data(&flow, Direction::ClientToServer, &hello, 0);

    // The hostname itself is the map key for well-formed names.
    let ascii_hits = *tls.sni_counts().get("example.com").unwrap();
    assert_eq!(ascii_hits, 1);

    // No finding may mention the non-UTF-8 anomaly.
    let flagged = tls
        .findings()
        .iter()
        .filter(|finding| finding.summary.contains("non-UTF-8 bytes"))
        .count();
    assert_eq!(flagged, 0);
}

#[test]
fn test_non_utf8_sni_escapes_control_bytes_in_summary() {
    // Security regression: when a malformed SNI embeds a raw ESC (0x1b) and an
    // ANSI CSI sequence, the finding summary must carry an escaped rendering
    // rather than the literal control byte, which an analyst's terminal would
    // otherwise interpret (recoloring or overwriting the report line).
    let mut tls = TlsAnalyzer::new();
    let flow = test_flow_key();

    // Byte layout: 0xff forces from_utf8 to fail, ESC [ 3 1 m is the ANSI "red"
    // CSI sequence, and "pwnd" stands in for attacker-chosen visible text.
    let raw_sni: &[u8] = b"\xff\x1b[31mpwnd";
    let hello = build_client_hello_raw_sni(raw_sni, &[0x1301]);
    tls.on_data(&flow, Direction::ClientToServer, &hello, 0);

    let findings = tls.findings();
    let anomaly = findings
        .iter()
        .find(|candidate| candidate.summary.contains("non-UTF-8 bytes"))
        .expect("expected non-UTF-8 SNI finding");

    // No raw ESC byte may survive into the summary...
    let summary_is_esc_free = anomaly.summary.bytes().all(|byte| byte != 0x1b);
    assert!(
        summary_is_esc_free,
        "summary contains raw ESC byte (terminal injection vector): {:?}",
        anomaly.summary
    );
    // ...it must appear in its Debug-escaped spelling instead.
    let has_escaped_esc = anomaly.summary.contains("\\u{1b}");
    assert!(
        has_escaped_esc,
        "summary should contain escaped ESC sequence \\u{{1b}}, got: {}",
        anomaly.summary
    );

    // The hex evidence still records every original byte, ESC included.
    let hex_recorded = anomaly
        .evidence
        .iter()
        .any(|entry| entry.contains("ff1b5b33316d70776e64"));
    assert!(
        hex_recorded,
        "expected raw bytes in hex evidence, got: {:?}",
        anomaly.evidence
    );
}