Skip to content

Commit 985920f

Browse files
authored
fix(std/encoding/yaml): cap anchor/alias expansion and input size to block billion-laughs DoS (#1462)
1 parent 85c2edd commit 985920f

1 file changed

Lines changed: 193 additions & 0 deletions

File tree

std/encoding/yaml/src/lib.rs

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ use hew_cabi::{
1818
use std::ffi::CStr;
1919
use std::os::raw::c_char;
2020

21+
/// Largest YAML input, in bytes, that is handed to the parser (1 MiB).
/// Anything longer is refused up front to avoid memory abuse.
const YAML_PARSE_SIZE_LIMIT_BYTES: usize = 1 << 20;
/// Largest number of anchor declarations (`&name`) tolerated by the pre-parse scan.
const YAML_PARSE_ANCHOR_LIMIT: usize = 32;
/// Largest number of alias references (`*name`) tolerated by the pre-parse scan.
const YAML_PARSE_ALIAS_LIMIT: usize = 1024;
27+
2128
/// Opaque wrapper around a [`serde_yaml::Value`].
2229
///
2330
/// Returned by [`hew_yaml_parse`], [`hew_yaml_get_field`], and
@@ -50,6 +57,90 @@ fn get_parse_last_error() -> String {
5057
LAST_PARSE_ERROR.with(|error| error.borrow().clone().unwrap_or_default())
5158
}
5259

60+
fn validate_yaml_input_limits(input: &str) -> Result<(), String> {
61+
if input.len() > YAML_PARSE_SIZE_LIMIT_BYTES {
62+
return Err(format!(
63+
"invalid YAML input: size limit exceeded ({} bytes > {} byte cap)",
64+
input.len(),
65+
YAML_PARSE_SIZE_LIMIT_BYTES
66+
));
67+
}
68+
69+
if !input
70+
.as_bytes()
71+
.iter()
72+
.any(|byte| matches!(byte, b'&' | b'*'))
73+
{
74+
return Ok(());
75+
}
76+
77+
let bytes = input.as_bytes();
78+
let mut anchors = 0usize;
79+
let mut aliases = 0usize;
80+
let mut idx = 0usize;
81+
let mut in_single_quote = false;
82+
let mut in_double_quote = false;
83+
84+
while idx < bytes.len() {
85+
match bytes[idx] {
86+
b'\'' if !in_double_quote => {
87+
if in_single_quote && bytes.get(idx + 1) == Some(&b'\'') {
88+
idx += 2;
89+
continue;
90+
}
91+
in_single_quote = !in_single_quote;
92+
}
93+
b'"' if !in_single_quote => {
94+
in_double_quote = !in_double_quote;
95+
}
96+
b'\\' if in_double_quote => {
97+
idx += 2;
98+
continue;
99+
}
100+
b'&' | b'*'
101+
if !in_single_quote
102+
&& !in_double_quote
103+
&& bytes
104+
.get(idx + 1)
105+
.is_some_and(|next| is_yaml_anchor_alias_name_byte(*next)) =>
106+
{
107+
if bytes[idx] == b'&' {
108+
anchors += 1;
109+
if anchors > YAML_PARSE_ANCHOR_LIMIT {
110+
return Err(format!(
111+
"invalid YAML input: anchor limit exceeded ({anchors} > {YAML_PARSE_ANCHOR_LIMIT})"
112+
));
113+
}
114+
} else {
115+
aliases += 1;
116+
if aliases > YAML_PARSE_ALIAS_LIMIT {
117+
return Err(format!(
118+
"invalid YAML input: alias limit exceeded ({aliases} > {YAML_PARSE_ALIAS_LIMIT})"
119+
));
120+
}
121+
}
122+
}
123+
_ => {}
124+
}
125+
126+
idx += 1;
127+
}
128+
129+
Ok(())
130+
}
131+
132+
/// Reports whether `byte` is accepted as the first byte of an anchor or alias
/// name by the pre-parse scan: an ASCII letter or digit, `_`, `-`, or `.`.
fn is_yaml_anchor_alias_name_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b'.')
}
143+
53144
// ---------------------------------------------------------------------------
54145
// C ABI exports
55146
// ---------------------------------------------------------------------------
@@ -73,6 +164,10 @@ pub unsafe extern "C" fn hew_yaml_parse(yaml_str: *const c_char) -> *mut HewYaml
73164
set_parse_last_error("invalid YAML input: input was not valid UTF-8");
74165
return std::ptr::null_mut();
75166
};
167+
if let Err(err) = validate_yaml_input_limits(s) {
168+
set_parse_last_error(err);
169+
return std::ptr::null_mut();
170+
}
76171
match serde_yaml::from_str::<serde_yaml::Value>(s) {
77172
Ok(val) => {
78173
clear_parse_last_error();
@@ -930,6 +1025,7 @@ pub extern "C" fn hew_yaml_from_null() -> *mut HewYamlValue {
9301025
mod tests {
9311026
use super::*;
9321027
use std::ffi::CString;
1028+
use std::fmt::Write as _;
9331029

9341030
/// Helper: parse a YAML string and return the owned pointer.
9351031
fn parse(yaml: &str) -> *mut HewYamlValue {
@@ -1472,6 +1568,103 @@ mod tests {
14721568
unsafe { hew_yaml_free(ok) };
14731569
}
14741570

1571+
fn billion_laughs_alias_limit_yaml() -> String {
1572+
let mut yaml = String::from("a: &a [\"x\"]\n");
1573+
let mut previous = String::from("a");
1574+
1575+
for level in b'b'..=b'i' {
1576+
let name = char::from(level).to_string();
1577+
write!(yaml, "{name}: &{name} [").expect("write YAML alias bomb prefix");
1578+
for alias_idx in 0..9 {
1579+
if alias_idx > 0 {
1580+
yaml.push_str(", ");
1581+
}
1582+
yaml.push('*');
1583+
yaml.push_str(&previous);
1584+
}
1585+
yaml.push_str("]\n");
1586+
previous = name;
1587+
}
1588+
1589+
yaml.push_str("boom: [");
1590+
for alias_idx in 0..=YAML_PARSE_ALIAS_LIMIT {
1591+
if alias_idx > 0 {
1592+
yaml.push_str(", ");
1593+
}
1594+
yaml.push('*');
1595+
yaml.push_str(&previous);
1596+
}
1597+
yaml.push_str("]\n");
1598+
yaml
1599+
}
1600+
1601+
/// The alias-heavy fixture must be refused: `parse` yields a null pointer and
/// the recorded error mentions the alias limit.
#[test]
fn parse_rejects_alias_bomb_before_deserialization() {
    let rejected = parse(&billion_laughs_alias_limit_yaml());
    assert!(rejected.is_null());

    // SAFETY: hew_yaml_last_error returns a malloc-allocated C string.
    let message = unsafe { read_and_free_cstr(hew_yaml_last_error()) };
    assert!(message.contains("alias limit"));
}
1611+
1612+
/// An input twice the size cap must be refused with a size-limit error that
/// also names the byte cap.
#[test]
fn parse_rejects_input_over_size_cap() {
    let oversized = "a".repeat(YAML_PARSE_SIZE_LIMIT_BYTES * 2);
    let rejected = parse(&oversized);
    assert!(rejected.is_null());

    // SAFETY: hew_yaml_last_error returns a malloc-allocated C string.
    let message = unsafe { read_and_free_cstr(hew_yaml_last_error()) };
    for expected in ["size limit", "byte cap"] {
        assert!(message.contains(expected));
    }
}
1623+
1624+
/// A small document with three anchors and five aliases stays well inside the
/// limits and must parse; the aliased `copy_three` entry resolves to the
/// nested `release` node.
#[test]
fn parse_accepts_normal_anchor_and_alias_usage() {
    // The nested keys must be indented under their parent. Without the
    // indentation `<<` would become a repeated top-level key and `&release`
    // would anchor an empty value instead of a mapping.
    let yaml = r#"
defaults: &defaults
  enabled: true
  tags: ["base"]
profile: &profile
  <<: *defaults
  name: hew
release: &release
  <<: *profile
  version: 1
copy_one: *defaults
copy_two: *profile
copy_three: *release
"#;
    let val = parse(yaml);
    assert!(!val.is_null());

    // SAFETY: val is a valid pointer returned by parse.
    unsafe {
        let key = CString::new("copy_three").unwrap();
        let copy_three = hew_yaml_get_field(val, key.as_ptr());
        assert!(!copy_three.is_null());
        // NOTE(review): 6 is presumably the type tag for a mapping; confirm
        // against hew_yaml_type.
        assert_eq!(hew_yaml_type(copy_three), 6);
        hew_yaml_free(copy_three);
        hew_yaml_free(val);
    }
}
1653+
1654+
/// `&` and `*` inside a single-quoted scalar are plain text: the document
/// parses and the string value comes back unchanged.
#[test]
fn parse_ignores_anchor_like_markers_inside_quotes() {
    let parsed = parse("'&not_anchor *not_alias'");
    assert!(!parsed.is_null());

    // SAFETY: val is a valid pointer returned by parse.
    unsafe {
        assert_eq!(hew_yaml_type(parsed), 4);
        let text = read_and_free_cstr(hew_yaml_get_string(parsed));
        assert_eq!(text, "&not_anchor *not_alias");
        hew_yaml_free(parsed);
    }
}
1667+
14751668
#[test]
14761669
fn get_bytes_invalid_base64_returns_null_and_sets_last_error() {
14771670
clear_parse_last_error();

0 commit comments

Comments
 (0)