@@ -18,6 +18,13 @@ use hew_cabi::{
1818use std:: ffi:: CStr ;
1919use std:: os:: raw:: c_char;
2020
/// Largest YAML payload, in bytes (1 MiB), that is handed to the parser; anything
/// bigger is refused up front to avoid memory abuse.
const YAML_PARSE_SIZE_LIMIT_BYTES: usize = 1 << 20;
/// Maximum number of anchor declarations (`&name`) tolerated before parsing.
const YAML_PARSE_ANCHOR_LIMIT: usize = 32;
/// Maximum number of alias references (`*name`) tolerated before parsing.
const YAML_PARSE_ALIAS_LIMIT: usize = 1_024;
27+
2128/// Opaque wrapper around a [`serde_yaml::Value`].
2229///
2330/// Returned by [`hew_yaml_parse`], [`hew_yaml_get_field`], and
@@ -50,6 +57,90 @@ fn get_parse_last_error() -> String {
5057 LAST_PARSE_ERROR . with ( |error| error. borrow ( ) . clone ( ) . unwrap_or_default ( ) )
5158}
5259
60+ fn validate_yaml_input_limits ( input : & str ) -> Result < ( ) , String > {
61+ if input. len ( ) > YAML_PARSE_SIZE_LIMIT_BYTES {
62+ return Err ( format ! (
63+ "invalid YAML input: size limit exceeded ({} bytes > {} byte cap)" ,
64+ input. len( ) ,
65+ YAML_PARSE_SIZE_LIMIT_BYTES
66+ ) ) ;
67+ }
68+
69+ if !input
70+ . as_bytes ( )
71+ . iter ( )
72+ . any ( |byte| matches ! ( byte, b'&' | b'*' ) )
73+ {
74+ return Ok ( ( ) ) ;
75+ }
76+
77+ let bytes = input. as_bytes ( ) ;
78+ let mut anchors = 0usize ;
79+ let mut aliases = 0usize ;
80+ let mut idx = 0usize ;
81+ let mut in_single_quote = false ;
82+ let mut in_double_quote = false ;
83+
84+ while idx < bytes. len ( ) {
85+ match bytes[ idx] {
86+ b'\'' if !in_double_quote => {
87+ if in_single_quote && bytes. get ( idx + 1 ) == Some ( & b'\'' ) {
88+ idx += 2 ;
89+ continue ;
90+ }
91+ in_single_quote = !in_single_quote;
92+ }
93+ b'"' if !in_single_quote => {
94+ in_double_quote = !in_double_quote;
95+ }
96+ b'\\' if in_double_quote => {
97+ idx += 2 ;
98+ continue ;
99+ }
100+ b'&' | b'*'
101+ if !in_single_quote
102+ && !in_double_quote
103+ && bytes
104+ . get ( idx + 1 )
105+ . is_some_and ( |next| is_yaml_anchor_alias_name_byte ( * next) ) =>
106+ {
107+ if bytes[ idx] == b'&' {
108+ anchors += 1 ;
109+ if anchors > YAML_PARSE_ANCHOR_LIMIT {
110+ return Err ( format ! (
111+ "invalid YAML input: anchor limit exceeded ({anchors} > {YAML_PARSE_ANCHOR_LIMIT})"
112+ ) ) ;
113+ }
114+ } else {
115+ aliases += 1 ;
116+ if aliases > YAML_PARSE_ALIAS_LIMIT {
117+ return Err ( format ! (
118+ "invalid YAML input: alias limit exceeded ({aliases} > {YAML_PARSE_ALIAS_LIMIT})"
119+ ) ) ;
120+ }
121+ }
122+ }
123+ _ => { }
124+ }
125+
126+ idx += 1 ;
127+ }
128+
129+ Ok ( ( ) )
130+ }
131+
/// Reports whether `byte` may begin an anchor or alias name for the purposes of
/// the pre-parse scan: an ASCII letter or digit, or one of `_`, `-`, `.`.
fn is_yaml_anchor_alias_name_byte(byte: u8) -> bool {
    byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b'.')
}
143+
53144// ---------------------------------------------------------------------------
54145// C ABI exports
55146// ---------------------------------------------------------------------------
@@ -73,6 +164,10 @@ pub unsafe extern "C" fn hew_yaml_parse(yaml_str: *const c_char) -> *mut HewYaml
73164 set_parse_last_error ( "invalid YAML input: input was not valid UTF-8" ) ;
74165 return std:: ptr:: null_mut ( ) ;
75166 } ;
167+ if let Err ( err) = validate_yaml_input_limits ( s) {
168+ set_parse_last_error ( err) ;
169+ return std:: ptr:: null_mut ( ) ;
170+ }
76171 match serde_yaml:: from_str :: < serde_yaml:: Value > ( s) {
77172 Ok ( val) => {
78173 clear_parse_last_error ( ) ;
@@ -930,6 +1025,7 @@ pub extern "C" fn hew_yaml_from_null() -> *mut HewYamlValue {
9301025mod tests {
9311026 use super :: * ;
9321027 use std:: ffi:: CString ;
1028+ use std:: fmt:: Write as _;
9331029
9341030 /// Helper: parse a YAML string and return the owned pointer.
9351031 fn parse ( yaml : & str ) -> * mut HewYamlValue {
@@ -1472,6 +1568,103 @@ mod tests {
14721568 unsafe { hew_yaml_free ( ok) } ;
14731569 }
14741570
1571+ fn billion_laughs_alias_limit_yaml ( ) -> String {
1572+ let mut yaml = String :: from ( "a: &a [\" x\" ]\n " ) ;
1573+ let mut previous = String :: from ( "a" ) ;
1574+
1575+ for level in b'b' ..=b'i' {
1576+ let name = char:: from ( level) . to_string ( ) ;
1577+ write ! ( yaml, "{name}: &{name} [" ) . expect ( "write YAML alias bomb prefix" ) ;
1578+ for alias_idx in 0 ..9 {
1579+ if alias_idx > 0 {
1580+ yaml. push_str ( ", " ) ;
1581+ }
1582+ yaml. push ( '*' ) ;
1583+ yaml. push_str ( & previous) ;
1584+ }
1585+ yaml. push_str ( "]\n " ) ;
1586+ previous = name;
1587+ }
1588+
1589+ yaml. push_str ( "boom: [" ) ;
1590+ for alias_idx in 0 ..=YAML_PARSE_ALIAS_LIMIT {
1591+ if alias_idx > 0 {
1592+ yaml. push_str ( ", " ) ;
1593+ }
1594+ yaml. push ( '*' ) ;
1595+ yaml. push_str ( & previous) ;
1596+ }
1597+ yaml. push_str ( "]\n " ) ;
1598+ yaml
1599+ }
1600+
/// An alias-bomb document must be refused by `parse`, and the recorded error
/// must mention the alias limit.
#[test]
fn parse_rejects_alias_bomb_before_deserialization() {
    let bomb = billion_laughs_alias_limit_yaml();
    assert!(parse(&bomb).is_null());

    // SAFETY: hew_yaml_last_error returns a malloc-allocated C string.
    let message = unsafe { read_and_free_cstr(hew_yaml_last_error()) };
    assert!(message.contains("alias limit"));
}
1611+
/// Input twice as large as the size cap must be refused by `parse`, and the
/// recorded error must mention both the size limit and the byte cap.
#[test]
fn parse_rejects_input_over_size_cap() {
    let oversized = "a".repeat(YAML_PARSE_SIZE_LIMIT_BYTES * 2);
    assert!(parse(&oversized).is_null());

    // SAFETY: hew_yaml_last_error returns a malloc-allocated C string.
    let message = unsafe { read_and_free_cstr(hew_yaml_last_error()) };
    for needle in ["size limit", "byte cap"] {
        assert!(message.contains(needle));
    }
}
1623+
// Guard against over-eager limits: a small document that declares three
// anchors and references them five times (twice through the `<<` merge key)
// must still parse, and its `copy_three` field must be retrievable.
1624+ #[ test]
1625+ fn parse_accepts_normal_anchor_and_alias_usage ( ) {
1626+ let yaml = r#"
1627+ defaults: &defaults
1628+ enabled: true
1629+ tags: ["base"]
1630+ profile: &profile
1631+ <<: *defaults
1632+ name: hew
1633+ release: &release
1634+ <<: *profile
1635+ version: 1
1636+ copy_one: *defaults
1637+ copy_two: *profile
1638+ copy_three: *release
1639+ "# ;
1640+ let val = parse ( yaml) ;
1641+ assert ! ( !val. is_null( ) ) ;
1642+
1643+ // SAFETY: val is a valid pointer returned by parse.
1644+ unsafe {
1645+ let key = CString :: new ( "copy_three" ) . unwrap ( ) ;
1646+ let copy_three = hew_yaml_get_field ( val, key. as_ptr ( ) ) ;
1647+ assert ! ( !copy_three. is_null( ) ) ;
// NOTE(review): 6 is presumably the type tag for a mapping; confirm against hew_yaml_type.
1648+ assert_eq ! ( hew_yaml_type( copy_three) , 6 ) ;
// Both the looked-up field and the root value are released here.
1649+ hew_yaml_free ( copy_three) ;
1650+ hew_yaml_free ( val) ;
1651+ }
1652+ }
1653+
/// `&name` / `*name` sequences inside a quoted scalar are ordinary text: the
/// document must parse and the string must come back unchanged.
///
/// The fixture deliberately contains a literal `&` followed by a name byte so
/// that the anchor branch of the pre-parse scan is exercised, not only the
/// alias branch.
#[test]
fn parse_ignores_anchor_like_markers_inside_quotes() {
    let val = parse("'&not_anchor *not_alias'");
    assert!(!val.is_null());

    // SAFETY: val is a valid pointer returned by parse.
    unsafe {
        // NOTE(review): 4 is presumably the type tag for a string; confirm against hew_yaml_type.
        assert_eq!(hew_yaml_type(val), 4);
        let s = read_and_free_cstr(hew_yaml_get_string(val));
        assert_eq!(s, "&not_anchor *not_alias");
        hew_yaml_free(val);
    }
}
1667+
14751668 #[ test]
14761669 fn get_bytes_invalid_base64_returns_null_and_sets_last_error ( ) {
14771670 clear_parse_last_error ( ) ;
0 commit comments