diff --git a/crates/kernel_cmdline/src/bytes.rs b/crates/kernel_cmdline/src/bytes.rs index 9bc909caa..d4af49f76 100644 --- a/crates/kernel_cmdline/src/bytes.rs +++ b/crates/kernel_cmdline/src/bytes.rs @@ -15,7 +15,7 @@ use anyhow::Result; /// Wraps the raw command line bytes and provides methods for parsing and iterating /// over individual parameters. Uses copy-on-write semantics to avoid unnecessary /// allocations when working with borrowed data. -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct Cmdline<'a>(Cow<'a, [u8]>); impl<'a, T: AsRef<[u8]> + ?Sized> From<&'a T> for Cmdline<'a> { @@ -38,19 +38,57 @@ impl<'a> From> for Cmdline<'a> { /// /// This is created by the `iter` method on `Cmdline`. #[derive(Debug)] -pub struct CmdlineIter<'a>(&'a [u8]); +pub struct CmdlineIter<'a>(CmdlineIterBytes<'a>); impl<'a> Iterator for CmdlineIter<'a> { type Item = Parameter<'a>; fn next(&mut self) -> Option { - let (param, rest) = Parameter::parse_one(self.0); + self.0.next().and_then(Parameter::parse_internal) + } +} + +/// An iterator over kernel command line parameters as byte slices. +/// +/// This is created by the `iter_bytes` method on `Cmdline`. +#[derive(Debug)] +pub struct CmdlineIterBytes<'a>(&'a [u8]); + +impl<'a> Iterator for CmdlineIterBytes<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option { + let input = self.0.trim_ascii_start(); + + if input.is_empty() { + self.0 = input; + return None; + } + + let mut in_quotes = false; + let end = input.iter().position(move |c| { + if *c == b'"' { + in_quotes = !in_quotes; + } + !in_quotes && c.is_ascii_whitespace() + }); + + let end = end.unwrap_or(input.len()); + let (param, rest) = input.split_at(end); self.0 = rest; - param + + Some(param) } } impl<'a> Cmdline<'a> { + /// Creates a new empty owned `Cmdline`. + /// + /// This is equivalent to `Cmdline::default()` but makes ownership explicit. + pub fn new() -> Cmdline<'static> { + Cmdline::default() + } + /// Reads the kernel command line from `/proc/cmdline`. /// /// Returns an error if the file cannot be read or if there are I/O issues. @@ -64,7 +102,15 @@ impl<'a> Cmdline<'a> { /// unquoted whitespace characters. Parameters are parsed as either /// key-only switches or key=value pairs. pub fn iter(&'a self) -> CmdlineIter<'a> { - CmdlineIter(&self.0) + CmdlineIter(self.iter_bytes()) + } + + /// Returns an iterator over all parameters in the command line as byte slices. + /// + /// This is similar to `iter()` but yields `&[u8]` directly instead of `Parameter`, + /// which can be more convenient when you just need the raw byte representation. + pub fn iter_bytes(&self) -> CmdlineIterBytes<'_> { + CmdlineIterBytes(&self.0) } /// Returns an iterator over all parameters in the command line @@ -362,51 +408,27 @@ impl<'a> Parameter<'a> { /// be constructed from the input. This occurs when the input is /// either empty or contains only whitespace. /// - /// Any remaining bytes not consumed from the input are discarded. + /// If the input contains multiple parameters, only the first one + /// is parsed and the rest is discarded. pub fn parse + ?Sized>(input: &'a T) -> Option { - Self::parse_one(input).0 + CmdlineIterBytes(input.as_ref()) + .next() + .and_then(Self::parse_internal) } - /// Attempt to parse a single command line parameter from a slice - /// of bytes. - /// - /// The first tuple item contains the parsed parameter, or `None` - /// if a Parameter could not be constructed from the input. This - /// occurs when the input is either empty or contains only - /// whitespace. + /// Parse a parameter from a byte slice that contains exactly one parameter. /// - /// Any remaining bytes not consumed from the input are returned - /// as the second tuple item. - pub fn parse_one + ?Sized>(input: &'a T) -> (Option, &'a [u8]) { - let input = input.as_ref().trim_ascii_start(); - - if input.is_empty() { - return (None, input); - } - - let mut in_quotes = false; - let end = input.iter().position(move |c| { - if *c == b'"' { - in_quotes = !in_quotes; - } - !in_quotes && c.is_ascii_whitespace() - }); - - let end = match end { - Some(end) => end, - None => input.len(), - }; - - let (input, rest) = input.split_at(end); - + /// This is an internal method that assumes the input has already been + /// split into a single parameter (e.g., by CmdlineIterBytes). + fn parse_internal(input: &'a [u8]) -> Option { let equals = input.iter().position(|b| *b == b'='); - let ret = match equals { - None => Self { + match equals { + None => Some(Self { parameter: input, key: ParameterKey(input), value: None, - }, + }), Some(i) => { let (key, mut value) = input.split_at(i); let key = ParameterKey(key); @@ -421,15 +443,13 @@ impl<'a> Parameter<'a> { v.strip_suffix(b"\"").unwrap_or(v) }; - Self { + Some(Self { parameter: input, key, value: Some(value), - } + }) } - }; - - (Some(ret), rest) + } } /// Returns the key part of the parameter @@ -472,27 +492,19 @@ mod tests { } #[test] - fn test_parameter_parse_one() { - let (p, rest) = Parameter::parse_one(b"foo"); - let p = p.unwrap(); + fn test_parameter_parse() { + let p = Parameter::parse(b"foo").unwrap(); assert_eq!(p.key.0, b"foo"); assert_eq!(p.value, None); - assert_eq!(rest, "".as_bytes()); - // should consume one parameter and return the rest of the input - let (p, rest) = Parameter::parse_one(b"foo=bar baz"); - let p = p.unwrap(); + // should parse only the first parameter and discard the rest of the input + let p = Parameter::parse(b"foo=bar baz").unwrap(); assert_eq!(p.key.0, b"foo"); assert_eq!(p.value, Some(b"bar".as_slice())); - assert_eq!(rest, " baz".as_bytes()); // should return None on empty or whitespace inputs - let (p, rest) = Parameter::parse_one(b""); - assert!(p.is_none()); - assert_eq!(rest, b"".as_slice()); - let (p, rest) = Parameter::parse_one(b" "); - assert!(p.is_none()); - assert_eq!(rest, b"".as_slice()); + assert!(Parameter::parse(b"").is_none()); + assert!(Parameter::parse(b" ").is_none()); } #[test] @@ -527,11 +539,10 @@ mod tests { #[test] fn test_parameter_internal_key_whitespace() { - let (p, rest) = Parameter::parse_one("foo bar=baz".as_bytes()); - let p = p.unwrap(); + // parse should only consume the first parameter + let p = Parameter::parse("foo bar=baz".as_bytes()).unwrap(); assert_eq!(p.key.0, b"foo"); assert_eq!(p.value, None); - assert_eq!(rest, b" bar=baz"); } #[test] @@ -609,6 +620,16 @@ mod tests { assert_eq!(kargs.iter().next(), None); } + #[test] + fn test_cmdline_new() { + let kargs = Cmdline::new(); + assert_eq!(kargs.iter().next(), None); + assert!(kargs.is_owned()); + + // Verify we can store it in a 'static context + let _static_kargs: Cmdline<'static> = Cmdline::new(); + } + #[test] fn test_kargs_iter_utf8() { let kargs = Cmdline::from(b"foo=bar,bar2 \xff baz=fuz bad=oh\xffno wiz"); @@ -906,4 +927,54 @@ mod tests { assert_eq!(params[1], param("baz=qux")); assert_eq!(params[2], param("wiz")); } + + #[test] + fn test_iter_bytes_simple() { + let kargs = Cmdline::from(b"foo bar baz"); + let params: Vec<_> = kargs.iter_bytes().collect(); + + assert_eq!(params.len(), 3); + assert_eq!(params[0], b"foo"); + assert_eq!(params[1], b"bar"); + assert_eq!(params[2], b"baz"); + } + + #[test] + fn test_iter_bytes_with_values() { + let kargs = Cmdline::from(b"foo=bar baz=qux wiz"); + let params: Vec<_> = kargs.iter_bytes().collect(); + + assert_eq!(params.len(), 3); + assert_eq!(params[0], b"foo=bar"); + assert_eq!(params[1], b"baz=qux"); + assert_eq!(params[2], b"wiz"); + } + + #[test] + fn test_iter_bytes_with_quotes() { + let kargs = Cmdline::from(b"foo=\"bar baz\" qux"); + let params: Vec<_> = kargs.iter_bytes().collect(); + + assert_eq!(params.len(), 2); + assert_eq!(params[0], b"foo=\"bar baz\""); + assert_eq!(params[1], b"qux"); + } + + #[test] + fn test_iter_bytes_extra_whitespace() { + let kargs = Cmdline::from(b" foo bar "); + let params: Vec<_> = kargs.iter_bytes().collect(); + + assert_eq!(params.len(), 2); + assert_eq!(params[0], b"foo"); + assert_eq!(params[1], b"bar"); + } + + #[test] + fn test_iter_bytes_empty() { + let kargs = Cmdline::from(b""); + let params: Vec<_> = kargs.iter_bytes().collect(); + + assert_eq!(params.len(), 0); + } } diff --git a/crates/kernel_cmdline/src/utf8.rs b/crates/kernel_cmdline/src/utf8.rs index b066d652f..bfc12f292 100644 --- a/crates/kernel_cmdline/src/utf8.rs +++ b/crates/kernel_cmdline/src/utf8.rs @@ -14,7 +14,7 @@ use anyhow::Result; /// Wraps the raw command line bytes and provides methods for parsing and iterating /// over individual parameters. Uses copy-on-write semantics to avoid unnecessary /// allocations when working with borrowed data. -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct Cmdline<'a>(bytes::Cmdline<'a>); impl<'a, T: AsRef + ?Sized> From<&'a T> for Cmdline<'a> { @@ -49,7 +49,33 @@ impl<'a> Iterator for CmdlineIter<'a> { } } +/// An iterator over UTF-8 kernel command line parameters as string slices. +/// +/// This is created by the `iter_str` method on `Cmdline`. +#[derive(Debug)] +pub struct CmdlineIterStr<'a>(bytes::CmdlineIterBytes<'a>); + +impl<'a> Iterator for CmdlineIterStr<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + // Get the next byte slice from the underlying iterator + let bytes = self.0.next()?; + + // Convert to UTF-8 string slice + // SAFETY: We know this is valid UTF-8 since the Cmdline was constructed from valid UTF-8 + Some(str::from_utf8(bytes).expect("Parameter bytes come from valid UTF-8 cmdline")) + } +} + impl<'a> Cmdline<'a> { + /// Creates a new empty owned `Cmdline`. + /// + /// This is equivalent to `Cmdline::default()` but makes ownership explicit. + pub fn new() -> Cmdline<'static> { + Cmdline::default() + } + /// Reads the kernel command line from `/proc/cmdline`. /// /// Returns an error if: @@ -77,6 +103,14 @@ impl<'a> Cmdline<'a> { CmdlineIter(self.0.iter()) } + /// Returns an iterator over all parameters in the command line as string slices. + /// + /// This is similar to `iter()` but yields `&str` directly instead of `Parameter`, + /// which can be more convenient when you just need the string representation. + pub fn iter_str(&self) -> CmdlineIterStr<'_> { + CmdlineIterStr(self.0.iter_bytes()) + } + /// Locate a kernel argument with the given key name. /// /// Returns the first parameter matching the given key, or `None` if not found. @@ -276,36 +310,8 @@ impl<'a> Parameter<'a> { /// Returns `Some(Parameter)`, or `None` if a Parameter could not /// be constructed from the input. This occurs when the input is /// either empty or contains only whitespace. - /// - /// Any remaining characters not consumed from the input are - /// discarded. pub fn parse + ?Sized>(input: &'a T) -> Option { - Self::parse_one(input).0 - } - - /// Attempt to parse a single command line parameter from a UTF-8 - /// string. - /// - /// The first tuple item contains the parsed parameter, or `None` - /// if a Parameter could not be constructed from the input. This - /// occurs when the input is either empty or contains only - /// whitespace. - /// - /// Any remaining characters not consumed from the input are - /// returned as the second tuple item. - pub fn parse_one + ?Sized>(input: &'a T) -> (Option, &'a str) { - let (bytes, rest) = bytes::Parameter::parse_one(input.as_ref().as_bytes()); - - // SAFETY: we know this is valid UTF-8 since input is &str, - // and `rest` is a subslice of that &str which was split on - // whitespace - let rest = str::from_utf8(rest) - .expect("Splitting UTF-8 on ascii whitespace cannot produce invalid UTF-8 substrings"); - - match bytes { - Some(p) => (Some(Self(p)), rest), - None => (None, rest), - } + bytes::Parameter::parse(input.as_ref().as_bytes()).map(Self) } /// Construct a utf8::Parameter from a bytes::Parameter @@ -329,13 +335,6 @@ impl<'a> Parameter<'a> { str::from_utf8(p).expect("We only construct the underlying bytes from valid UTF-8") }) } - - /// Returns the parameter as a &str - pub fn as_str(&'a self) -> &'a str { - // SAFETY: We know this is valid UTF-8 since we only - // construct the underlying `bytes` from valid UTF-8 - str::from_utf8(&self.0).expect("We only construct the underlying bytes from valid UTF-8") - } } impl<'a> TryFrom> for Parameter<'a> { @@ -397,27 +396,19 @@ mod tests { } #[test] - fn test_parameter_parse_one() { - let (p, rest) = Parameter::parse_one("foo"); - let p = p.unwrap(); + fn test_parameter_parse() { + let p = Parameter::parse("foo").unwrap(); assert_eq!(p.key(), "foo".into()); assert_eq!(p.value(), None); - assert_eq!(rest, ""); - // should consume one parameter and return the rest of the input - let (p, rest) = Parameter::parse_one("foo=bar baz"); - let p = p.unwrap(); + // should parse only the first parameter and discard the rest of the input + let p = Parameter::parse("foo=bar baz").unwrap(); assert_eq!(p.key(), "foo".into()); assert_eq!(p.value(), Some("bar")); - assert_eq!(rest, " baz"); // should return None on empty or whitespace inputs - let (p, rest) = Parameter::parse_one(""); - assert!(p.is_none()); - assert_eq!(rest, ""); - let (p, rest) = Parameter::parse_one(" "); - assert!(p.is_none()); - assert_eq!(rest, ""); + assert!(Parameter::parse("").is_none()); + assert!(Parameter::parse(" ").is_none()); } #[test] @@ -446,11 +437,10 @@ mod tests { #[test] fn test_parameter_internal_key_whitespace() { - let (p, rest) = Parameter::parse_one("foo bar=baz"); - let p = p.unwrap(); + // parse should only consume the first parameter + let p = Parameter::parse("foo bar=baz").unwrap(); assert_eq!(p.key(), "foo".into()); assert_eq!(p.value(), None); - assert_eq!(rest, " bar=baz"); } #[test] @@ -550,6 +540,16 @@ mod tests { assert_eq!(kargs.iter().next(), None); } + #[test] + fn test_cmdline_new() { + let kargs = Cmdline::new(); + assert_eq!(kargs.iter().next(), None); + assert!(kargs.is_owned()); + + // Verify we can store it in a 'static context + let _static_kargs: Cmdline<'static> = Cmdline::new(); + } + #[test] fn test_kargs_simple_from_string() { let kargs = Cmdline::from("foo=bar,bar2 baz=fuz wiz".to_string());