Skip to content

Commit

Permalink
refactor: Reduce code size of testing tokens if they're a number
Browse files Browse the repository at this point in the history
This commit is a tiny win in compiled code size of a final binary
including `clap` which shaves off 19k of compiled code locally.
Previously tokens were checked if they were a number by using
`.parse::<f64>().is_ok()`, but parsing floats is relatively heavyweight
in terms of code size. This replaces the check with a more naive "does
this string have lots of ascii digits" check where the compiled size of
this check should be much smaller.
  • Loading branch information
alexcrichton committed Oct 24, 2023
1 parent 1b84314 commit 9a9aabc
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 24 deletions.
6 changes: 3 additions & 3 deletions clap_builder/src/parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ impl<'cmd> Parser<'cmd> {

if self.cmd[current_positional.get_id()].is_allow_hyphen_values_set()
|| (self.cmd[current_positional.get_id()].is_allow_negative_numbers_set()
&& next.is_number())
&& next.is_negative_number())
{
// If allow hyphen, this isn't a new arg.
debug!("Parser::is_new_arg: Allow hyphen");
Expand Down Expand Up @@ -841,7 +841,7 @@ impl<'cmd> Parser<'cmd> {

#[allow(clippy::blocks_in_if_conditions)]
if matches!(parse_state, ParseState::Opt(opt) | ParseState::Pos(opt)
if self.cmd[opt].is_allow_hyphen_values_set() || (self.cmd[opt].is_allow_negative_numbers_set() && short_arg.is_number()))
if self.cmd[opt].is_allow_hyphen_values_set() || (self.cmd[opt].is_allow_negative_numbers_set() && short_arg.is_negative_number()))
{
debug!("Parser::parse_short_args: prior arg accepts hyphenated values",);
return Ok(ParseResult::MaybeHyphenValue);
Expand All @@ -851,7 +851,7 @@ impl<'cmd> Parser<'cmd> {
.get(&pos_counter)
.map(|arg| arg.is_allow_negative_numbers_set())
.unwrap_or_default()
&& short_arg.is_number()
&& short_arg.is_negative_number()
{
debug!("Parser::parse_short_arg: negative number");
return Ok(ParseResult::MaybeHyphenValue);
Expand Down
46 changes: 41 additions & 5 deletions clap_lex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,14 @@ impl<'s> ParsedArg<'s> {
self.inner == "--"
}

/// Does the argument look like a number
pub fn is_number(&self) -> bool {
/// Does the argument look like a negative number?
///
/// This won't parse the number in full but attempts to see if this looks
/// like something along the lines of `-3`, `-0.3`, or `-33.03`
pub fn is_negative_number(&self) -> bool {
self.to_value()
.map(|s| s.parse::<f64>().is_ok())
.ok()
.and_then(|s| Some(is_number(s.strip_prefix('-')?)))
.unwrap_or_default()
}

Expand Down Expand Up @@ -408,8 +412,8 @@ impl<'s> ShortFlags<'s> {
/// Does the short flag look like a number
///
/// Ideally call this before doing any iterator
pub fn is_number(&self) -> bool {
self.invalid_suffix.is_none() && self.utf8_prefix.as_str().parse::<f64>().is_ok()
pub fn is_negative_number(&self) -> bool {
self.invalid_suffix.is_none() && is_number(self.utf8_prefix.as_str())
}

/// Advance the iterator, returning the next short flag on success
Expand Down Expand Up @@ -466,3 +470,35 @@ fn split_nonutf8_once(b: &OsStr) -> (&str, Option<&OsStr>) {
}
}
}

fn is_number(arg: &str) -> bool {
// Return true if this looks like an integer or a float where it's all
// digits plus an optional single dot after some digits.
//
// For floats allow forms such as `1.`, `1.2`, `1.2e10`, etc.
let mut seen_dot = false;
let mut position_of_e = None;
for (i, c) in arg.as_bytes().iter().enumerate() {
match c {
// Digits are always valid
b'0'..=b'9' => {}

// Allow a `.`, but only one, only if it comes before an
// optional exponent, and only if it's not the first character.
b'.' if !seen_dot && position_of_e.is_none() && i > 0 => seen_dot = true,

// Allow an exponent `e` but only at most one after the first
// character.
b'e' if position_of_e.is_none() && i > 0 => position_of_e = Some(i),

_ => return false,
}
}

// Disallow `-1e` which isn't a valid float since it doesn't actually have
// an exponent.
match position_of_e {
Some(i) => i != arg.len() - 1,
None => true,
}
}
31 changes: 19 additions & 12 deletions clap_lex/tests/parsed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,14 @@ fn to_short() {

#[test]
fn is_negative_number() {
let raw = clap_lex::RawArgs::new(["bin", "-10.0"]);
let mut cursor = raw.cursor();
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();
for number in ["-10.0", "-1", "-100", "-3.5", "-1e10", "-1.3e10"] {
let raw = clap_lex::RawArgs::new(["bin", number]);
let mut cursor = raw.cursor();
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();

assert!(next.is_number());
assert!(next.is_negative_number());
}
}

#[test]
Expand All @@ -135,17 +137,22 @@ fn is_positive_number() {
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();

assert!(next.is_number());
assert!(!next.is_negative_number());
}

#[test]
fn is_not_number() {
let raw = clap_lex::RawArgs::new(["bin", "--10.0"]);
let mut cursor = raw.cursor();
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();

assert!(!next.is_number());
for number in ["--10.0", "-..", "-2..", "-e", "-1e", "-1e10.2", "-.2"] {
let raw = clap_lex::RawArgs::new(["bin", number]);
let mut cursor = raw.cursor();
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();

assert!(
!next.is_negative_number(),
"`{number}` is mistakenly classified as a number"
);
}
}

#[test]
Expand Down
8 changes: 4 additions & 4 deletions clap_lex/tests/shorts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,23 +151,23 @@ fn is_exhausted_empty() {
}

#[test]
fn is_number() {
fn is_negative_number() {
let raw = clap_lex::RawArgs::new(["bin", "-1.0"]);
let mut cursor = raw.cursor();
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();
let shorts = next.to_short().unwrap();

assert!(shorts.is_number());
assert!(shorts.is_negative_number());
}

#[test]
fn is_not_number() {
fn is_not_negaitve_number() {
let raw = clap_lex::RawArgs::new(["bin", "-hello"]);
let mut cursor = raw.cursor();
assert_eq!(raw.next_os(&mut cursor), Some(std::ffi::OsStr::new("bin")));
let next = raw.next(&mut cursor).unwrap();
let shorts = next.to_short().unwrap();

assert!(!shorts.is_number());
assert!(!shorts.is_negative_number());
}

0 comments on commit 9a9aabc

Please sign in to comment.