Skip to content

Commit

Permalink
fix(UsageParser): Handle non-ascii names / options.
Browse files Browse the repository at this point in the history
This should also speeds up the parser (except maybe for short options).
Multi-codepoint characters still can't be used as short options, but people shouldn't non-ASCII options nyway.

Closes #664
  • Loading branch information
tormol committed Sep 30, 2016
1 parent 5c7a219 commit 1d6a7c6
Showing 1 changed file with 67 additions and 49 deletions.
116 changes: 67 additions & 49 deletions src/usage_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,12 @@ impl<'a> UsageParser<'a> {
loop {
debugln!("iter; pos={};", self.pos);
self.stop_at(token);
if self.pos < self.usage.len() {
if let Some(c) = self.usage.chars().nth(self.pos) {
match c {
'-' => self.short_or_long(&mut arg),
'.' => self.multiple(&mut arg),
'\'' => self.help(&mut arg),
_ => self.name(&mut arg),
}
if let Some(&c) = self.usage.as_bytes().get(self.pos) {
match c {
b'-' => self.short_or_long(&mut arg),
b'.' => self.multiple(&mut arg),
b'\'' => self.help(&mut arg),
_ => self.name(&mut arg),
}
} else {
break;
Expand All @@ -82,7 +80,7 @@ impl<'a> UsageParser<'a> {

fn name(&mut self, arg: &mut Arg<'a, 'a>) {
debugln!("fn=name;");
if self.usage.chars().nth(self.pos).expect(INTERNAL_ERROR_MSG) == '<' &&
if *self.usage.as_bytes().get(self.pos).expect(INTERNAL_ERROR_MSG) == b'<' &&
!self.explicit_name_set {
arg.setb(ArgSettings::Required);
}
Expand Down Expand Up @@ -113,23 +111,17 @@ impl<'a> UsageParser<'a> {
}

fn stop_at<F>(&mut self, f: F)
where F: Fn(u32) -> bool
where F: Fn(u8) -> bool
{
debugln!("fn=stop_at;");
self.start = self.pos;
for c in self.usage[self.start..].chars() {
if f(c as u32) {
self.pos += 1;
continue;
}
break;
}
self.pos += self.usage[self.start..].bytes().take_while(|&b| f(b) ).count();
}

fn short_or_long(&mut self, arg: &mut Arg<'a, 'a>) {
debugln!("fn=short_or_long;");
self.pos += 1;
if self.usage.chars().nth(self.pos).expect(INTERNAL_ERROR_MSG) == '-' {
if *self.usage.as_bytes().get(self.pos).expect(INTERNAL_ERROR_MSG) == b'-' {
self.pos += 1;
self.long(arg);
return;
Expand All @@ -141,7 +133,7 @@ impl<'a> UsageParser<'a> {
debugln!("fn=long;");
self.stop_at(long_end);
let name = &self.usage[self.start..self.pos];
if arg.name.is_empty() || (self.prev == UsageToken::Short && arg.name.len() == 1) {
if !self.explicit_name_set {
debugln!("setting name: {}", name);
arg.name = name;
}
Expand All @@ -152,41 +144,41 @@ impl<'a> UsageParser<'a> {

fn short(&mut self, arg: &mut Arg<'a, 'a>) {
debugln!("fn=short;");
let name = &self.usage[self.pos..self.pos + 1];
debugln!("setting short: {}", name);
arg.short = Some(name.chars().nth(0).expect(INTERNAL_ERROR_MSG));
let start = &self.usage[self.pos..];
let short = start.chars().nth(0).expect(INTERNAL_ERROR_MSG);
debugln!("setting short: {}", short);
arg.short = Some(short);
if arg.name.is_empty() {
// --long takes precedence but doesn't set self.explicit_name_set
let name = &start[..short.len_utf8()];
debugln!("setting name: {}", name);
arg.name = name;
}
self.prev = UsageToken::Short;
}

// "something..."
fn multiple(&mut self, arg: &mut Arg) {
debugln!("fn=multiple;");
let mut dot_counter = 1;
let start = self.pos;
for c in self.usage[start..].chars() {
match c {
'.' => {
dot_counter += 1;
self.pos += 1;
if dot_counter == 3 {
debugln!("setting multiple");
arg.setb(ArgSettings::Multiple);
if arg.settings.is_set(ArgSettings::TakesValue) {
arg.setb(ArgSettings::UseValueDelimiter);
arg.unsetb(ArgSettings::ValueDelimiterNotSet);
if arg.val_delim.is_none() {
arg.val_delim = Some(',');
}
}
self.prev = UsageToken::Multiple;
self.pos += 1;
break;
let mut bytes = self.usage[start..].bytes();
while bytes.next() == Some(b'.') {
dot_counter += 1;
self.pos += 1;
if dot_counter == 3 {
debugln!("setting multiple");
arg.setb(ArgSettings::Multiple);
if arg.settings.is_set(ArgSettings::TakesValue) {
arg.setb(ArgSettings::UseValueDelimiter);
arg.unsetb(ArgSettings::ValueDelimiterNotSet);
if arg.val_delim.is_none() {
arg.val_delim = Some(',');
}
}
_ => break,
self.prev = UsageToken::Multiple;
self.pos += 1;
break;
}
}
}
Expand All @@ -204,26 +196,26 @@ impl<'a> UsageParser<'a> {
}

#[inline]
fn name_end(b: u32) -> bool {
fn name_end(b: u8) -> bool {
// 93(]), 62(>)
b > b']' as u32 || b < b'>' as u32 || (b > b'>' as u32 && b < b']' as u32)
b > b']' || b < b'>' || (b > b'>' && b < b']')
}

#[inline]
fn token(b: u32) -> bool {
fn token(b: u8) -> bool {
// 39('), 45(-), 46(.), 60(<), 91([)
b < 39 || b > 91 || (b > 46 && b < 91 && b != b'<' as u32) || (b > 39 && b < 45)
b < 39 || b > 91 || (b > 46 && b < 91 && b != b'<') || (b > 39 && b < 45)
}

#[inline]
fn long_end(b: u32) -> bool {
// 39('), 46(.), 60(<), 61(=), 91([)
(b < 39 && (b > 13 && b != b' ' as u32)) || b > 91 || (b > 61 && b < 91) ||
fn long_end(b: u8) -> bool {
// 39('), 46(.), 60(<), 61(=), 91([), 32( )
(b < 39 && (b > 13 && b != b' ')) || b > 91 || (b > 61 && b < 91) ||
(b > 39 && b < 60 && b != 46)
}

#[inline]
fn help_start(b: u32) -> bool {
fn help_start(b: u8) -> bool {
// 39(')
b < 39 || b > 39
}
Expand Down Expand Up @@ -1220,4 +1212,30 @@ mod test {
assert!(c.val_names.is_none());
assert!(c.num_vals.is_none());
}

#[test]
fn nonascii() {
let a = Arg::from_usage("<ASCII> 'üñíčöĐ€'");
assert_eq!(a.name, "ASCII");
assert_eq!(a.help, Some("üñíčöĐ€"));
let a = Arg::from_usage("<üñíčöĐ€> 'ASCII'");
assert_eq!(a.name, "üñíčöĐ€");
assert_eq!(a.help, Some("ASCII"));
let a = Arg::from_usage("<üñíčöĐ€> 'üñíčöĐ€'");
assert_eq!(a.name, "üñíčöĐ€");
assert_eq!(a.help, Some("üñíčöĐ€"));
let a = Arg::from_usage("-ø 'ø'");
assert_eq!(a.name, "ø");
assert_eq!(a.short, Some('ø'));
assert_eq!(a.help, Some("ø"));
let a = Arg::from_usage("--üñíčöĐ€ 'Nōṫ ASCII'");
assert_eq!(a.name, "üñíčöĐ€");
assert_eq!(a.long, Some("üñíčöĐ€"));
assert_eq!(a.help, Some("Nōṫ ASCII"));
let a = Arg::from_usage("[ñämê] --ôpt=[üñíčöĐ€] 'hælp'");
assert_eq!(a.name, "ñämê");
assert_eq!(a.long, Some("ôpt"));
assert_eq!(a.val_names.unwrap().values().collect::<Vec<_>>(), [&"üñíčöĐ€"]);
assert_eq!(a.help, Some("hælp"));
}
}

0 comments on commit 1d6a7c6

Please sign in to comment.