Skip to content

Commit

Permalink
feat(UTF-8): adds support for invalid utf8 in values
Browse files Browse the repository at this point in the history
Closes #269
  • Loading branch information
kbknapp committed Jan 23, 2016
1 parent 8476831 commit 7722618
Show file tree
Hide file tree
Showing 11 changed files with 434 additions and 170 deletions.
8 changes: 6 additions & 2 deletions src/app/macros.rs
@@ -1,5 +1,6 @@
macro_rules! remove_overriden {
($me:ident, $name:expr) => ({
debugln!("macro=remove_overriden!;");
if let Some(ref o) = $me.opts.iter().filter(|o| o.name == *$name).next() {
if let Some(ref ora) = o.requires {
for a in ora {
Expand Down Expand Up @@ -55,6 +56,7 @@ macro_rules! remove_overriden {
macro_rules! arg_post_processing(
($me:ident, $arg:ident, $matcher:ident) => ({
use args::AnyArg;
debugln!("macro=arg_post_processing!;");
// Handle POSIX overrides
debug!("Is '{}' in overrides...", $arg.to_string());
if $me.overrides.contains(&$arg.name()) {
Expand All @@ -78,10 +80,10 @@ macro_rules! arg_post_processing(
} else { sdebugln!("No"); }

// Handle conflicts
debugln!("Does '{}' have conflicts...", $arg.to_string());
debug!("Does '{}' have conflicts...", $arg.to_string());
if let Some(bl) = $arg.blacklist() {
for name in bl {
sdebugln!("\tYes '{}'", name);
sdebugln!("\n\tYes '{}'", name);
$me.blacklist.push(name);
vec_remove!($me.overrides, name);
vec_remove!($me.required, name);
Expand Down Expand Up @@ -109,6 +111,7 @@ macro_rules! arg_post_processing(
macro_rules! _handle_group_reqs{
($me:ident, $arg:ident) => ({
use args::AnyArg;
debugln!("macro=_handle_group_reqs!;");
for grp in $me.groups.values() {
let mut found = false;
for name in grp.args.iter() {
Expand Down Expand Up @@ -142,6 +145,7 @@ macro_rules! _handle_group_reqs{

macro_rules! validate_multiples {
($_self:ident, $a:ident, $m:ident) => {
debugln!("macro=validate_multiples!;");
if $m.contains(&$a.name) && !$a.settings.is_set(ArgSettings::Multiple) {
// Not the first time, and we don't allow multiples
return Err(Error::unexpected_multiple_usage($a, &*$_self.create_current_usage($m)))
Expand Down
216 changes: 122 additions & 94 deletions src/app/parser.rs

Large diffs are not rendered by default.

56 changes: 55 additions & 1 deletion src/app/settings.rs
Expand Up @@ -29,7 +29,7 @@ pub struct AppFlags(Flags);

impl AppFlags {
pub fn new() -> Self {
AppFlags(NEEDS_LONG_VERSION | NEEDS_LONG_HELP | NEEDS_SC_HELP | UTF8_STRICT)
AppFlags(NEEDS_LONG_VERSION | NEEDS_LONG_HELP | NEEDS_SC_HELP | UTF8_NONE)
}

pub fn set(&mut self, s: AppSettings) {
Expand Down Expand Up @@ -329,7 +329,61 @@ pub enum AppSettings {
/// }
/// ```
AllowExternalSubcommands,
/// Specifies that any invalid UTF-8 code points should be treated as an error and fail
/// with an `ErrorKind::InvalidUtf8` error.
///
/// **NOTE:** This rule only applies to argument values, as flags, options, and subcommands
/// only allow valid UTF-8 code points.
///
/// # Examples
///
/// ```ignore
/// # use clap::{App, Arg, AppSettings, ErrorKind};
/// use std::ffi::OsString;
///
/// let m = App::new("myprog")
/// .setting(AppSettings::StrictUtf8)
/// .arg_from_usage("<arg> 'some positional arg'")
/// .get_matches_from_safe(
/// vec![
/// OsString::from("myprog"),
/// OsString::from_vec(vec![0xe9])]);
///
/// assert!(m.is_err());
/// assert_eq!(m.unwrap_err().kind, ErrorKind::InvalidUtf8);
/// }
/// ```
StrictUtf8,
/// Specifies that any invalid UTF-8 code points should *not* be treated as an error. This is
/// the default behavior of `clap`
///
/// **NOTE:** Using argument values with invalid UTF-8 code points requires using either
/// `ArgMatches::os_value(s)_of` or `ArgMatches::lossy_value(s)_of` for those particular
/// arguments which may have invalid UTF-8 values.
///
/// **NOTE:** This rule only applies to argument values, as flags, options, and subcommands
/// only allow valid UTF-8 code points.
///
/// # Examples
///
/// ```ignore
/// # use clap::{App, Arg, AppSettings};
/// use std::ffi::OsString;
/// use std::os::unix::ffi::{OsStrExt, OsStringExt};
///
/// let r = App::new("myprog")
/// .setting(AppSettings::AllowInvalidUtf8)
/// .arg_from_usage("<arg> 'some positional arg'")
/// .get_matches_from_safe(
/// vec![
/// OsString::from("myprog"),
/// OsString::from_vec(vec![0xe9])]);
///
/// assert!(r.is_ok());
/// let m = r.unwrap();
/// assert_eq!(m.os_value_of("arg").unwrap().as_bytes(), &[0xe9]);
/// }
/// ```
AllowInvalidUtf8,
#[doc(hidden)]
NeedsLongVersion,
Expand Down
10 changes: 10 additions & 0 deletions src/args/arg_matches.rs
Expand Up @@ -2,6 +2,7 @@ use std::ffi::{OsString, OsStr};
use std::collections::HashMap;
use std::iter::Map;
use std::slice;
use std::borrow::Cow;

use vec_map;

Expand Down Expand Up @@ -119,6 +120,15 @@ impl<'a> ArgMatches<'a> {
None
}

/// Looks up the argument called `name` and returns the first value yielded by its
/// `vals.values()` iterator, converted with `to_string_lossy`.
///
/// Returns `None` when `name` is not among the matched arguments, or when the argument
/// holds no values. The result is a `Cow` borrowed for `'a`.
pub fn lossy_value_of<S: AsRef<str>>(&'a self, name: S) -> Option<Cow<'a, str>> {
    self.args
        .get(name.as_ref())
        .and_then(|matched| matched.vals.values().nth(0))
        .map(|raw| raw.to_string_lossy())
}

/// Looks up the argument called `name` and returns the first value yielded by its
/// `vals.values()` iterator as a borrowed `OsStr`, with no UTF-8 conversion applied.
///
/// Returns `None` when `name` is not among the matched arguments, or when the argument
/// holds no values.
pub fn os_value_of<S: AsRef<str>>(&self, name: S) -> Option<&OsStr> {
    match self.args.get(name.as_ref()) {
        Some(matched) => matched.vals.values().nth(0).map(|raw| raw.as_os_str()),
        None => None,
    }
}
Expand Down
10 changes: 4 additions & 6 deletions src/errors.rs
Expand Up @@ -234,26 +234,24 @@ pub enum ErrorKind {
/// Occurs when the user provides a value containing invalid UTF-8 for an argument and
/// `AppSettings::StrictUtf8` is set.
///
/// **Note:** This is the default setting and behavior. If you wish to *allow* invalid UTF-8 in
/// argument values, use `AppSettings::AllowInvalidUtf8`
///
/// # Platform Specific
///
/// Non-Windows platforms only (such as Linux, Unix, OSX, etc.)
///
/// # Examples
///
/// ```ignore
/// # use clap::{App, Arg, ErrorKind};
/// # use clap::{App, Arg, ErrorKind, AppSettings};
/// # use std::os::unix::ffi::OsStringExt;
/// # use std::ffi::OsString;
/// let result = App::new("myprog")
/// .arg(Arg::with_name("debug")
/// .setting(AppSettings::StrictUtf8)
/// .arg(Arg::with_name("utf8")
/// .short("u")
/// .takes_value(true))
/// .get_matches_from_safe(vec![OsString::from("myprog"),
/// OsString::from("-u"),
/// OsString::from_vec(vec![0x20, 0xE9])]);
/// OsString::from_vec(vec![0xE9])]);
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().kind, ErrorKind::InvalidUtf8);
/// ```
Expand Down
4 changes: 4 additions & 0 deletions src/macros.rs
Expand Up @@ -65,6 +65,7 @@ macro_rules! load_yaml {
// used in src/args/arg_builder/option.rs
macro_rules! print_opt_help {
($opt:ident, $spc:expr, $w:ident) => {
debugln!("macro=print_opt_help!;");
if let Some(h) = $opt.help {
if h.contains("{n}") {
let mut hel = h.split("{n}");
Expand Down Expand Up @@ -96,6 +97,7 @@ macro_rules! print_opt_help {
// src/app/mod.rs
macro_rules! write_spaces {
($num:expr, $w:ident) => ({
debugln!("macro=write_spaces!;");
for _ in 0..$num {
try!(write!($w, " "));
}
Expand All @@ -105,6 +107,7 @@ macro_rules! write_spaces {
// convenience macro for removing an item from a vec
macro_rules! vec_remove {
($vec:expr, $to_rem:ident) => {
debugln!("macro=write_spaces!;");
{
let mut ix = None;
$vec.dedup();
Expand All @@ -127,6 +130,7 @@ macro_rules! vec_remove {
// item.
macro_rules! for_match {
($it:ident, $($p:pat => $($e:expr);+),*) => {
debugln!("macro=for_match!;");
for i in $it {
match i {
$(
Expand Down
5 changes: 5 additions & 0 deletions src/osstringext.rs
Expand Up @@ -8,6 +8,7 @@ pub trait OsStrExt2 {
fn trim_left_matches(&self, b: u8) -> &OsStr;
fn len(&self) -> usize;
fn contains_byte(&self, b: u8) -> bool;
fn is_empty(&self) -> bool;
}

impl OsStrExt2 for OsStr {
Expand All @@ -21,6 +22,10 @@ impl OsStrExt2 for OsStr {
return true;
}

/// Returns `true` when the byte slice obtained from `as_bytes` has zero length.
fn is_empty(&self) -> bool {
    let bytes = self.as_bytes();
    bytes.is_empty()
}

fn contains_byte(&self, byte: u8) -> bool {
for b in self.as_bytes() {
if b == &byte { return true; }
Expand Down
34 changes: 0 additions & 34 deletions src/utf8.rs
@@ -1,35 +1 @@
// use std::ffi::OsStr;
// use std::borrow::Cow;
//
// pub trait Utf8Rule { type Out; fn into(&OsStr) -> <Self as Utf8Rule>::Out; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub struct Strict<'a>;
// impl<'a> Utf8Rule for Strict<'a> { type Out = &'a str; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub struct Lossy<'a>;
// impl<'a> Utf8Rule for Lossy<'a> { type Out = Cow<'a, str>; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub struct AllowInvalid<'a>;
// impl<'a> Utf8Rule for AllowInvalid<'a> { type Out = &'a OsStr; }
//
// #[derive(Copy, Clone, Debug, PartialEq)]
// pub enum Utf8 {
// Strict,
// Lossy,
// AllowInvalid,
// }
//
// impl Utf8 {
// pub fn into<U: UtfRule>(&self) -> U::Out {
// match *self {
// Utf::Strict => Strict::,
// Utf::Lossy =>,
// Utf::AllowInvalid =>,
// }
// }
// }

pub const INVALID_UTF8: &'static str = "unexpected invalid UTF-8 code point";
8 changes: 5 additions & 3 deletions tests/posix_compatible.rs
Expand Up @@ -116,9 +116,11 @@ fn conflict_overriden_2() {
.arg(Arg::from_usage("-c, --color 'third flag'")
.mutually_overrides_with("flag"))
.get_matches_from_safe(vec!["myprog", "-f", "-d", "-c"]);
assert!(result.is_err());
let err = result.err().unwrap();
assert_eq!(err.kind, ErrorKind::ArgumentConflict);
assert!(result.is_ok());
let m = result.unwrap();
assert!(m.is_present("color"));
assert!(m.is_present("debug"));
assert!(!m.is_present("flag"));
}

#[test]
Expand Down
30 changes: 0 additions & 30 deletions tests/unicode.rs

This file was deleted.

0 comments on commit 7722618

Please sign in to comment.