Skip to content

Commit

Permalink
Fix comment parsing inside command substitutions and brackets
Browse files Browse the repository at this point in the history
  • Loading branch information
Shai-Aviv authored and krobelus committed Feb 8, 2022
1 parent 627033f commit 2ef12af
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Scripting improvements
- ``abbr -q`` returns the correct exit status when given multiple abbreviation names as arguments (:issue:`8431`).
- ``command -v`` returns an exit status of 127 instead of 1 if no command was found (:issue:`8547`).
- ``argparse`` with ``--ignore-unknown`` no longer breaks with multiple unknown options in a short option group (:issue:`8637`).
- Comments inside command substitutions or brackets now correctly ignore parentheses, quotes, and brackets (:issue:`7866`, :issue:`8022`).

Interactive improvements
------------------------
Expand Down
7 changes: 7 additions & 0 deletions src/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,13 @@ const wchar_t *quote_end(const wchar_t *pos, wchar_t quote) {
return nullptr;
}

const wchar_t *comment_end(const wchar_t *pos) {
do {
pos++;
} while (*pos && *pos != L'\n');
return pos;
}

void fish_setlocale() {
// Use various Unicode symbols if they can be encoded using the current locale, else a simple
// ASCII char alternative. All of the can_be_encoded() invocations should return the same
Expand Down
5 changes: 5 additions & 0 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,11 @@ std::unique_ptr<T> make_unique(Args &&...args) {
/// \param quote the quote to use, usually pointed to by \c pos.
const wchar_t *quote_end(const wchar_t *pos, wchar_t quote);

/// This functions returns the end of the comment substring beginning at \c pos.
///
/// \param pos the position where the comment starts, including the '#' symbol.
const wchar_t *comment_end(const wchar_t *pos);

/// This function should be called after calling `setlocale()` to perform fish specific locale
/// initialization.
void fish_setlocale();
Expand Down
12 changes: 9 additions & 3 deletions src/parse_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ size_t parse_util_get_offset(const wcstring &str, int line, long line_offset) {
static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, const wchar_t **end,
bool allow_incomplete, bool *inout_is_quoted) {
bool escaped = false;
bool is_first = true;
bool is_token_begin = true;
bool syntax_error = false;
int paran_count = 0;
std::vector<int> quoted_cmdsubs;
Expand Down Expand Up @@ -122,6 +124,10 @@ static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, co
if (!escaped) {
if (*pos == L'\'' || *pos == L'"') {
if (!process_opening_quote(*pos)) break;
} else if (*pos == L'\\') {
escaped = true;
} else if (*pos == L'#' && is_token_begin) {
pos = comment_end(pos) - 1;
} else {
if (*pos == L'(') {
if ((paran_count == 0) && (paran_begin == nullptr)) {
Expand Down Expand Up @@ -161,12 +167,12 @@ static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, co
}
}
}
}
if (*pos == '\\') {
escaped = !escaped;
is_token_begin = is_token_delimiter(pos[0], is_first, pos[1]);
} else {
escaped = false;
is_token_begin = false;
}
is_first = false;
}

syntax_error |= (paran_count < 0);
Expand Down
13 changes: 10 additions & 3 deletions src/tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ tok_t tokenizer_t::read_string() {
int slice_offset = 0;
const wchar_t *const buff_start = this->token_cursor;
bool is_first = true;
bool is_token_begin = true;

auto process_opening_quote = [&](wchar_t quote) -> const wchar_t * {
const wchar_t *end = quote_end(this->token_cursor, quote);
Expand Down Expand Up @@ -192,6 +193,8 @@ tok_t tokenizer_t::read_string() {
// has been explicitly ignored (escaped).
else if (c == L'\\') {
mode |= tok_modes::char_escape;
} else if (c == L'#' && is_token_begin) {
this->token_cursor = comment_end(this->token_cursor) - 1;
} else if (c == L'(') {
paran_offsets.push_back(this->token_cursor - this->start);
expecting.push_back(L')');
Expand Down Expand Up @@ -278,8 +281,9 @@ tok_t tokenizer_t::read_string() {
FLOGF(error, msg.c_str(), c, c, int(mode_begin), int(mode));
#endif

this->token_cursor++;
is_token_begin = is_token_delimiter(this->token_cursor[0], is_first, this->token_cursor[1]);
is_first = false;
this->token_cursor++;
}

if (!this->accept_unfinished && (mode != tok_modes::regular_text)) {
Expand Down Expand Up @@ -540,8 +544,7 @@ maybe_t<tok_t> tokenizer_t::next() {
while (*this->token_cursor == L'#') {
// We have a comment, walk over the comment.
const wchar_t *comment_start = this->token_cursor;
while (this->token_cursor[0] != L'\n' && this->token_cursor[0] != L'\0')
this->token_cursor++;
this->token_cursor = comment_end(this->token_cursor);
size_t comment_len = this->token_cursor - comment_start;

// If we are going to continue after the comment, skip any trailing newline.
Expand Down Expand Up @@ -679,6 +682,10 @@ maybe_t<tok_t> tokenizer_t::next() {
return result;
}

bool is_token_delimiter(wchar_t c, bool is_first, maybe_t<wchar_t> next) {
return c == L'(' || !tok_is_string_character(c, is_first, next);
}

wcstring tok_first(const wcstring &str) {
tokenizer_t t(str.c_str(), 0);
if (auto token = t.next()) {
Expand Down
3 changes: 3 additions & 0 deletions src/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ class tokenizer_t : noncopyable_t {
}
};

/// Tests if this character can delimit tokens.
bool is_token_delimiter(wchar_t c, bool is_first, maybe_t<wchar_t> next);

/// Returns only the first token from the specified string. This is a convenience function, used to
/// retrieve the first token of a string. This can be useful for error messages, etc. On failure,
/// returns the empty string.
Expand Down
17 changes: 17 additions & 0 deletions tests/checks/basic.fish
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,23 @@ echo banana
echo (echo hello\\)
# CHECK: hello\

# This used to be a parse error - #7866.
echo (echo foo;#)
)
# CHECK: foo
echo (echo bar #'
)
# CHECK: bar
echo (#"
echo baz)
# CHECK: baz

# Make sure we don't match up brackets within comments (#8022).
$fish -c 'echo f[oo # not valid, no matching ]'
# CHECKERR: fish: Unexpected end of string, square brackets do not match
# CHECKERR: echo f[oo # not valid, no matching ]
# CHECKERR: {{ }}^

# Should fail because $PWD is read-only.
for PWD in foo bar
true
Expand Down

0 comments on commit 2ef12af

Please sign in to comment.