Skip to content

Commit

Permalink
Avoid tracking parentheses nesting multiple times
Browse files Browse the repository at this point in the history
  • Loading branch information
dhruvmanila committed Sep 5, 2023
1 parent 2734e78 commit 098ee5d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 56 deletions.
42 changes: 15 additions & 27 deletions crates/ruff_python_parser/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ impl<'source> Lexer<'source> {
flags |= FStringContextFlags::TRIPLE;
};

self.fstrings.push(FStringContext::new(flags));
self.fstrings.push(FStringContext::new(flags, self.nesting));
Tok::FStringStart
}

Expand Down Expand Up @@ -565,7 +565,7 @@ impl<'source> Lexer<'source> {
let mut last_offset = self.offset();

let mut in_named_unicode = false;
let mut try_end_format_spec = false;
let mut end_format_spec = false;

loop {
match self.cursor.first() {
Expand Down Expand Up @@ -631,14 +631,18 @@ impl<'source> Lexer<'source> {
if in_named_unicode {
in_named_unicode = false;
self.cursor.bump();
} else if self.cursor.second() == '}' && !fstring.is_in_format_spec() {
} else if self.cursor.second() == '}'
&& !fstring.is_in_format_spec(self.nesting)
{
self.cursor.bump();
normalized
.push_str(&self.source[TextRange::new(last_offset, self.offset())]);
self.cursor.bump(); // Skip the second `}`
last_offset = self.offset();
} else {
try_end_format_spec = true;
// The lexer can only be in a format spec if we encounter a `}` token
// while scanning for `FStringMiddle` tokens.
end_format_spec = true;
break;
}
}
Expand All @@ -652,7 +656,7 @@ impl<'source> Lexer<'source> {

// Avoid emitting the empty `FStringMiddle` token for anything other than
// the closing curly braces (`}`).
if range.is_empty() && !try_end_format_spec {
if range.is_empty() && !end_format_spec {
return Ok(None);
}

Expand All @@ -668,15 +672,15 @@ impl<'source> Lexer<'source> {
normalized
};
let is_raw = fstring.is_raw_string();
if try_end_format_spec {
if end_format_spec {
// We need to decrement the format spec depth to avoid going into an infinite
// loop where the lexer keeps on emitting an empty `FStringMiddle` token.
// This is because the lexer still thinks that we're in an f-string expression
// but as we've encountered a `}` token, we need to decrement the depth so
// that the lexer can go forward with the `Rbrace` token.
//
// SAFETY: Safe because the function is only called when `self.fstrings` is not empty.
self.fstrings.current_mut().unwrap().try_end_format_spec();
self.fstrings.current_mut().unwrap().end_format_spec();
}
Ok(Some(Tok::FStringMiddle { value, is_raw }))
}
Expand Down Expand Up @@ -773,7 +777,7 @@ impl<'source> Lexer<'source> {
// This function is used by the iterator implementation.
pub fn next_token(&mut self) -> LexResult {
if let Some(fstring) = self.fstrings.current() {
if !fstring.is_in_expression() {
if !fstring.is_in_expression(self.nesting) {
self.cursor.start_token();
if let Some(tok) = self.lex_fstring_middle_or_end()? {
if matches!(tok, Tok::FStringEnd) {
Expand Down Expand Up @@ -1059,49 +1063,33 @@ impl<'source> Lexer<'source> {
}
'~' => Tok::Tilde,
'(' => {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.increment_opening_parentheses();
}
self.nesting += 1;
Tok::Lpar
}
')' => {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.decrement_closing_parentheses();
}
self.nesting = self.nesting.saturating_sub(1);
Tok::Rpar
}
'[' => {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.increment_opening_parentheses();
}
self.nesting += 1;
Tok::Lsqb
}
']' => {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.decrement_closing_parentheses();
}
self.nesting = self.nesting.saturating_sub(1);
Tok::Rsqb
}
'{' => {
if let Some(fstring) = self.fstrings.current_mut() {
fstring.increment_opening_parentheses();
}
self.nesting += 1;
Tok::Lbrace
}
'}' => {
if let Some(fstring) = self.fstrings.current_mut() {
if !fstring.has_open_parentheses() {
if let Some(fstring) = self.fstrings.current() {
if !fstring.has_open_parentheses(self.nesting) {
return Err(LexicalError {
error: LexicalErrorType::FStringError(FStringErrorType::SingleRbrace),
location: self.token_start(),
});
}
fstring.decrement_closing_parentheses();
}
self.nesting = self.nesting.saturating_sub(1);
Tok::Rbrace
Expand All @@ -1110,7 +1098,7 @@ impl<'source> Lexer<'source> {
if self
.fstrings
.current_mut()
.is_some_and(FStringContext::try_start_format_spec)
.is_some_and(|fstring| fstring.try_start_format_spec(self.nesting))
{
Tok::Colon
} else if self.cursor.eat_char('=') {
Expand Down
47 changes: 18 additions & 29 deletions crates/ruff_python_parser/src/lexer/fstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ bitflags! {
pub(crate) struct FStringContext {
flags: FStringContextFlags,

/// The number of open parentheses for the current f-string. This includes all
/// three types of parentheses: round (`(`), square (`[`), and curly (`{`).
open_parentheses_count: u32,
/// The level of nesting for the lexer when it entered the current f-string.
nesting: u32,

/// The current depth of format spec for the current f-string. This is because
/// there can be multiple format specs nested for the same f-string.
Expand All @@ -36,11 +35,11 @@ pub(crate) struct FStringContext {
}

impl FStringContext {
pub(crate) fn new(flags: FStringContextFlags) -> Self {
pub(crate) fn new(flags: FStringContextFlags, nesting: u32) -> Self {
Self {
flags,
open_parentheses_count: 0,
format_spec_depth: 0,
nesting,
}
}

Expand Down Expand Up @@ -86,42 +85,34 @@ impl FStringContext {
self.flags.contains(FStringContextFlags::TRIPLE)
}

/// Returns `true` if the current f-string has open parentheses.
pub(crate) fn has_open_parentheses(&mut self) -> bool {
self.open_parentheses_count > 0
}

/// Increments the number of parentheses for the current f-string.
pub(crate) fn increment_opening_parentheses(&mut self) {
self.open_parentheses_count += 1;
fn open_parentheses_count(&self, current_nesting: u32) -> u32 {
current_nesting.saturating_sub(self.nesting)
}

/// Decrements the number of parentheses for the current f-string. If the
/// lexer is in a format spec, also decrements the format spec depth.
pub(crate) fn decrement_closing_parentheses(&mut self) {
self.try_end_format_spec();
self.open_parentheses_count = self.open_parentheses_count.saturating_sub(1);
/// Returns `true` if the current f-string has open parentheses.
pub(crate) fn has_open_parentheses(&self, current_nesting: u32) -> bool {
self.open_parentheses_count(current_nesting) > 0
}

/// Returns `true` if the lexer is in an f-string expression, i.e., between
/// two curly braces.
pub(crate) fn is_in_expression(&self) -> bool {
self.open_parentheses_count > self.format_spec_depth
pub(crate) fn is_in_expression(&self, current_nesting: u32) -> bool {
self.open_parentheses_count(current_nesting) > self.format_spec_depth
}

/// Returns `true` if the lexer is in an f-string format spec, i.e., after a colon.
pub(crate) fn is_in_format_spec(&self) -> bool {
self.format_spec_depth > 0 && !self.is_in_expression()
pub(crate) fn is_in_format_spec(&self, current_nesting: u32) -> bool {
self.format_spec_depth > 0 && !self.is_in_expression(current_nesting)
}

/// Returns `true` if the context is in a valid position to start a format spec,
/// i.e., at the same level of nesting as the opening parentheses token.
/// Increments the format spec depth if it is.
///
/// This assumes that the current character for the lexer is a colon (`:`).
pub(crate) fn try_start_format_spec(&mut self) -> bool {
pub(crate) fn try_start_format_spec(&mut self, current_nesting: u32) -> bool {
if self
.open_parentheses_count
.open_parentheses_count(current_nesting)
.saturating_sub(self.format_spec_depth)
== 1
{
Expand All @@ -132,11 +123,9 @@ impl FStringContext {
}
}

/// Decrements the format spec depth if the lexer is in a format spec.
pub(crate) fn try_end_format_spec(&mut self) {
if self.is_in_format_spec() {
self.format_spec_depth = self.format_spec_depth.saturating_sub(1);
}
/// Decrements the format spec depth unconditionally.
pub(crate) fn end_format_spec(&mut self) {
self.format_spec_depth = self.format_spec_depth.saturating_sub(1);
}
}

Expand Down

0 comments on commit 098ee5d

Please sign in to comment.