Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace lex_starts_at with Tokens in the formatter #11515

Merged
merged 1 commit into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions crates/ruff_benchmark/benches/formatter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,8 @@ fn benchmark_formatter(criterion: &mut Criterion) {
b.iter(|| {
let options = PyFormatOptions::from_extension(Path::new(case.name()))
.with_preview(PreviewMode::Enabled);
let formatted = format_module_ast(
program.syntax(),
program.comment_ranges(),
case.code(),
options,
)
.expect("Formatting to succeed");
let formatted = format_module_ast(&program, case.code(), options)
.expect("Formatting to succeed");

formatted.print().expect("Printing to succeed")
});
Expand Down
4 changes: 2 additions & 2 deletions crates/ruff_python_formatter/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ pub fn format_and_debug_print(source: &str, cli: &Cli, source_path: &Path) -> Re
});

let source_code = SourceCode::new(source);
let formatted = format_module_ast(program.syntax(), program.comment_ranges(), source, options)
.context("Failed to format node")?;
let formatted =
format_module_ast(&program, source, options).context("Failed to format node")?;
if cli.print_ir {
println!("{}", formatted.document().display(source_code));
}
Expand Down
14 changes: 13 additions & 1 deletion crates/ruff_python_formatter/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::other::f_string_element::FStringExpressionElementContext;
use crate::PyFormatOptions;
use ruff_formatter::{Buffer, FormatContext, GroupId, IndentWidth, SourceCode};
use ruff_python_ast::str::Quote;
use ruff_python_parser::Tokens;
use ruff_source_file::Locator;
use std::fmt::{Debug, Formatter};
use std::ops::{Deref, DerefMut};
Expand All @@ -12,6 +13,7 @@ pub struct PyFormatContext<'a> {
options: PyFormatOptions,
contents: &'a str,
comments: Comments<'a>,
tokens: &Tokens,
node_level: NodeLevel,
indent_level: IndentLevel,
/// Set to a non-None value when the formatter is running on a code
Expand All @@ -28,11 +30,17 @@ pub struct PyFormatContext<'a> {
}

impl<'a> PyFormatContext<'a> {
pub(crate) fn new(options: PyFormatOptions, contents: &'a str, comments: Comments<'a>) -> Self {
pub(crate) fn new(
options: PyFormatOptions,
contents: &'a str,
comments: Comments<'a>,
tokens: &Tokens,
) -> Self {
Self {
options,
contents,
comments,
tokens,
node_level: NodeLevel::TopLevel(TopLevelStatementPosition::Other),
indent_level: IndentLevel::new(0),
docstring: None,
Expand Down Expand Up @@ -69,6 +77,10 @@ impl<'a> PyFormatContext<'a> {
&self.comments
}

/// Returns the token stream that was handed to [`PyFormatContext::new`].
pub(crate) fn tokens(&self) -> &Tokens {
self.tokens
}

/// Returns a non-None value only if the formatter is running on a code
/// snippet within a docstring.
///
Expand Down
16 changes: 7 additions & 9 deletions crates/ruff_python_formatter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use ruff_formatter::prelude::*;
use ruff_formatter::{format, write, FormatError, Formatted, PrintError, Printed, SourceCode};
use ruff_python_ast::AstNode;
use ruff_python_ast::Mod;
use ruff_python_parser::{parse, AsMode, ParseError, ParseErrorType};
use ruff_python_parser::{parse, AsMode, ParseError, ParseErrorType, Program};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::Locator;

Expand Down Expand Up @@ -114,23 +114,22 @@ pub fn format_module_source(
) -> Result<Printed, FormatModuleError> {
let source_type = options.source_type();
let program = parse(source, source_type.as_mode())?;
let formatted = format_module_ast(program.syntax(), program.comment_ranges(), source, options)?;
let formatted = format_module_ast(&program, source, options)?;
Ok(formatted.print()?)
}

pub fn format_module_ast<'a>(
module: &'a Mod,
comment_ranges: &'a CommentRanges,
program: &'a Program<Mod>,
source: &'a str,
options: PyFormatOptions,
) -> FormatResult<Formatted<PyFormatContext<'a>>> {
let source_code = SourceCode::new(source);
let comments = Comments::from_ast(module, source_code, comment_ranges);
let comments = Comments::from_ast(program.syntax(), source_code, program.comment_ranges());
let locator = Locator::new(source);

let formatted = format!(
PyFormatContext::new(options, locator.contents(), comments),
[module.format()]
PyFormatContext::new(options, locator.contents(), comments, program.tokens()),
[program.syntax().format()]
)?;
formatted
.context()
Expand Down Expand Up @@ -201,8 +200,7 @@ def main() -> None:
let source_path = "code_inline.py";
let program = parse(source, source_type.as_mode()).unwrap();
let options = PyFormatOptions::from_extension(Path::new(source_path));
let formatted =
format_module_ast(program.syntax(), program.comment_ranges(), source, options).unwrap();
let formatted = format_module_ast(&program, source, options).unwrap();

// Uncomment the `dbg` to print the IR.
// Use `dbg_write!(f, []) instead of `write!(f, [])` in your formatting code to print some IR
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_python_formatter/src/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ pub fn format_range(
options.with_source_map_generation(SourceMapGeneration::Enabled),
source,
comments,
program.tokens(),
);

let (enclosing_node, base_indent) =
Expand Down
7 changes: 4 additions & 3 deletions crates/ruff_python_formatter/src/statement/suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -859,16 +859,17 @@ def trailing_func():
pass
";

let module = parse_module(source).unwrap();
let program = parse_module(source).unwrap();

let context = PyFormatContext::new(
PyFormatOptions::default(),
source,
Comments::from_ranges(module.comment_ranges()),
Comments::from_ranges(program.comment_ranges()),
program.tokens(),
);

let test_formatter =
format_with(|f: &mut PyFormatter| module.suite().format().with_options(level).fmt(f));
format_with(|f: &mut PyFormatter| program.suite().format().with_options(level).fmt(f));

let formatted = format!(context, [test_formatter]).unwrap();
let printed = formatted.print().unwrap();
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_python_formatter/src/string/docstring.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1558,7 +1558,7 @@ fn docstring_format_source(
crate::Comments::from_ast(program.syntax(), source_code, program.comment_ranges());
let locator = Locator::new(source);

let ctx = PyFormatContext::new(options, locator.contents(), comments)
let ctx = PyFormatContext::new(options, locator.contents(), comments, program.tokens())
.in_docstring(docstring_quote_style);
let formatted = crate::format!(ctx, [program.syntax().format()])?;
formatted
Expand Down
57 changes: 28 additions & 29 deletions crates/ruff_python_formatter/src/verbatim.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use std::borrow::Cow;
use std::iter::FusedIterator;
use std::slice::Iter;

use unicode_width::UnicodeWidthStr;

use ruff_formatter::{write, FormatError};
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::Stmt;
use ruff_python_parser::lexer::{lex_starts_at, LexResult};
use ruff_python_parser::{Mode, Tok};
use ruff_python_parser::{self as parser, TokenKind};
use ruff_python_trivia::lines_before;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};
Expand Down Expand Up @@ -725,13 +725,13 @@ struct FormatVerbatimStatementRange {

impl Format<PyFormatContext<'_>> for FormatVerbatimStatementRange {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
let lexer = lex_starts_at(
&f.context().source()[self.verbatim_range],
Mode::Module,
self.verbatim_range.start(),
let logical_lines = LogicalLinesIter::new(
f.context()
.tokens()
.tokens_in_range(self.verbatim_range)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit (unrelated to this PR): `tokens_in_range` reads a bit strange because the word "tokens" gets repeated at the call site (`tokens().tokens_in_range(..)`). I think we could just call it `in_range`, assuming that the variable or method it is called on is most likely named `tokens`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've made a note of that. I'll bundle this with the other renames.

.iter(),
self.verbatim_range,
);

let logical_lines = LogicalLinesIter::new(lexer, self.verbatim_range);
let mut first = true;

for logical_line in logical_lines {
Expand Down Expand Up @@ -784,43 +784,47 @@ impl Format<PyFormatContext<'_>> for FormatVerbatimStatementRange {
}
}

struct LogicalLinesIter<I> {
lexer: I,
struct LogicalLinesIter<'a> {
tokens: Iter<'a, parser::Token>,
// The end of the last logical line
last_line_end: TextSize,
// The position where the content to lex ends.
content_end: TextSize,
}

impl<I> LogicalLinesIter<I> {
fn new(lexer: I, verbatim_range: TextRange) -> Self {
impl LogicalLinesIter<'_> {
fn new(tokens: Iter<'_, parser::Token>, verbatim_range: TextRange) -> Self {
Self {
lexer,
tokens,
last_line_end: verbatim_range.start(),
content_end: verbatim_range.end(),
}
}
}

impl<I> Iterator for LogicalLinesIter<I>
where
I: Iterator<Item = LexResult>,
{
impl Iterator for LogicalLinesIter {
type Item = FormatResult<LogicalLine>;

fn next(&mut self) -> Option<Self::Item> {
let mut parens = 0u32;

let (content_end, full_end) = loop {
match self.lexer.next() {
Some(Ok((token, range))) => match token {
Tok::Newline => break (range.start(), range.end()),
match self.tokens.next() {
Some(token) if token.kind() == TokenKind::Unknown => {
return Some(Err(FormatError::syntax_error(
"Unexpected token when lexing verbatim statement range.",
)))
}
Some(token) => match token.kind() {
TokenKind::Newline => break (token.start(), token.end()),
// Ignore if inside an expression
Tok::NonLogicalNewline if parens == 0 => break (range.start(), range.end()),
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
TokenKind::NonLogicalNewline if parens == 0 => {
break (token.start(), token.end())
}
TokenKind::Lbrace | TokenKind::Lpar | TokenKind::Lsqb => {
parens = parens.saturating_add(1);
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
TokenKind::Rbrace | TokenKind::Rpar | TokenKind::Rsqb => {
parens = parens.saturating_sub(1);
}
_ => {}
Expand All @@ -839,11 +843,6 @@ where
None
};
}
Some(Err(_)) => {
return Some(Err(FormatError::syntax_error(
"Unexpected token when lexing verbatim statement range.",
)))
}
}
};

Expand All @@ -857,7 +856,7 @@ where
}
}

impl<I> FusedIterator for LogicalLinesIter<I> where I: Iterator<Item = LexResult> {}
impl<I> FusedIterator for LogicalLinesIter<I> where I: Iterator<Item = parser::Token> {}

/// A logical line or a comment (or form feed only) line
struct LogicalLine {
Expand Down
5 changes: 5 additions & 0 deletions crates/ruff_python_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,11 @@ impl Tokens {
/// The range `4..10` would return a slice of `Name`, `Lpar`, `Rpar`, and `Colon` tokens. But,
/// if either the start or end position of the given range doesn't match any of the tokens
/// (like `5..10` or `4..12`), the returned slice will be empty.
///
/// ## Note
///
/// The returned slice can contain the [`TokenKind::Unknown`] token if there was a lexical
/// error encountered within the given range.
pub fn tokens_in_range(&self, range: TextRange) -> &[Token] {
let Ok(start) = self.binary_search_by_key(&range.start(), Ranged::start) else {
return &[];
Expand Down
7 changes: 1 addition & 6 deletions crates/ruff_wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,6 @@ impl<'a> ParsedModule<'a> {
.to_format_options(PySourceType::default(), self.source_code)
.with_source_map_generation(SourceMapGeneration::Enabled);

format_module_ast(
self.program.syntax(),
self.program.comment_ranges(),
self.source_code,
options,
)
format_module_ast(&self.program, self.source_code, options)
}
}
Loading