Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace lex usages #11562

Merged
merged 1 commit (branch names lost in page extraction)
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions crates/ruff_dev/src/print_tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use anyhow::Result;

use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::PySourceType;
use ruff_python_parser::{lexer, AsMode};
use ruff_python_parser::parse_unchecked_source;

#[derive(clap::Args)]
pub(crate) struct Args {
Expand All @@ -24,11 +24,13 @@ pub(crate) fn main(args: &Args) -> Result<()> {
args.file.display()
)
})?;
for (tok, range) in lexer::lex(source_kind.source_code(), source_type.as_mode()).flatten() {
let program = parse_unchecked_source(source_kind.source_code(), source_type);
for token in program.tokens() {
println!(
"{start:#?} {tok:#?} {end:#?}",
start = range.start(),
end = range.end()
"{start:#?} {kind:#?} {end:#?}",
start = token.start(),
end = token.end()
kind = token.kind()
);
}
Ok(())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use ruff_python_ast::{CmpOp, Expr};
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers;
use ruff_python_parser::{lexer, Mode, Tok};
use ruff_python_parser::{TokenKind, Tokens};
use ruff_text_size::{Ranged, TextRange, TextSize};

use crate::checkers::ast::Checker;
Expand Down Expand Up @@ -96,7 +96,7 @@ pub(crate) fn invalid_literal_comparison(
{
let mut diagnostic = Diagnostic::new(IsLiteral { cmp_op: op.into() }, expr.range());
if lazy_located.is_none() {
lazy_located = Some(locate_cmp_ops(expr, checker.locator().contents()));
lazy_located = Some(locate_cmp_ops(expr, checker.program().tokens()));
}
if let Some(located_op) = lazy_located.as_ref().and_then(|located| located.get(index)) {
assert_eq!(located_op.op, *op);
Expand Down Expand Up @@ -138,102 +138,85 @@ impl From<&CmpOp> for IsCmpOp {
}
}

/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate
/// ranges.
/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate ranges.
///
/// `RustPython` doesn't include line and column information on [`CmpOp`] nodes.
/// `CPython` doesn't either. This method iterates over the token stream and
/// re-identifies [`CmpOp`] nodes, annotating them with valid ranges.
fn locate_cmp_ops(expr: &Expr, source: &str) -> Vec<LocatedCmpOp> {
// If `Expr` is a multi-line expression, we need to parenthesize it to
// ensure that it's lexed correctly.
let contents = &source[expr.range()];
let parenthesized_contents = format!("({contents})");
let mut tok_iter = lexer::lex(&parenthesized_contents, Mode::Expression)
.flatten()
.skip(1)
.map(|(tok, range)| (tok, range - TextSize::from(1)))
.filter(|(tok, _)| !matches!(tok, Tok::NonLogicalNewline | Tok::Comment(_)))
/// This method iterates over the token stream and re-identifies [`CmpOp`] nodes, annotating them
/// with valid ranges.
fn locate_cmp_ops(expr: &Expr, tokens: &Tokens) -> Vec<LocatedCmpOp> {
let mut tok_iter = tokens
.tokens_in_range(expr.range())
.iter()
.filter(|token| !token.is_trivia())
.peekable();

let mut ops: Vec<LocatedCmpOp> = vec![];

// Track the bracket depth.
let mut par_count = 0u32;
let mut sqb_count = 0u32;
let mut brace_count = 0u32;
// Track the nesting level.
let mut nesting = 0u32;

loop {
let Some((tok, range)) = tok_iter.next() else {
let Some(token) = tok_iter.next() else {
break;
};

match tok {
Tok::Lpar => {
par_count = par_count.saturating_add(1);
match token.kind() {
TokenKind::Lpar | TokenKind::Lsqb | TokenKind::Lbrace => {
nesting = nesting.saturating_add(1);
}
Tok::Rpar => {
par_count = par_count.saturating_sub(1);
}
Tok::Lsqb => {
sqb_count = sqb_count.saturating_add(1);
}
Tok::Rsqb => {
sqb_count = sqb_count.saturating_sub(1);
}
Tok::Lbrace => {
brace_count = brace_count.saturating_add(1);
}
Tok::Rbrace => {
brace_count = brace_count.saturating_sub(1);
TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace => {
nesting = nesting.saturating_sub(1);
}
_ => {}
}

if par_count > 0 || sqb_count > 0 || brace_count > 0 {
if nesting > 0 {
continue;
}

match tok {
Tok::Not => {
if let Some((_, next_range)) = tok_iter.next_if(|(tok, _)| tok.is_in()) {
match token.kind() {
TokenKind::Not => {
if let Some((_, next_range)) =
tok_iter.next_if(|token| token.kind() == TokenKind::In)
{
ops.push(LocatedCmpOp::new(
TextRange::new(range.start(), next_range.end()),
TextRange::new(token.start(), next_range.end()),
CmpOp::NotIn,
));
}
}
Tok::In => {
ops.push(LocatedCmpOp::new(range, CmpOp::In));
TokenKind::In => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::In));
}
Tok::Is => {
let op = if let Some((_, next_range)) = tok_iter.next_if(|(tok, _)| tok.is_not()) {
TokenKind::Is => {
let op = if let Some((_, next_range)) =
tok_iter.next_if(|token| token.kind() == TokenKind::Not)
{
LocatedCmpOp::new(
TextRange::new(range.start(), next_range.end()),
TextRange::new(token.start(), next_range.end()),
CmpOp::IsNot,
)
} else {
LocatedCmpOp::new(range, CmpOp::Is)
LocatedCmpOp::new(token.range(), CmpOp::Is)
};
ops.push(op);
}
Tok::NotEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::NotEq));
TokenKind::NotEqual => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::NotEq));
}
Tok::EqEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::Eq));
TokenKind::EqEqual => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::Eq));
}
Tok::GreaterEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::GtE));
TokenKind::GreaterEqual => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::GtE));
}
Tok::Greater => {
ops.push(LocatedCmpOp::new(range, CmpOp::Gt));
TokenKind::Greater => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::Gt));
}
Tok::LessEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::LtE));
TokenKind::LessEqual => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::LtE));
}
Tok::Less => {
ops.push(LocatedCmpOp::new(range, CmpOp::Lt));
TokenKind::Less => {
ops.push(LocatedCmpOp::new(token.range(), CmpOp::Lt));
}
_ => {}
}
Expand Down Expand Up @@ -266,72 +249,70 @@ mod tests {

use super::{locate_cmp_ops, LocatedCmpOp};

fn extract_cmp_op_locations(source: &str) -> Result<Vec<LocatedCmpOp>> {
let program = parse_expression(source)?;
Ok(locate_cmp_ops(program.expr(), program.tokens()))
}

#[test]
fn extract_cmp_op_location() -> Result<()> {
fn test_locate_cmp_ops() -> Result<()> {
let contents = "x == 1";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::Eq
)]
);

let contents = "x != 1";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::NotEq
)]
);

let contents = "x is 1";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::Is
)]
);

let contents = "x is not 1";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(8),
CmpOp::IsNot
)]
);

let contents = "x in 1";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::In
)]
);

let contents = "x not in 1";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(8),
CmpOp::NotIn
)]
);

let contents = "x != (1 is not 2)";
let expr = parse_expression(contents)?.expr();
assert_eq!(
locate_cmp_ops(expr, contents),
extract_cmp_op_locations(contents)?,
vec![LocatedCmpOp::new(
TextSize::from(2)..TextSize::from(4),
CmpOp::NotEq
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use insta::assert_debug_snapshot;

use ruff_python_parser::lexer::lex;
use ruff_python_parser::{Mode, Tok};
use ruff_python_parser::{parse_module, Mode, Tok};
use ruff_python_trivia::{lines_after, lines_before, SimpleToken, SimpleTokenizer};
use ruff_python_trivia::{BackwardsTokenizer, SimpleTokenKind};
use ruff_text_size::{TextLen, TextRange, TextSize};
Expand All @@ -23,17 +23,8 @@ impl TokenizationTestCase {
}

fn tokenize_reverse(&self) -> Vec<SimpleToken> {
let comment_ranges: Vec<_> = lex(self.source, Mode::Module)
.filter_map(|result| {
let (token, range) = result.expect("Input to be a valid python program.");
if matches!(token, Tok::Comment(_)) {
Some(range)
} else {
None
}
})
.collect();
BackwardsTokenizer::new(self.source, self.range, &comment_ranges).collect()
let program = parse_module(self.source).expect("Input to be a valid Python program");
BackwardsTokenizer::new(self.source, self.range, program.comment_ranges()).collect()
}

fn tokens(&self) -> &[SimpleToken] {
Expand Down
4 changes: 2 additions & 2 deletions crates/ruff_wasm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,9 @@ impl Workspace {
}

pub fn tokens(&self, contents: &str) -> Result<String, Error> {
let tokens: Vec<_> = ruff_python_parser::lexer::lex(contents, Mode::Module).collect();
let program = ruff_python_parser::parse_module(contents)?;

Ok(format!("{tokens:#?}"))
Ok(format!("{:#?}", program.tokens()))
}
}

Expand Down
Loading