Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor shebang parsing to remove regex dependency #5690

Merged
merged 2 commits into from
Jul 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions crates/ruff/src/checkers/physical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use ruff_python_whitespace::UniversalNewlines;

use crate::registry::Rule;
use crate::rules::flake8_copyright::rules::missing_copyright_notice;
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
use crate::rules::flake8_executable::helpers::ShebangDirective;
use crate::rules::flake8_executable::rules::{
shebang_missing, shebang_newline, shebang_not_executable, shebang_python, shebang_whitespace,
};
Expand Down Expand Up @@ -87,33 +87,35 @@ pub(crate) fn check_physical_lines(
|| enforce_shebang_newline
|| enforce_shebang_python
{
let shebang = extract_shebang(&line);
if enforce_shebang_not_executable {
if let Some(diagnostic) = shebang_not_executable(path, line.range(), &shebang) {
diagnostics.push(diagnostic);
if let Some(shebang) = ShebangDirective::try_extract(&line) {
has_any_shebang = true;
if enforce_shebang_not_executable {
if let Some(diagnostic) =
shebang_not_executable(path, line.range(), &shebang)
{
diagnostics.push(diagnostic);
}
}
}
if enforce_shebang_missing {
if !has_any_shebang && matches!(shebang, ShebangDirective::Match(..)) {
has_any_shebang = true;
if enforce_shebang_whitespace {
if let Some(diagnostic) =
shebang_whitespace(line.range(), &shebang, fix_shebang_whitespace)
{
diagnostics.push(diagnostic);
}
}
}
if enforce_shebang_whitespace {
if let Some(diagnostic) =
shebang_whitespace(line.range(), &shebang, fix_shebang_whitespace)
{
diagnostics.push(diagnostic);
if enforce_shebang_newline {
if let Some(diagnostic) =
shebang_newline(line.range(), &shebang, index == 0)
{
diagnostics.push(diagnostic);
}
}
}
if enforce_shebang_newline {
if let Some(diagnostic) = shebang_newline(line.range(), &shebang, index == 0) {
diagnostics.push(diagnostic);
}
}
if enforce_shebang_python {
if let Some(diagnostic) = shebang_python(line.range(), &shebang) {
diagnostics.push(diagnostic);
if enforce_shebang_python {
if let Some(diagnostic) = shebang_python(line.range(), &shebang) {
diagnostics.push(diagnostic);
}
}
} else {
}
}
}
Expand Down
120 changes: 62 additions & 58 deletions crates/ruff/src/rules/flake8_executable/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,84 +5,88 @@ use std::path::Path;

#[cfg(target_family = "unix")]
use anyhow::Result;
use once_cell::sync::Lazy;
use regex::Regex;
use ruff_text_size::{TextLen, TextSize};

static SHEBANG_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(?P<spaces>\s*)#!(?P<directive>.*)").unwrap());

/// A shebang directive (e.g., `#!/usr/bin/env python3`).
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum ShebangDirective<'a> {
None,
// whitespace length, start of the shebang, contents
Match(TextSize, TextSize, &'a str),
pub(crate) struct ShebangDirective<'a> {
/// The offset of the directive contents (e.g., `/usr/bin/env python3`) from the start of the
/// line.
pub(crate) offset: TextSize,
/// The contents of the directive (e.g., `"/usr/bin/env python3"`).
pub(crate) contents: &'a str,
}

pub(crate) fn extract_shebang(line: &str) -> ShebangDirective {
// Minor optimization to avoid matches in the common case.
if !line.contains('!') {
return ShebangDirective::None;
impl<'a> ShebangDirective<'a> {
/// Parse a shebang directive from a line, or return `None` if the line does not contain a
/// shebang directive.
pub(crate) fn try_extract(line: &'a str) -> Option<Self> {
// Trim whitespace.
let directive = Self::lex_whitespace(line);

// Trim the `#!` prefix.
let directive = Self::lex_char(directive, '#')?;
let directive = Self::lex_char(directive, '!')?;

Some(Self {
offset: line.text_len() - directive.text_len(),
contents: directive,
})
}
match SHEBANG_REGEX.captures(line) {
Some(caps) => match caps.name("spaces") {
Some(spaces) => match caps.name("directive") {
Some(matches) => ShebangDirective::Match(
spaces.as_str().text_len(),
TextSize::try_from(matches.start()).unwrap(),
matches.as_str(),
),
None => ShebangDirective::None,
},
None => ShebangDirective::None,
},
None => ShebangDirective::None,

/// Lex optional leading whitespace.
#[inline]
fn lex_whitespace(line: &str) -> &str {
line.trim_start()
}

/// Lex a specific character, or return `None` if the character is not the first character in
/// the line.
#[inline]
fn lex_char(line: &str, c: char) -> Option<&str> {
let mut chars = line.chars();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Cursor from the formatters SimpleTokenizer could be useful here because it provides these helpers already (with implementations that lower well to llvm bytecode)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll probably replace this and the noqa parser with that.

if chars.next() == Some(c) {
Some(chars.as_str())
} else {
None
}
}
}

#[cfg(target_family = "unix")]
pub(crate) fn is_executable(filepath: &Path) -> Result<bool> {
{
let metadata = filepath.metadata()?;
let permissions = metadata.permissions();
Ok(permissions.mode() & 0o111 != 0)
}
pub(super) fn is_executable(filepath: &Path) -> Result<bool> {
let metadata = filepath.metadata()?;
let permissions = metadata.permissions();
Ok(permissions.mode() & 0o111 != 0)
}

#[cfg(test)]
mod tests {
use ruff_text_size::TextSize;
use insta::assert_debug_snapshot;

use crate::rules::flake8_executable::helpers::ShebangDirective;

use crate::rules::flake8_executable::helpers::{
extract_shebang, ShebangDirective, SHEBANG_REGEX,
};
#[test]
fn shebang_non_match() {
let source = "not a match";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}

#[test]
fn shebang_regex() {
// Positive cases
assert!(SHEBANG_REGEX.is_match("#!/usr/bin/python"));
assert!(SHEBANG_REGEX.is_match("#!/usr/bin/env python"));
assert!(SHEBANG_REGEX.is_match(" #!/usr/bin/env python"));
assert!(SHEBANG_REGEX.is_match(" #!/usr/bin/env python"));
fn shebang_end_of_line() {
let source = "print('test') #!/usr/bin/python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}

// Negative cases
assert!(!SHEBANG_REGEX.is_match("hello world"));
#[test]
fn shebang_match() {
let source = "#!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}

#[test]
fn shebang_extract_match() {
assert_eq!(extract_shebang("not a match"), ShebangDirective::None);
assert_eq!(
extract_shebang("#!/usr/bin/env python"),
ShebangDirective::Match(TextSize::from(0), TextSize::from(2), "/usr/bin/env python")
);
assert_eq!(
extract_shebang(" #!/usr/bin/env python"),
ShebangDirective::Match(TextSize::from(2), TextSize::from(4), "/usr/bin/env python")
);
assert_eq!(
extract_shebang("print('test') #!/usr/bin/python"),
ShebangDirective::None
);
fn shebang_leading_space() {
let source = " #!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
}
19 changes: 8 additions & 11 deletions crates/ruff/src/rules/flake8_executable/rules/shebang_newline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,14 @@ pub(crate) fn shebang_newline(
shebang: &ShebangDirective,
first_line: bool,
) -> Option<Diagnostic> {
if let ShebangDirective::Match(_, start, content) = shebang {
if first_line {
None
} else {
let diagnostic = Diagnostic::new(
ShebangNotFirstLine,
TextRange::at(range.start() + start, content.text_len()),
);
Some(diagnostic)
}
} else {
let ShebangDirective { offset, contents } = shebang;

if first_line {
None
} else {
Some(Diagnostic::new(
ShebangNotFirstLine,
TextRange::at(range.start() + offset, contents.text_len()),
))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,16 @@ pub(crate) fn shebang_not_executable(
range: TextRange,
shebang: &ShebangDirective,
) -> Option<Diagnostic> {
if let ShebangDirective::Match(_, start, content) = shebang {
if let Ok(false) = is_executable(filepath) {
let diagnostic = Diagnostic::new(
ShebangNotExecutable,
TextRange::at(range.start() + start, content.text_len()),
);
return Some(diagnostic);
}
let ShebangDirective { offset, contents } = shebang;

if let Ok(false) = is_executable(filepath) {
let diagnostic = Diagnostic::new(
ShebangNotExecutable,
TextRange::at(range.start() + offset, contents.text_len()),
);
return Some(diagnostic);
}

None
}

Expand Down
19 changes: 7 additions & 12 deletions crates/ruff/src/rules/flake8_executable/rules/shebang_python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,14 @@ impl Violation for ShebangMissingPython {

/// EXE003
pub(crate) fn shebang_python(range: TextRange, shebang: &ShebangDirective) -> Option<Diagnostic> {
if let ShebangDirective::Match(_, start, content) = shebang {
if content.contains("python") || content.contains("pytest") {
None
} else {
let diagnostic = Diagnostic::new(
ShebangMissingPython,
TextRange::at(range.start() + start, content.text_len())
.sub_start(TextSize::from(2)),
);
let ShebangDirective { offset, contents } = shebang;

Some(diagnostic)
}
} else {
if contents.contains("python") || contents.contains("pytest") {
None
} else {
Some(Diagnostic::new(
ShebangMissingPython,
TextRange::at(range.start() + offset, contents.text_len()).sub_start(TextSize::from(2)),
))
}
}
34 changes: 19 additions & 15 deletions crates/ruff/src/rules/flake8_executable/rules/shebang_whitespace.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use ruff_text_size::{TextRange, TextSize};
use std::ops::Sub;

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
Expand Down Expand Up @@ -49,22 +50,25 @@ pub(crate) fn shebang_whitespace(
shebang: &ShebangDirective,
autofix: bool,
) -> Option<Diagnostic> {
if let ShebangDirective::Match(n_spaces, start, ..) = shebang {
if *n_spaces > TextSize::from(0) && *start == n_spaces + TextSize::from(2) {
let mut diagnostic = Diagnostic::new(
ShebangLeadingWhitespace,
TextRange::at(range.start(), *n_spaces),
);
if autofix {
diagnostic.set_fix(Fix::automatic(Edit::range_deletion(TextRange::at(
range.start(),
*n_spaces,
))));
}
Some(diagnostic)
} else {
None
let ShebangDirective {
offset,
contents: _,
} = shebang;

if *offset > TextSize::from(2) {
let leading_space_start = range.start();
let leading_space_len = offset.sub(TextSize::new(2));
let mut diagnostic = Diagnostic::new(
ShebangLeadingWhitespace,
TextRange::at(leading_space_start, leading_space_len),
);
if autofix {
diagnostic.set_fix(Fix::automatic(Edit::range_deletion(TextRange::at(
leading_space_start,
leading_space_len,
))));
}
Some(diagnostic)
} else {
None
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
None
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
Some(
ShebangDirective {
offset: 4,
contents: "/usr/bin/env python",
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
Some(
ShebangDirective {
offset: 2,
contents: "/usr/bin/env python",
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
None
Loading