Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update string nodes for implicit concatenation #7927

Merged
merged 14 commits into from
Nov 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions crates/ruff_linter/src/checkers/ast/analyze/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,21 +158,23 @@ pub(crate) fn definitions(checker: &mut Checker) {
}

// Extract a `Docstring` from a `Definition`.
let Some(expr) = docstring else {
let Some(string_literal) = docstring else {
pydocstyle::rules::not_missing(checker, definition, *visibility);
continue;
};

let contents = checker.locator().slice(expr);
let contents = checker.locator().slice(string_literal);

let indentation = checker.locator().slice(TextRange::new(
checker.locator.line_start(expr.start()),
expr.start(),
checker.locator.line_start(string_literal.start()),
string_literal.start(),
));

if expr.implicit_concatenated {
if string_literal.value.is_implicit_concatenated() {
#[allow(deprecated)]
let location = checker.locator.compute_source_location(expr.start());
let location = checker
.locator
.compute_source_location(string_literal.start());
warn_user!(
"Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
relativize_path(checker.path),
Expand All @@ -186,7 +188,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
let body_range = raw_contents_range(contents).unwrap();
let docstring = Docstring {
definition,
expr,
expr: string_literal,
contents,
body_range,
indentation,
Expand Down
20 changes: 16 additions & 4 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -985,15 +985,22 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
pylint::rules::await_outside_async(checker, expr);
}
}
Expr::FString(fstring @ ast::ExprFString { values, .. }) => {
Expr::FString(f_string_expr @ ast::ExprFString { value, .. }) => {
if checker.enabled(Rule::FStringMissingPlaceholders) {
pyflakes::rules::f_string_missing_placeholders(fstring, checker);
pyflakes::rules::f_string_missing_placeholders(checker, f_string_expr);
}
if checker.enabled(Rule::ExplicitFStringTypeConversion) {
for f_string in value.f_strings() {
ruff::rules::explicit_f_string_type_conversion(checker, f_string);
}
}
if checker.enabled(Rule::HardcodedSQLExpression) {
flake8_bandit::rules::hardcoded_sql_expression(checker, expr);
}
if checker.enabled(Rule::ExplicitFStringTypeConversion) {
ruff::rules::explicit_f_string_type_conversion(checker, expr, values);
if checker.enabled(Rule::UnicodeKindPrefix) {
for string_literal in value.literals() {
pyupgrade::rules::unicode_kind_prefix(checker, string_literal);
}
}
}
Expr::BinOp(ast::ExprBinOp {
Expand Down Expand Up @@ -1275,6 +1282,11 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::HardcodedTempFile) {
flake8_bandit::rules::hardcoded_tmp_directory(checker, string);
}
if checker.enabled(Rule::UnicodeKindPrefix) {
for string_part in string.value.parts() {
pyupgrade::rules::unicode_kind_prefix(checker, string_part);
}
}
if checker.source_type.is_stub() {
if checker.enabled(Rule::StringOrBytesTooLong) {
flake8_pyi::rules::string_or_bytes_too_long(checker, expr);
Expand Down
6 changes: 3 additions & 3 deletions crates/ruff_linter/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1303,9 +1303,9 @@ where

fn visit_format_spec(&mut self, format_spec: &'b Expr) {
match format_spec {
Expr::FString(ast::ExprFString { values, .. }) => {
for value in values {
self.visit_expr(value);
Expr::FString(ast::ExprFString { value, .. }) => {
for expr in value.elements() {
self.visit_expr(expr);
}
}
_ => unreachable!("Unexpected expression for format_spec"),
Expand Down
4 changes: 0 additions & 4 deletions crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,6 @@ pub(crate) fn check_tokens(
pycodestyle::rules::tab_indentation(&mut diagnostics, tokens, locator, indexer);
}

if settings.rules.enabled(Rule::UnicodeKindPrefix) {
pyupgrade::rules::unicode_kind_prefix(&mut diagnostics, tokens);
}

if settings.rules.any_enabled(&[
Rule::InvalidCharacterBackspace,
Rule::InvalidCharacterSub,
Expand Down
29 changes: 26 additions & 3 deletions crates/ruff_linter/src/cst/matchers.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::fix::codemods::CodegenStylist;
use anyhow::{bail, Result};
use libcst_native::{
Arg, Attribute, Call, Comparison, CompoundStatement, Dict, Expression, FunctionDef,
GeneratorExp, If, Import, ImportAlias, ImportFrom, ImportNames, IndentedBlock, Lambda,
ListComp, Module, Name, SmallStatement, Statement, Suite, Tuple, With,
Arg, Attribute, Call, Comparison, CompoundStatement, Dict, Expression, FormattedString,
FormattedStringContent, FormattedStringExpression, FunctionDef, GeneratorExp, If, Import,
ImportAlias, ImportFrom, ImportNames, IndentedBlock, Lambda, ListComp, Module, Name,
SmallStatement, Statement, Suite, Tuple, With,
};
use ruff_python_codegen::Stylist;

Expand Down Expand Up @@ -153,6 +154,28 @@ pub(crate) fn match_lambda<'a, 'b>(expression: &'a Expression<'b>) -> Result<&'a
}
}

pub(crate) fn match_formatted_string<'a, 'b>(
expression: &'a mut Expression<'b>,
) -> Result<&'a mut FormattedString<'b>> {
if let Expression::FormattedString(formatted_string) = expression {
Ok(formatted_string)
} else {
bail!("Expected Expression::FormattedString");
}
}

pub(crate) fn match_formatted_string_expression<'a, 'b>(
formatted_string_content: &'a mut FormattedStringContent<'b>,
) -> Result<&'a mut FormattedStringExpression<'b>> {
if let FormattedStringContent::Expression(formatted_string_expression) =
formatted_string_content
{
Ok(formatted_string_expression)
} else {
bail!("Expected FormattedStringContent::Expression")
}
}

pub(crate) fn match_function_def<'a, 'b>(
statement: &'a mut Statement<'b>,
) -> Result<&'a mut FunctionDef<'b>> {
Expand Down
1 change: 0 additions & 1 deletion crates/ruff_linter/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ impl Rule {
| Rule::TabIndentation
| Rule::TrailingCommaOnBareTuple
| Rule::TypeCommentInStub
| Rule::UnicodeKindPrefix
| Rule::UselessSemicolon
| Rule::UTF8EncodingDeclaration => LintSource::Tokens,
Rule::IOError => LintSource::Io,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ pub(crate) fn variable_name_task_id(
let ast::ExprStringLiteral { value: task_id, .. } = keyword.value.as_string_literal_expr()?;

// If the target name is the same as the task_id, no violation.
if id == task_id {
if task_id == id {
return None;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,6 @@ fn check_dynamically_typed<F>(
if let Expr::StringLiteral(ast::ExprStringLiteral {
range,
value: string,
..
}) = annotation
{
// Quoted annotations
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/rules/flake8_bandit/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ static PASSWORD_CANDIDATE_REGEX: Lazy<Regex> = Lazy::new(|| {

pub(super) fn string_literal(expr: &Expr) -> Option<&str> {
match expr {
Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => Some(value),
Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => Some(value.as_str()),
_ => None,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl Violation for HardcodedBindAllInterfaces {

/// S104
pub(crate) fn hardcoded_bind_all_interfaces(string: &ExprStringLiteral) -> Option<Diagnostic> {
if string.value == "0.0.0.0" {
if string.value.as_str() == "0.0.0.0" {
Some(Diagnostic::new(HardcodedBindAllInterfaces, string.range))
} else {
None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@ use ruff_python_ast::{self as ast, Expr, Operator};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::any_over_expr;
use ruff_python_semantic::SemanticModel;
use ruff_python_ast::str::raw_contents;
use ruff_source_file::Locator;
use ruff_text_size::Ranged;

use crate::checkers::ast::Checker;

use super::super::helpers::string_literal;

static SQL_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r"(?i)\b(select\s.+\sfrom\s|delete\s+from\s|(insert|replace)\s.+\svalues\s|update\s.+\sset\s)")
.unwrap()
Expand Down Expand Up @@ -46,53 +45,77 @@ impl Violation for HardcodedSQLExpression {
}
}

fn has_string_literal(expr: &Expr) -> bool {
string_literal(expr).is_some()
}

fn matches_sql_statement(string: &str) -> bool {
SQL_REGEX.is_match(string)
/// Concatenates the contents of an f-string, without the prefix and quotes,
/// and escapes any special characters.
///
/// ## Example
///
/// ```python
/// "foo" f"bar {x}" "baz"
/// ```
///
/// becomes `foobar {x}baz`.
fn concatenated_f_string(expr: &ast::ExprFString, locator: &Locator) -> String {
expr.value
.parts()
.filter_map(|part| {
raw_contents(locator.slice(part)).map(|s| s.escape_default().to_string())
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Escapes are required for multi-line strings to escape the newline characters.

})
.collect()
}

fn matches_string_format_expression(expr: &Expr, semantic: &SemanticModel) -> bool {
match expr {
/// S608
pub(crate) fn hardcoded_sql_expression(checker: &mut Checker, expr: &Expr) {
let content = match expr {
// "select * from table where val = " + "str" + ...
// "select * from table where val = %s" % ...
Expr::BinOp(ast::ExprBinOp {
op: Operator::Add | Operator::Mod,
..
op: Operator::Add, ..
}) => {
// Only evaluate the full BinOp, not the nested components.
if semantic
if !checker
.semantic()
.current_expression_parent()
.map_or(true, |parent| !parent.is_bin_op_expr())
{
if any_over_expr(expr, &has_string_literal) {
return true;
}
return;
}
if !any_over_expr(expr, &Expr::is_string_literal_expr) {
return;
}
false
checker.generator().expr(expr)
}
// "select * from table where val = %s" % ...
Expr::BinOp(ast::ExprBinOp {
left,
op: Operator::Mod,
..
}) => {
Comment on lines +87 to +92
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is now a separate branch because there could be implicit concatenations on the left side of % operator.

let Some(string) = left.as_string_literal_expr() else {
return;
};
string.value.escape_default().to_string()
}
Expr::Call(ast::ExprCall { func, .. }) => {
let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = func.as_ref() else {
return false;
return;
};
// "select * from table where val = {}".format(...)
attr == "format" && string_literal(value).is_some()
if attr != "format" {
return;
}
let Some(string) = value.as_string_literal_expr() else {
return;
};
string.value.escape_default().to_string()
}
// f"select * from table where val = {val}"
Expr::FString(_) => true,
_ => false,
}
}
Expr::FString(f_string) => concatenated_f_string(f_string, checker.locator()),
_ => return,
};

/// S608
pub(crate) fn hardcoded_sql_expression(checker: &mut Checker, expr: &Expr) {
if matches_string_format_expression(expr, checker.semantic()) {
if matches_sql_statement(&checker.generator().expr(expr)) {
checker
.diagnostics
.push(Diagnostic::new(HardcodedSQLExpression, expr.range()));
}
if SQL_REGEX.is_match(&content) {
checker
.diagnostics
.push(Diagnostic::new(HardcodedSQLExpression, expr.range()));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ pub(crate) fn hardcoded_tmp_directory(checker: &mut Checker, string: &ast::ExprS

checker.diagnostics.push(Diagnostic::new(
HardcodedTempFile {
string: string.value.clone(),
string: string.value.to_string(),
},
string.range,
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -854,11 +854,11 @@ pub(crate) fn suspicious_function_call(checker: &mut Checker, call: &ExprCall) {
["six", "moves", "urllib", "request", "urlopen" | "urlretrieve" | "Request"] => {
// If the `url` argument is a string literal, allow `http` and `https` schemes.
if call.arguments.args.iter().all(|arg| !arg.is_starred_expr()) && call.arguments.keywords.iter().all(|keyword| keyword.arg.is_some()) {
if let Some(Expr::StringLiteral(ast::ExprStringLiteral { value: url, .. })) = &call.arguments.find_argument("url", 0) {
let url = url.trim_start();
if url.starts_with("http://") || url.starts_with("https://") {
return None;
}
if let Some(Expr::StringLiteral(ast::ExprStringLiteral { value, .. })) = &call.arguments.find_argument("url", 0) {
let url = value.trim_start();
if url.starts_with("http://") || url.starts_with("https://") {
return None;
}
}
}
Some(SuspiciousURLOpenUsage.into())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ pub(crate) fn unnecessary_dict_kwargs(checker: &mut Checker, expr: &Expr, kwargs
fn as_kwarg(key: &Expr) -> Option<&str> {
if let Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) = key {
if is_identifier(value) {
return Some(value);
return Some(value.as_str());
}
}
None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ pub(crate) fn unrecognized_platform(checker: &mut Checker, test: &Expr) {
if !matches!(value.as_str(), "linux" | "win32" | "cygwin" | "darwin") {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an example where... we can probably do this without the concatenation? Imagine if we had a custom matcher that worked on implicit concatenations (i.e., a vector of strings). \cc @BurntSushi in case you have obvious ideas here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the closest "out of the box" experience you can get is stream searching with aho-corasick, but you need to provide a std::io::Read impl. Mildly annoying but doable in this context.

But I would say to write the simple/obvious code for now, and if this ends up popping up on a profile then we can revisit it and do something bespoke here or use aho-corasick.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would prefer to avoid the complexity for such simple cases but I do understand that this could be useful in similar cases where the strings are long.

checker.diagnostics.push(Diagnostic::new(
UnrecognizedPlatformName {
platform: value.clone(),
platform: value.to_string(),
},
right.range(),
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,13 @@ pub(super) fn is_empty_or_null_string(expr: &Expr) -> bool {
match expr {
Expr::StringLiteral(ast::ExprStringLiteral { value, .. }) => value.is_empty(),
Expr::NoneLiteral(_) => true,
Expr::FString(ast::ExprFString { values, .. }) => {
values.iter().all(is_empty_or_null_string)
Expr::FString(ast::ExprFString { value, .. }) => {
value.parts().all(|f_string_part| match f_string_part {
ast::FStringPart::Literal(literal) => literal.is_empty(),
ast::FStringPart::FString(f_string) => {
f_string.values.iter().all(is_empty_or_null_string)
}
})
}
_ => false,
}
Expand Down
Loading
Loading