From 5776ec1079c742b8c4b3eaf0931544cfc1d05389 Mon Sep 17 00:00:00 2001 From: bluthej Date: Mon, 30 Oct 2023 05:37:33 +0100 Subject: [PATCH] Sort imports by cached key (#7963) ## Summary Refactor for isort implementation. Closes #7738. I introduced a `NatOrdString` and a `NatOrdStr` type to have a naturally ordered `String` and `&str`, and I pretty much went back to the original implementation based on `module_key`, `member_key` and `sorted_by_cached_key` from itertools. I tried my best to avoid unnecessary allocations but it may have been clumsy in some places, so feedback is appreciated! I also renamed the `Prefix` enum to `MemberType` (and made some related adjustments) because I think this fits more what it is, and it's closer to the wording found in the isort documentation. I think the result is nicer to work with, and it should make implementing #1567 and the like easier :) Of course, I am very much open to any and all remarks on what I did! ## Test Plan I didn't add any test, I am relying on the existing tests since this is just a refactor. --- crates/ruff_linter/src/rules/isort/mod.rs | 34 ++- crates/ruff_linter/src/rules/isort/order.rs | 42 ++- crates/ruff_linter/src/rules/isort/sorting.rs | 240 ++++++++---------- crates/ruff_linter/src/rules/isort/types.rs | 1 + 4 files changed, 152 insertions(+), 165 deletions(-) diff --git a/crates/ruff_linter/src/rules/isort/mod.rs b/crates/ruff_linter/src/rules/isort/mod.rs index bbe155c6effa8..8c909046fbd8a 100644 --- a/crates/ruff_linter/src/rules/isort/mod.rs +++ b/crates/ruff_linter/src/rules/isort/mod.rs @@ -8,13 +8,14 @@ pub(crate) use categorize::categorize; use categorize::categorize_imports; pub use categorize::{ImportSection, ImportType}; use comments::Comment; +use itertools::Itertools; use normalize::normalize_imports; use order::order_imports; use ruff_python_ast::PySourceType; use ruff_python_codegen::Stylist; use ruff_source_file::Locator; use settings::Settings; -use sorting::cmp_either_import; +use sorting::module_key; use types::EitherImport::{Import, ImportFrom}; use types::{AliasData, EitherImport, ImportBlock, TrailingComma}; @@ -173,17 +174,28 @@ fn format_import_block( let imports = order_imports(import_block, settings); - let imports = { - let mut imports = imports - .import - .into_iter() - .map(Import) - .chain(imports.import_from.into_iter().map(ImportFrom)) - .collect::>(); - if settings.force_sort_within_sections { - imports.sort_by(|import1, import2| cmp_either_import(import1, import2, settings)); - }; + let imports = imports + .import + .into_iter() + .map(Import) + .chain(imports.import_from.into_iter().map(ImportFrom)); + let imports: Vec = if settings.force_sort_within_sections { imports + .sorted_by_cached_key(|import| match import { + Import((alias, _)) => { + module_key(Some(alias.name), alias.asname, None, None, settings) + } + ImportFrom((import_from, _, _, aliases)) => module_key( + import_from.module, + None, + import_from.level, + aliases.first().map(|(alias, _)| (alias.name, alias.asname)), + settings, + ), + }) + .collect() + } else { + imports.collect() }; // Add a blank line between every section. diff --git a/crates/ruff_linter/src/rules/isort/order.rs b/crates/ruff_linter/src/rules/isort/order.rs index c2110457a0e8c..19710b0587fc4 100644 --- a/crates/ruff_linter/src/rules/isort/order.rs +++ b/crates/ruff_linter/src/rules/isort/order.rs @@ -1,9 +1,7 @@ -use std::cmp::Ordering; - use itertools::Itertools; use super::settings::Settings; -use super::sorting::{cmp_import_from, cmp_members, cmp_modules}; +use super::sorting::{member_key, module_key}; use super::types::{AliasData, CommentSet, ImportBlock, ImportFromStatement, OrderedImportBlock}; pub(crate) fn order_imports<'a>( @@ -13,12 +11,11 @@ pub(crate) fn order_imports<'a>( let mut ordered = OrderedImportBlock::default(); // Sort `Stmt::Import`. - ordered.import.extend( - block - .import - .into_iter() - .sorted_by(|(alias1, _), (alias2, _)| cmp_modules(alias1, alias2, settings)), - ); + ordered + .import + .extend(block.import.into_iter().sorted_by_cached_key(|(alias, _)| { + module_key(Some(alias.name), alias.asname, None, None, settings) + })); // Sort `Stmt::ImportFrom`. ordered.import_from.extend( @@ -53,27 +50,22 @@ pub(crate) fn order_imports<'a>( trailing_comma, aliases .into_iter() - .sorted_by(|(alias1, _), (alias2, _)| { - cmp_members(alias1, alias2, settings) + .sorted_by_cached_key(|(alias, _)| { + member_key(alias.name, alias.asname, settings) }) .collect::>(), ) }, ) - .sorted_by( - |(import_from1, _, _, aliases1), (import_from2, _, _, aliases2)| { - cmp_import_from(import_from1, import_from2, settings).then_with(|| { - match (aliases1.first(), aliases2.first()) { - (None, None) => Ordering::Equal, - (None, Some(_)) => Ordering::Less, - (Some(_), None) => Ordering::Greater, - (Some((alias1, _)), Some((alias2, _))) => { - cmp_members(alias1, alias2, settings) - } - } - }) - }, - ), + .sorted_by_cached_key(|(import_from, _, _, aliases)| { + module_key( + import_from.module, + None, + import_from.level, + aliases.first().map(|(alias, _)| (alias.name, alias.asname)), + settings, + ) + }), ); ordered } diff --git a/crates/ruff_linter/src/rules/isort/sorting.rs b/crates/ruff_linter/src/rules/isort/sorting.rs index 959d5f7a13628..2f6bdefc24e4a 100644 --- a/crates/ruff_linter/src/rules/isort/sorting.rs +++ b/crates/ruff_linter/src/rules/isort/sorting.rs @@ -1,171 +1,153 @@ /// See: -use std::cmp::Ordering; -use std::collections::BTreeSet; +use natord; +use std::cmp::Reverse; +use std::{borrow::Cow, cmp::Ordering}; use ruff_python_stdlib::str; use super::settings::{RelativeImportsOrder, Settings}; -use super::types::EitherImport::{Import, ImportFrom}; -use super::types::{AliasData, EitherImport, ImportFromData, Importable}; - -#[derive(PartialOrd, Ord, PartialEq, Eq, Copy, Clone)] -pub(crate) enum Prefix { - Constants, - Classes, - Variables, + +#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Copy, Clone)] +pub(crate) enum MemberType { + Constant, + Class, + Variable, } -fn prefix(name: &str, settings: &Settings) -> Prefix { +fn member_type(name: &str, settings: &Settings) -> MemberType { if settings.constants.contains(name) { // Ex) `CONSTANT` - Prefix::Constants + MemberType::Constant } else if settings.classes.contains(name) { // Ex) `CLASS` - Prefix::Classes + MemberType::Class } else if settings.variables.contains(name) { // Ex) `variable` - Prefix::Variables + MemberType::Variable } else if name.len() > 1 && str::is_cased_uppercase(name) { // Ex) `CONSTANT` - Prefix::Constants + MemberType::Constant } else if name.chars().next().is_some_and(char::is_uppercase) { // Ex) `Class` - Prefix::Classes + MemberType::Class } else { // Ex) `variable` - Prefix::Variables + MemberType::Variable } } -/// Compare two module names' by their `force-to-top`ness. -fn cmp_force_to_top(name1: &str, name2: &str, force_to_top: &BTreeSet) -> Ordering { - let force_to_top1 = force_to_top.contains(name1); - let force_to_top2 = force_to_top.contains(name2); - force_to_top1.cmp(&force_to_top2).reverse() +#[derive(Eq, PartialEq, Debug)] +pub(crate) struct NatOrdStr<'a>(Cow<'a, str>); + +impl Ord for NatOrdStr<'_> { + fn cmp(&self, other: &Self) -> Ordering { + natord::compare(&self.0, &other.0) + } } -/// Compare two top-level modules. -pub(crate) fn cmp_modules(alias1: &AliasData, alias2: &AliasData, settings: &Settings) -> Ordering { - cmp_force_to_top(alias1.name, alias2.name, &settings.force_to_top) - .then_with(|| { - if settings.case_sensitive { - natord::compare(alias1.name, alias2.name) - } else { - natord::compare_ignore_case(alias1.name, alias2.name) - .then_with(|| natord::compare(alias1.name, alias2.name)) - } - }) - .then_with(|| match (alias1.asname, alias2.asname) { - (None, None) => Ordering::Equal, - (None, Some(_)) => Ordering::Less, - (Some(_), None) => Ordering::Greater, - (Some(asname1), Some(asname2)) => natord::compare(asname1, asname2), - }) +impl PartialOrd for NatOrdStr<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } -/// Compare two member imports within `Stmt::ImportFrom` blocks. -pub(crate) fn cmp_members(alias1: &AliasData, alias2: &AliasData, settings: &Settings) -> Ordering { - match (alias1.name == "*", alias2.name == "*") { - (true, false) => Ordering::Less, - (false, true) => Ordering::Greater, - _ => { - if settings.order_by_type { - prefix(alias1.name, settings) - .cmp(&prefix(alias2.name, settings)) - .then_with(|| cmp_modules(alias1, alias2, settings)) - } else { - cmp_modules(alias1, alias2, settings) - } - } +impl<'a> From<&'a str> for NatOrdStr<'a> { + fn from(s: &'a str) -> Self { + NatOrdStr(Cow::Borrowed(s)) } } -/// Compare two relative import levels. -pub(crate) fn cmp_levels( - level1: Option, - level2: Option, - relative_imports_order: RelativeImportsOrder, -) -> Ordering { - match (level1, level2) { - (None, None) => Ordering::Equal, - (None, Some(_)) => Ordering::Less, - (Some(_), None) => Ordering::Greater, - (Some(level1), Some(level2)) => match relative_imports_order { - RelativeImportsOrder::ClosestToFurthest => level1.cmp(&level2), - RelativeImportsOrder::FurthestToClosest => level2.cmp(&level1), - }, +impl<'a> From for NatOrdStr<'a> { + fn from(s: String) -> Self { + NatOrdStr(Cow::Owned(s)) } } -/// Compare two `Stmt::ImportFrom` blocks. -pub(crate) fn cmp_import_from( - import_from1: &ImportFromData, - import_from2: &ImportFromData, +#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Copy, Clone)] +pub(crate) enum Distance { + Nearest(u32), + Furthest(Reverse), +} + +type ModuleKey<'a> = ( + Distance, + Option, + Option>, + Option>, + Option>, + Option>, +); + +/// Returns a comparable key to capture the desired sorting order for an imported module (e.g., +/// `foo` in `from foo import bar`). +pub(crate) fn module_key<'a>( + name: Option<&'a str>, + asname: Option<&'a str>, + level: Option, + first_alias: Option<(&'a str, Option<&'a str>)>, settings: &Settings, -) -> Ordering { - cmp_levels( - import_from1.level, - import_from2.level, - settings.relative_imports_order, - ) - .then_with(|| { - cmp_force_to_top( - &import_from1.module_name(), - &import_from2.module_name(), - &settings.force_to_top, - ) - }) - .then_with(|| match (&import_from1.module, import_from2.module) { - (None, None) => Ordering::Equal, - (None, Some(_)) => Ordering::Less, - (Some(_), None) => Ordering::Greater, - (Some(module1), Some(module2)) => { - if settings.case_sensitive { - natord::compare(module1, module2) - } else { - natord::compare_ignore_case(module1, module2) - } +) -> ModuleKey<'a> { + let distance = match settings.relative_imports_order { + RelativeImportsOrder::ClosestToFurthest => Distance::Nearest(level.unwrap_or_default()), + RelativeImportsOrder::FurthestToClosest => { + Distance::Furthest(Reverse(level.unwrap_or_default())) } - }) + }; + let force_to_top = name.map(|name| !settings.force_to_top.contains(name)); // `false` < `true` so we get forced to top first + let maybe_lowercase_name = name + .and_then(|name| (!settings.case_sensitive).then_some(NatOrdStr(maybe_lowercase(name)))); + let module_name = name.map(NatOrdStr::from); + let asname = asname.map(NatOrdStr::from); + let first_alias = first_alias.map(|(name, asname)| member_key(name, asname, settings)); + + ( + distance, + force_to_top, + maybe_lowercase_name, + module_name, + asname, + first_alias, + ) } -/// Compare an import to an import-from. -fn cmp_import_import_from( - import: &AliasData, - import_from: &ImportFromData, +type MemberKey<'a> = ( + bool, + Option, + Option>, + NatOrdStr<'a>, + Option>, +); + +/// Returns a comparable key to capture the desired sorting order for an imported member (e.g., +/// `bar` in `from foo import bar`). +pub(crate) fn member_key<'a>( + name: &'a str, + asname: Option<&'a str>, settings: &Settings, -) -> Ordering { - cmp_force_to_top( - import.name, - &import_from.module_name(), - &settings.force_to_top, +) -> MemberKey<'a> { + let not_star_import = name != "*"; // `false` < `true` so we get star imports first + let member_type = settings + .order_by_type + .then_some(member_type(name, settings)); + let maybe_lowercase_name = + (!settings.case_sensitive).then_some(NatOrdStr(maybe_lowercase(name))); + let module_name = NatOrdStr::from(name); + let asname = asname.map(NatOrdStr::from); + + ( + not_star_import, + member_type, + maybe_lowercase_name, + module_name, + asname, ) - .then_with(|| { - if settings.case_sensitive { - natord::compare(import.name, import_from.module.unwrap_or_default()) - } else { - natord::compare_ignore_case(import.name, import_from.module.unwrap_or_default()) - } - }) } -/// Compare two [`EitherImport`] enums which may be [`Import`] or [`ImportFrom`] -/// structs. -pub(crate) fn cmp_either_import( - a: &EitherImport, - b: &EitherImport, - settings: &Settings, -) -> Ordering { - match (a, b) { - (Import((alias1, _)), Import((alias2, _))) => cmp_modules(alias1, alias2, settings), - (ImportFrom((import_from, ..)), Import((alias, _))) => { - cmp_import_import_from(alias, import_from, settings).reverse() - } - (Import((alias, _)), ImportFrom((import_from, ..))) => { - cmp_import_import_from(alias, import_from, settings) - } - (ImportFrom((import_from1, ..)), ImportFrom((import_from2, ..))) => { - cmp_import_from(import_from1, import_from2, settings) - } +/// Lowercase the given string, if it contains any uppercase characters. +fn maybe_lowercase(name: &str) -> Cow<'_, str> { + if name.chars().all(char::is_lowercase) { + Cow::Borrowed(name) + } else { + Cow::Owned(name.to_lowercase()) } } diff --git a/crates/ruff_linter/src/rules/isort/types.rs b/crates/ruff_linter/src/rules/isort/types.rs index a6e6247ff3831..495a6303ab5a9 100644 --- a/crates/ruff_linter/src/rules/isort/types.rs +++ b/crates/ruff_linter/src/rules/isort/types.rs @@ -89,6 +89,7 @@ type ImportFrom<'a> = ( Vec>, ); +#[derive(Debug)] pub(crate) enum EitherImport<'a> { Import(Import<'a>), ImportFrom(ImportFrom<'a>),