From 3000a47fe83626914cda7394e4ab300582e70f2f Mon Sep 17 00:00:00 2001 From: Zanie Blue Date: Tue, 25 Jul 2023 12:06:47 -0500 Subject: [PATCH] Include file permissions in key for cached files (#5901) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reimplements https://github.com/astral-sh/ruff/pull/3104 Closes https://github.com/astral-sh/ruff/issues/5726 Note that we will generate the hash for a cache key twice in normal operation. Once to check for the cached item and again to update the cache. We could optimize this by generating the hash once in `diagnostics::lint_file` and passing the `u64` into `get` and `update`. We'd probably want to wrap it in a `CacheKeyHash` enum for type safety. ## Test plan Unit tests for Windows and Unix. Manual test with case from issue ``` ❯ touch fake.py ❯ chmod +x fake.py ❯ ./target/debug/ruff --select EXE fake.py fake.py:1:1: EXE002 The file is executable but no shebang is present Found 1 error. ❯ chmod -x fake.py ❯ ./target/debug/ruff --select EXE fake.py ``` --- Cargo.lock | 1 + crates/ruff_cli/Cargo.toml | 1 + .../test/fixtures/cache_mutable/source.py | 1 - crates/ruff_cli/src/cache.rs | 348 ++++++++++++------ crates/ruff_cli/src/diagnostics.rs | 51 ++- 5 files changed, 282 insertions(+), 120 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2cffcef08e51..89f6462ba675b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1995,6 +1995,7 @@ dependencies = [ "ruff", "ruff_cache", "ruff_diagnostics", + "ruff_macros", "ruff_python_ast", "ruff_python_formatter", "ruff_python_stdlib", diff --git a/crates/ruff_cli/Cargo.toml b/crates/ruff_cli/Cargo.toml index 21ef897706d58..983d02d85d46e 100644 --- a/crates/ruff_cli/Cargo.toml +++ b/crates/ruff_cli/Cargo.toml @@ -30,6 +30,7 @@ ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_formatter = { path = "../ruff_python_formatter" } ruff_text_size = { workspace = true } ruff_textwrap = { path = "../ruff_textwrap" } +ruff_macros = { path = "../ruff_macros" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } diff --git a/crates/ruff_cli/resources/test/fixtures/cache_mutable/source.py b/crates/ruff_cli/resources/test/fixtures/cache_mutable/source.py index 7e397f06e5744..83936dccb57f5 100644 --- a/crates/ruff_cli/resources/test/fixtures/cache_mutable/source.py +++ b/crates/ruff_cli/resources/test/fixtures/cache_mutable/source.py @@ -1,4 +1,3 @@ -# NOTE: sync with cache::invalidation test a = 1 __all__ = list(["a", "b"]) diff --git a/crates/ruff_cli/src/cache.rs b/crates/ruff_cli/src/cache.rs index fe614e7f8aedb..59494361ba842 100644 --- a/crates/ruff_cli/src/cache.rs +++ b/crates/ruff_cli/src/cache.rs @@ -164,21 +164,20 @@ impl Cache { path.strip_prefix(&self.package.package_root).ok() } - /// Get the cached results for a single file at relative `path`. This uses - /// `file_last_modified` to determine if the results are still accurate + /// Get the cached results for a single file at relative `path`. This + /// uses `key` to determine if the results are still accurate. /// (i.e. if the file hasn't been modified since the cached run). /// - /// This returns `None` if `file_last_modified` differs from the cached - /// timestamp or if the cache doesn't contain results for the file. - pub(crate) fn get( - &self, - path: &RelativePath, - file_last_modified: SystemTime, - ) -> Option<&FileCache> { + /// This returns `None` if `key` differs from the cached key or if the + /// cache doesn't contain results for the file. + pub(crate) fn get(&self, path: &RelativePath, key: &T) -> Option<&FileCache> { let file = self.package.files.get(path)?; + let mut hasher = CacheKeyHasher::new(); + key.cache_key(&mut hasher); + // Make sure the file hasn't changed since the cached run. - if file.last_modified != file_last_modified { + if file.key != hasher.finish() { return None; } @@ -188,10 +187,10 @@ impl Cache { } /// Add or update a file cache at `path` relative to the package root. - pub(crate) fn update( + pub(crate) fn update( &self, path: RelativePathBuf, - last_modified: SystemTime, + key: T, messages: &[Message], imports: &ImportMap, ) { @@ -218,8 +217,11 @@ impl Cache { }) .collect(); + let mut hasher = CacheKeyHasher::new(); + key.cache_key(&mut hasher); + let file = FileCache { - last_modified, + key: hasher.finish(), last_seen: AtomicU64::new(self.last_seen_cache), imports: imports.clone(), messages, @@ -244,8 +246,8 @@ struct PackageCache { /// On disk representation of the cache per source file. #[derive(Deserialize, Debug, Serialize)] pub(crate) struct FileCache { - /// Timestamp when the file was last modified before the (cached) check. - last_modified: SystemTime, + /// Key that determines if the cached item is still valid. + key: u64, /// Timestamp when we last linted this file. /// /// Represented as the number of milliseconds since Unix epoch. This will @@ -327,20 +329,27 @@ pub(crate) fn init(path: &Path) -> Result<()> { #[cfg(test)] mod tests { + use filetime::{set_file_mtime, FileTime}; use std::env::temp_dir; use std::fs; - use std::io::{self, Write}; + use std::io; + use std::io::Write; use std::path::{Path, PathBuf}; - use std::sync::atomic::AtomicU64; use std::time::SystemTime; + use itertools::Itertools; use ruff::settings::{flags, AllSettings}; use ruff_cache::CACHE_DIR_NAME; - use ruff_python_ast::imports::ImportMap; + use crate::cache::RelativePathBuf; use crate::cache::{self, Cache, FileCache}; use crate::diagnostics::{lint_path, Diagnostics}; + use std::sync::atomic::AtomicU64; + + use anyhow::Result; + use ruff_python_ast::imports::ImportMap; + #[test] fn same_results() { let mut cache_dir = temp_dir(); @@ -442,52 +451,99 @@ mod tests { } #[test] - fn invalidation() { - // NOTE: keep in sync with actual file. - const SOURCE: &[u8] = b"# NOTE: sync with cache::invalidation test\na = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + fn cache_adds_file_on_lint() { + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; - let mut cache_dir = temp_dir(); - cache_dir.push("ruff_tests/cache_invalidation"); - let _ = fs::remove_dir_all(&cache_dir); - cache::init(&cache_dir).unwrap(); + let test_cache = TestCache::new("cache_adds_file_on_lint"); + let cache = test_cache.open(); + test_cache.write_source_file("source.py", source); + assert_eq!(cache.new_files.lock().unwrap().len(), 0); - let settings = AllSettings::default(); - let package_root = fs::canonicalize("resources/test/fixtures/cache_mutable").unwrap(); - let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib); + cache.store().unwrap(); + let cache = test_cache.open(); + + test_cache + .lint_file_with_cache("source.py", &cache) + .expect("Failed to lint test file"); + assert_eq!( + cache.new_files.lock().unwrap().len(), + 1, + "A single new file should be added to the cache" + ); + + cache.store().unwrap(); + } + + #[test] + fn cache_adds_files_on_lint() { + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + + let test_cache = TestCache::new("cache_adds_files_on_lint"); + let cache = test_cache.open(); + test_cache.write_source_file("source_1.py", source); + test_cache.write_source_file("source_2.py", source); + assert_eq!(cache.new_files.lock().unwrap().len(), 0); + + cache.store().unwrap(); + let cache = test_cache.open(); + + test_cache + .lint_file_with_cache("source_1.py", &cache) + .expect("Failed to lint test file"); + test_cache + .lint_file_with_cache("source_2.py", &cache) + .expect("Failed to lint test file"); + assert_eq!( + cache.new_files.lock().unwrap().len(), + 2, + "Both files should be added to the cache" + ); + + cache.store().unwrap(); + } + + #[test] + fn cache_invalidated_on_file_modified_time() { + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + + let test_cache = TestCache::new("cache_invalidated_on_file_modified_time"); + let cache = test_cache.open(); + let source_path = test_cache.write_source_file("source.py", source); assert_eq!(cache.new_files.lock().unwrap().len(), 0); - let path = package_root.join("source.py"); - let mut expected_diagnostics = lint_path( - &path, - Some(&package_root), - &settings, - Some(&cache), - flags::Noqa::Enabled, - flags::FixMode::Generate, + let expected_diagnostics = test_cache + .lint_file_with_cache("source.py", &cache) + .expect("Failed to lint test file"); + + cache.store().unwrap(); + let cache = test_cache.open(); + + // Update the modified time of the file to a time in the future + set_file_mtime( + source_path, + FileTime::from_system_time(SystemTime::now() + std::time::Duration::from_secs(1)), ) .unwrap(); - assert_eq!(cache.new_files.lock().unwrap().len(), 1); - cache.store().unwrap(); + let got_diagnostics = test_cache + .lint_file_with_cache("source.py", &cache) + .expect("Failed to lint test file"); - let tests = [ - // File change. - (|path| { - let mut file = fs::OpenOptions::new() - .write(true) - .truncate(true) - .open(path)?; - file.write_all(SOURCE)?; - file.sync_data()?; - Ok(|_| Ok(())) - }) as fn(&Path) -> io::Result io::Result<()>>, - // Regression for issue #3086. - #[cfg(unix)] - |path| { - flip_execute_permission_bit(path)?; - Ok(flip_execute_permission_bit) - }, - ]; + assert_eq!( + cache.new_files.lock().unwrap().len(), + 1, + "Cache should not be used, the file should be treated as new and added to the cache" + ); + + assert_eq!( + expected_diagnostics, got_diagnostics, + "The diagnostics should not change" + ); + } + + #[test] + fn cache_invalidated_on_permission_change() { + // Regression test for issue #3086. #[cfg(unix)] #[allow(clippy::items_after_statements)] @@ -498,55 +554,62 @@ mod tests { file.set_permissions(PermissionsExt::from_mode(perms.mode() ^ 0o111)) } - for change_file in tests { - let cleanup = change_file(&path).unwrap(); + #[cfg(windows)] + #[allow(clippy::items_after_statements)] + fn flip_read_only_permission(path: &Path) -> io::Result<()> { + let file = fs::OpenOptions::new().write(true).open(path)?; + let mut perms = file.metadata()?.permissions(); + perms.set_readonly(!perms.readonly()); + file.set_permissions(perms) + } - let cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib); + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; - let mut got_diagnostics = lint_path( - &path, - Some(&package_root), - &settings, - Some(&cache), - flags::Noqa::Enabled, - flags::FixMode::Generate, - ) + let test_cache = TestCache::new("cache_invalidated_on_permission_change"); + let cache = test_cache.open(); + let path = test_cache.write_source_file("source.py", source); + assert_eq!(cache.new_files.lock().unwrap().len(), 0); + + let expected_diagnostics = test_cache + .lint_file_with_cache("source.py", &cache) .unwrap(); - cleanup(&path).unwrap(); + cache.store().unwrap(); + let cache = test_cache.open(); - assert_eq!( - cache.new_files.lock().unwrap().len(), - 1, - "cache must not be used" - ); + // Flip the permissions on the file + #[cfg(unix)] + flip_execute_permission_bit(&path).unwrap(); + #[cfg(windows)] + flip_read_only_permission(&path).unwrap(); - // Not store in the cache. - expected_diagnostics.source_kind.clear(); - got_diagnostics.source_kind.clear(); - assert!(expected_diagnostics == got_diagnostics); - } + let got_diagnostics = test_cache + .lint_file_with_cache("source.py", &cache) + .unwrap(); + + assert_eq!( + cache.new_files.lock().unwrap().len(), + 1, + "Cache should not be used, the file should be treated as new and added to the cache" + ); + + assert_eq!( + expected_diagnostics, got_diagnostics, + "The diagnostics should not change" + ); } #[test] - fn remove_old_files() { - let mut cache_dir = temp_dir(); - cache_dir.push("ruff_tests/cache_remove_old_files"); - let _ = fs::remove_dir_all(&cache_dir); - cache::init(&cache_dir).unwrap(); - - let settings = AllSettings::default(); - let package_root = - fs::canonicalize("resources/test/fixtures/cache_remove_old_files").unwrap(); - let mut cache = Cache::open(&cache_dir, package_root.clone(), &settings.lib); - assert_eq!(cache.new_files.lock().unwrap().len(), 0); + fn cache_removes_stale_files_on_store() { + let test_cache = TestCache::new("cache_removes_stale_files_on_store"); + let mut cache = test_cache.open(); - // Add a file to the cache that hasn't been linted or seen since the - // '70s! + // Add a file to the cache that hasn't been linted or seen since the '70s! + let old_path_key = RelativePathBuf::from("old.py"); cache.package.files.insert( - PathBuf::from("old.py"), + old_path_key, FileCache { - last_modified: SystemTime::UNIX_EPOCH, + key: 123, last_seen: AtomicU64::new(123), imports: ImportMap::new(), messages: Vec::new(), @@ -555,26 +618,97 @@ mod tests { ); // Now actually lint a file. - let path = package_root.join("source.py"); - lint_path( - &path, - Some(&package_root), - &settings, - Some(&cache), - flags::Noqa::Enabled, - flags::FixMode::Generate, - ) - .unwrap(); + let source: &[u8] = b"a = 1\n\n__all__ = list([\"a\", \"b\"])\n"; + test_cache.write_source_file("new.py", source); + let new_path_key = RelativePathBuf::from("new.py"); + assert_eq!(cache.new_files.lock().unwrap().len(), 0); + + test_cache + .lint_file_with_cache("new.py", &cache) + .expect("Failed to lint test file"); // Storing the cache should remove the old (`old.py`) file. cache.store().unwrap(); // So we when we open the cache again it shouldn't contain `old.py`. - let cache = Cache::open(&cache_dir, package_root, &settings.lib); + let cache = test_cache.open(); - assert_eq!(cache.package.files.len(), 1, "didn't remove the old file"); assert!( - !cache.package.files.contains_key(&path), - "removed the wrong file" + cache.package.files.keys().collect_vec() == vec![&new_path_key], + "Only the new file should be present" ); } + + struct TestCache { + cache_dir: PathBuf, + package_root: PathBuf, + settings: AllSettings, + } + + impl TestCache { + fn new(name: &str) -> Self { + // Build a new cache directory and clear it + let mut test_dir = temp_dir(); + test_dir.push("ruff_tests/cache"); + test_dir.push(name); + + let _ = fs::remove_dir_all(&test_dir); + + // Create separate directories for the cache and the test package + let cache_dir = test_dir.join("cache"); + let package_root = test_dir.join("package"); + + cache::init(&cache_dir).unwrap(); + fs::create_dir(package_root.clone()).unwrap(); + + let settings = AllSettings::default(); + + Self { + cache_dir, + package_root, + settings, + } + } + + fn write_source_file(&self, path: &str, contents: &[u8]) -> PathBuf { + let path = self.package_root.join(path); + let mut file = fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&*path) + .unwrap(); + file.write_all(contents).unwrap(); + file.sync_data().unwrap(); + path + } + + fn open(&self) -> Cache { + Cache::open( + &self.cache_dir, + self.package_root.clone(), + &self.settings.lib, + ) + } + + fn lint_file_with_cache( + &self, + path: &str, + cache: &Cache, + ) -> Result { + lint_path( + &self.package_root.join(path), + Some(&self.package_root), + &self.settings, + Some(cache), + flags::Noqa::Enabled, + flags::FixMode::Generate, + ) + } + } + + impl Drop for TestCache { + fn drop(&mut self) { + let _ = fs::remove_dir_all(&self.cache_dir); + } + } } diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index fcd0342106d66..29da5529b278a 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -6,13 +6,17 @@ use std::io::Write; use std::ops::AddAssign; use std::path::Path; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use colored::Colorize; +use filetime::FileTime; use log::{debug, error, warn}; use ruff_text_size::{TextRange, TextSize}; use rustc_hash::FxHashMap; use similar::TextDiff; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; +use crate::cache::Cache; use ruff::jupyter::Notebook; use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult}; use ruff::logging::DisplayParseError; @@ -23,11 +27,35 @@ use ruff::settings::{flags, AllSettings, Settings}; use ruff::source_kind::SourceKind; use ruff::{fs, IOError}; use ruff_diagnostics::Diagnostic; +use ruff_macros::CacheKey; use ruff_python_ast::imports::ImportMap; use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml}; -use crate::cache::Cache; +#[derive(CacheKey)] +pub(crate) struct FileCacheKey { + /// Timestamp when the file was last modified before the (cached) check. + file_last_modified: FileTime, + /// Permissions of the file before the (cached) check. + file_permissions_mode: u32, +} + +impl FileCacheKey { + fn from_path(path: &Path) -> io::Result { + // Construct a cache key for the file + let metadata = path.metadata()?; + + #[cfg(unix)] + let permissions = metadata.permissions().mode(); + #[cfg(windows)] + let permissions: u32 = metadata.permissions().readonly().into(); + + Ok(FileCacheKey { + file_last_modified: FileTime::from_last_modification_time(&metadata), + file_permissions_mode: permissions, + }) + } +} #[derive(Debug, Default, PartialEq)] pub(crate) struct Diagnostics { @@ -117,12 +145,16 @@ pub(crate) fn lint_path( let relative_path = cache .relative_path(path) .expect("wrong package cache for file"); - let last_modified = path.metadata()?.modified()?; - if let Some(cache) = cache.get(relative_path, last_modified) { + + let cache_key = FileCacheKey::from_path(path).context("Failed to create cache key")?; + + if let Some(cache) = cache.get(relative_path, &cache_key) { return Ok(cache.as_diagnostics(path)); } - Some((cache, relative_path, last_modified)) + // Stash the file metadata for later so when we update the cache it reflects the prerun + // information + Some((cache, relative_path, cache_key)) } _ => None, }; @@ -269,15 +301,10 @@ pub(crate) fn lint_path( let imports = imports.unwrap_or_default(); - if let Some((cache, relative_path, file_last_modified)) = caching { + if let Some((cache, relative_path, key)) = caching { // We don't cache parsing errors. if parse_error.is_none() { - cache.update( - relative_path.to_owned(), - file_last_modified, - &messages, - &imports, - ); + cache.update(relative_path.to_owned(), key, &messages, &imports); } }