diff --git a/crates/ruff/src/jupyter/notebook.rs b/crates/ruff/src/jupyter/notebook.rs index 43238f5e2b549..a4289fd776c3e 100644 --- a/crates/ruff/src/jupyter/notebook.rs +++ b/crates/ruff/src/jupyter/notebook.rs @@ -1,7 +1,7 @@ use std::cmp::Ordering; use std::fmt::Display; use std::fs::File; -use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write}; +use std::io::{BufReader, BufWriter, Cursor, Read, Seek, SeekFrom, Write}; use std::iter; use std::path::Path; @@ -26,7 +26,7 @@ pub const JUPYTER_NOTEBOOK_EXT: &str = "ipynb"; /// Run round-trip source code generation on a given Jupyter notebook file path. pub fn round_trip(path: &Path) -> anyhow::Result { - let mut notebook = Notebook::read(path).map_err(|err| { + let mut notebook = Notebook::from_path(path).map_err(|err| { anyhow::anyhow!( "Failed to read notebook file `{}`: {:?}", path.display(), @@ -120,18 +120,30 @@ pub struct Notebook { impl Notebook { /// Read the Jupyter Notebook from the given [`Path`]. - /// - /// See also the black implementation - /// - pub fn read(path: &Path) -> Result> { - let mut reader = BufReader::new(File::open(path).map_err(|err| { + pub fn from_path(path: &Path) -> Result> { + Self::from_reader(BufReader::new(File::open(path).map_err(|err| { Diagnostic::new( IOError { message: format!("{err}"), }, TextRange::default(), ) - })?); + })?)) + } + + /// Read the Jupyter Notebook from its JSON string. + pub fn from_contents(contents: &str) -> Result> { + Self::from_reader(Cursor::new(contents)) + } + + /// Read a Jupyter Notebook from a [`Read`] implementor. + /// + /// See also the black implementation + /// + fn from_reader(mut reader: R) -> Result> + where + R: Read + Seek, + { let trailing_newline = reader.seek(SeekFrom::End(-1)).is_ok_and(|_| { let mut buf = [0; 1]; reader.read_exact(&mut buf).is_ok_and(|_| buf[0] == b'\n') @@ -144,7 +156,7 @@ impl Notebook { TextRange::default(), ) })?; - let raw_notebook: RawNotebook = match serde_json::from_reader(reader) { + let raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) { Ok(notebook) => notebook, Err(err) => { // Translate the error into a diagnostic @@ -159,14 +171,19 @@ impl Notebook { Category::Syntax | Category::Eof => { // Maybe someone saved the python sources (those with the `# %%` separator) // as jupyter notebook instead. Let's help them. - let contents = std::fs::read_to_string(path).map_err(|err| { - Diagnostic::new( - IOError { - message: format!("{err}"), - }, - TextRange::default(), - ) - })?; + let mut contents = String::new(); + reader + .rewind() + .and_then(|_| reader.read_to_string(&mut contents)) + .map_err(|err| { + Diagnostic::new( + IOError { + message: format!("{err}"), + }, + TextRange::default(), + ) + })?; + // Check if tokenizing was successful and the file is non-empty if lex(&contents, Mode::Module).any(|result| result.is_err()) { Diagnostic::new( @@ -182,7 +199,7 @@ impl Notebook { Diagnostic::new( SyntaxError { message: format!( - "Expected a Jupyter Notebook (.{JUPYTER_NOTEBOOK_EXT} extension), \ + "Expected a Jupyter Notebook (.{JUPYTER_NOTEBOOK_EXT}), \ which must be internally stored as JSON, \ but found a Python source file: {err}" ), @@ -484,22 +501,22 @@ mod tests { fn test_invalid() { let path = Path::new("resources/test/fixtures/jupyter/invalid_extension.ipynb"); assert_eq!( - Notebook::read(path).unwrap_err().kind.body, - "SyntaxError: Expected a Jupyter Notebook (.ipynb extension), \ + Notebook::from_path(path).unwrap_err().kind.body, + "SyntaxError: Expected a Jupyter Notebook (.ipynb), \ which must be internally stored as JSON, \ but found a Python source file: \ expected value at line 1 column 1" ); let path = Path::new("resources/test/fixtures/jupyter/not_json.ipynb"); assert_eq!( - Notebook::read(path).unwrap_err().kind.body, + Notebook::from_path(path).unwrap_err().kind.body, "SyntaxError: A Jupyter Notebook (.ipynb) must internally be JSON, \ but this file isn't valid JSON: \ expected value at line 1 column 1" ); let path = Path::new("resources/test/fixtures/jupyter/wrong_schema.ipynb"); assert_eq!( - Notebook::read(path).unwrap_err().kind.body, + Notebook::from_path(path).unwrap_err().kind.body, "SyntaxError: This file does not match the schema expected of Jupyter Notebooks: \ missing field `cells` at line 1 column 2" ); diff --git a/crates/ruff/src/test.rs b/crates/ruff/src/test.rs index b89ce5a80fd93..07e9c15ac167d 100644 --- a/crates/ruff/src/test.rs +++ b/crates/ruff/src/test.rs @@ -33,7 +33,7 @@ use crate::source_kind::SourceKind; #[cfg(not(fuzzing))] pub(crate) fn read_jupyter_notebook(path: &Path) -> Result { let path = test_resource_path("fixtures/jupyter").join(path); - Notebook::read(&path).map_err(|err| { + Notebook::from_path(&path).map_err(|err| { anyhow::anyhow!( "Failed to read notebook file `{}`: {:?}", path.display(), diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index 5ad6a50fd2788..f348468b1b23e 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -96,12 +96,14 @@ impl AddAssign for Diagnostics { } } -/// Returns either an indexed python jupyter notebook or a diagnostic (which is empty if we skip) -fn load_jupyter_notebook(path: &Path) -> Result> { - let notebook = match Notebook::read(path) { +/// Read a Jupyter Notebook from disk. +/// +/// Returns either an indexed Python Jupyter Notebook or a diagnostic (which is empty if we skip). +fn notebook_from_path(path: &Path) -> Result> { + let notebook = match Notebook::from_path(path) { Ok(notebook) => { if !notebook.is_python_notebook() { - // Not a python notebook, this could e.g. be an R notebook which we want to just skip + // Not a python notebook, this could e.g. be an R notebook which we want to just skip. debug!( "Skipping {} because it's not a Python notebook", path.display() @@ -126,6 +128,44 @@ fn load_jupyter_notebook(path: &Path) -> Result> { Ok(notebook) } +/// Parse a Jupyter Notebook from a JSON string. +/// +/// Returns either an indexed Python Jupyter Notebook or a diagnostic (which is empty if we skip). +fn notebook_from_contents( + contents: &str, + path: Option<&Path>, +) -> Result> { + let notebook = match Notebook::from_contents(contents) { + Ok(notebook) => { + if !notebook.is_python_notebook() { + // Not a python notebook, this could e.g. be an R notebook which we want to just skip. + if let Some(path) = path { + debug!( + "Skipping {} because it's not a Python notebook", + path.display() + ); + } + return Err(Box::default()); + } + notebook + } + Err(diagnostic) => { + // Failed to read the jupyter notebook + return Err(Box::new(Diagnostics { + messages: vec![Message::from_diagnostic( + *diagnostic, + SourceFileBuilder::new(path.map(Path::to_string_lossy).unwrap_or_default(), "") + .finish(), + TextSize::default(), + )], + ..Diagnostics::default() + })); + } + }; + + Ok(notebook) +} + /// Lint the source code at the given `Path`. pub(crate) fn lint_path( path: &Path, @@ -216,7 +256,7 @@ pub(crate) fn lint_path( // Read the file from disk let mut source_kind = if source_type.is_jupyter() { - match load_jupyter_notebook(path) { + match notebook_from_path(path) { Ok(notebook) => SourceKind::Jupyter(notebook), Err(diagnostic) => return Ok(*diagnostic), } @@ -278,7 +318,7 @@ pub(crate) fn lint_path( SourceKind::Jupyter(dest_notebook) => { // We need to load the notebook again, since we might've // mutated it. - let src_notebook = match load_jupyter_notebook(path) { + let src_notebook = match notebook_from_path(path) { Ok(notebook) => notebook, Err(diagnostic) => return Ok(*diagnostic), }; @@ -401,8 +441,19 @@ pub(crate) fn lint_stdin( noqa: flags::Noqa, autofix: flags::FixMode, ) -> Result { - let mut source_kind = SourceKind::Python(contents.to_string()); - let source_type = PySourceType::default(); + let source_type = path.map(PySourceType::from).unwrap_or_default(); + + let mut source_kind = if source_type.is_jupyter() { + // SAFETY: Jupyter isn't the default type, so we must have a path. + match notebook_from_contents(contents, path) { + Ok(notebook) => SourceKind::Jupyter(notebook), + Err(diagnostic) => return Ok(*diagnostic), + } + } else { + SourceKind::Python(contents.to_string()) + }; + + let contents = source_kind.content().to_string(); // Lint the inputs. let ( @@ -417,7 +468,7 @@ pub(crate) fn lint_stdin( transformed, fixed, }) = lint_fix( - contents, + &contents, path.unwrap_or_else(|| Path::new("-")), package, noqa, @@ -433,7 +484,7 @@ pub(crate) fn lint_stdin( flags::FixMode::Diff => { // But only write a diff if it's non-empty. if !fixed.is_empty() { - let text_diff = TextDiff::from_lines(contents, &transformed); + let text_diff = TextDiff::from_lines(contents.as_str(), &transformed); let mut unified_diff = text_diff.unified_diff(); if let Some(path) = path { unified_diff @@ -453,7 +504,7 @@ pub(crate) fn lint_stdin( } else { // If we fail to autofix, lint the original source code. let result = lint_only( - contents, + &contents, path.unwrap_or_else(|| Path::new("-")), package, settings, @@ -472,7 +523,7 @@ pub(crate) fn lint_stdin( } } else { let result = lint_only( - contents, + &contents, path.unwrap_or_else(|| Path::new("-")), package, settings, @@ -508,14 +559,21 @@ pub(crate) fn lint_stdin( mod tests { use std::path::Path; - use crate::diagnostics::{load_jupyter_notebook, Diagnostics}; + use crate::diagnostics::{notebook_from_contents, notebook_from_path, Diagnostics}; #[test] fn test_r() { let path = Path::new("../ruff/resources/test/fixtures/jupyter/R.ipynb"); - // No diagnostics is used as skip signal + // No diagnostics is used as skip signal. + assert_eq!( + notebook_from_path(path).unwrap_err(), + Box::::default() + ); + + let contents = std::fs::read_to_string(path).unwrap(); + // No diagnostics is used as skip signal. assert_eq!( - load_jupyter_notebook(path).unwrap_err(), + notebook_from_contents(&contents, Some(path)).unwrap_err(), Box::::default() ); } diff --git a/crates/ruff_cli/tests/integration_test.rs b/crates/ruff_cli/tests/integration_test.rs index 4d62a835d487b..bbb88ad0ecc36 100644 --- a/crates/ruff_cli/tests/integration_test.rs +++ b/crates/ruff_cli/tests/integration_test.rs @@ -81,6 +81,48 @@ Found 1 error. Ok(()) } +#[test] +fn stdin_source_type() -> Result<()> { + // Raise `TCH` errors in `.py` files. + let mut cmd = Command::cargo_bin(BIN_NAME)?; + let output = cmd + .args([ + "-", + "--format", + "text", + "--stdin-filename", + "TCH.py", + "--isolated", + ]) + .write_stdin("import os\n") + .assert() + .failure(); + assert_eq!( + str::from_utf8(&output.get_output().stdout)?, + r#"TCH.py:1:8: F401 [*] `os` imported but unused +Found 1 error. +[*] 1 potentially fixable with the --fix option. +"# + ); + + // But not in `.pyi` files. + let mut cmd = Command::cargo_bin(BIN_NAME)?; + cmd.args([ + "-", + "--format", + "text", + "--stdin-filename", + "TCH.pyi", + "--isolated", + "--select", + "TCH", + ]) + .write_stdin("import os\n") + .assert() + .success(); + Ok(()) +} + #[cfg(unix)] #[test] fn stdin_json() -> Result<()> {