Skip to content

Commit

Permalink
Avoid cloning source code multiple times (#6629)
Browse files Browse the repository at this point in the history
## Summary

In working on #6628, I noticed
that we clone the source code contents, potentially multiple times,
prior to linting. The issue is that `SourceKind::Python` takes a
`String`, so we first have to provide it with a `String`. In the stdin
case, that means cloning. However, on top of this, we then have to clone
`source_kind.contents()` because `SourceKind` gets mutated. So for
stdin, we end up cloning twice. For non-stdin, we end up cloning once,
but unnecessarily (since the _contents_ don't get mutated, only the
kind).

This PR removes the `String` from `source_kind`, instead requiring that
we parse it out elsewhere. It reduces the number of clones down to 1 for
Jupyter Notebooks, and zero otherwise.
  • Loading branch information
charliermarsh committed Aug 18, 2023
1 parent 0cea497 commit 2aeb273
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 100 deletions.
22 changes: 11 additions & 11 deletions crates/ruff/src/jupyter/notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
err
)
})?;
let code = notebook.content().to_string();
let code = notebook.source_code().to_string();
notebook.update_cell_content(&code);
let mut writer = Vec::new();
notebook.write_inner(&mut writer)?;
Expand Down Expand Up @@ -103,7 +103,7 @@ pub struct Notebook {
/// separated by a newline and a trailing newline. The trailing newline
/// is added to make sure that each cell ends with a newline which will
/// be removed when updating the cell content.
content: String,
source_code: String,
/// The index of the notebook. This is used to map between the concatenated
/// source code and the original notebook.
index: OnceCell<JupyterIndex>,
Expand Down Expand Up @@ -132,8 +132,8 @@ impl Notebook {
}

/// Read the Jupyter Notebook from its JSON string.
pub fn from_contents(contents: &str) -> Result<Self, Box<Diagnostic>> {
Self::from_reader(Cursor::new(contents))
pub fn from_source_code(source_code: &str) -> Result<Self, Box<Diagnostic>> {
Self::from_reader(Cursor::new(source_code))
}

/// Read a Jupyter Notebook from a [`Read`] implementor.
Expand Down Expand Up @@ -268,7 +268,7 @@ impl Notebook {
// The additional newline at the end is to maintain consistency for
// all cells. These newlines will be removed before updating the
// source code with the transformed content. Refer `update_cell_content`.
content: contents.join("\n") + "\n",
source_code: contents.join("\n") + "\n",
cell_offsets,
valid_code_cells,
trailing_newline,
Expand Down Expand Up @@ -404,8 +404,8 @@ impl Notebook {
/// Return the notebook content.
///
/// This is the concatenation of all Python code cells.
pub(crate) fn content(&self) -> &str {
&self.content
pub fn source_code(&self) -> &str {
&self.source_code
}

/// Return the Jupyter notebook index.
Expand All @@ -429,7 +429,7 @@ impl Notebook {
// it depends on the offsets to extract the cell content.
self.update_cell_offsets(source_map);
self.update_cell_content(transformed);
self.content = transformed.to_string();
self.source_code = transformed.to_string();
}

/// Return a slice of [`Cell`] in the Jupyter notebook.
Expand Down Expand Up @@ -482,8 +482,8 @@ mod tests {
/// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
let path = test_resource_path("fixtures/jupyter/cell").join(path);
let contents = std::fs::read_to_string(path)?;
Ok(serde_json::from_str(&contents)?)
let source_code = std::fs::read_to_string(path)?;
Ok(serde_json::from_str(&source_code)?)
}

#[test]
Expand Down Expand Up @@ -536,7 +536,7 @@ mod tests {
fn test_concat_notebook() -> Result<()> {
let notebook = read_jupyter_notebook(Path::new("valid.ipynb"))?;
assert_eq!(
notebook.content,
notebook.source_code,
r#"def unused_variable():
x = 1
y = 2
Expand Down
10 changes: 1 addition & 9 deletions crates/ruff/src/source_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,11 @@ use crate::jupyter::Notebook;

#[derive(Clone, Debug, PartialEq, is_macro::Is)]
pub enum SourceKind {
Python(String),
Python,
Jupyter(Notebook),
}

impl SourceKind {
/// Return the source content.
pub fn content(&self) -> &str {
match self {
SourceKind::Python(content) => content,
SourceKind::Jupyter(notebook) => notebook.content(),
}
}

/// Return the [`Notebook`] if the source kind is [`SourceKind::Jupyter`].
pub fn notebook(&self) -> Option<&Notebook> {
if let Self::Jupyter(notebook) = self {
Expand Down
34 changes: 18 additions & 16 deletions crates/ruff/src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ use std::path::Path;
#[cfg(not(fuzzing))]
use anyhow::Result;
use itertools::Itertools;
use ruff_python_parser::lexer::LexResult;

use rustc_hash::FxHashMap;

use ruff_diagnostics::{AutofixKind, Diagnostic};
use ruff_python_ast::PySourceType;
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::AsMode;
use ruff_python_trivia::textwrap::dedent;
use ruff_source_file::{Locator, SourceFileBuilder};
Expand Down Expand Up @@ -53,7 +52,8 @@ pub(crate) fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<V
let path = test_resource_path("fixtures").join(path);
let contents = std::fs::read_to_string(&path)?;
Ok(test_contents(
&mut SourceKind::Python(contents),
&contents,
&mut SourceKind::Python,
&path,
settings,
))
Expand All @@ -65,14 +65,16 @@ pub(crate) fn test_notebook_path(
expected: impl AsRef<Path>,
settings: &Settings,
) -> Result<(Vec<Message>, SourceKind, SourceKind)> {
let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(path.as_ref())?);
let notebook = read_jupyter_notebook(path.as_ref())?;
let contents = notebook.source_code().to_string();
let mut source_kind = SourceKind::Jupyter(notebook);
let original_source_kind = source_kind.clone();
let messages = test_contents(&mut source_kind, path.as_ref(), settings);
let messages = test_contents(&contents, &mut source_kind, path.as_ref(), settings);
let expected_notebook = read_jupyter_notebook(expected.as_ref())?;
if let SourceKind::Jupyter(notebook) = &source_kind {
assert_eq!(notebook.cell_offsets(), expected_notebook.cell_offsets());
assert_eq!(notebook.index(), expected_notebook.index());
assert_eq!(notebook.content(), expected_notebook.content());
assert_eq!(notebook.source_code(), expected_notebook.source_code());
};
Ok((messages, original_source_kind, source_kind))
}
Expand All @@ -81,11 +83,7 @@ pub(crate) fn test_notebook_path(
pub fn test_snippet(contents: &str, settings: &Settings) -> Vec<Message> {
let path = Path::new("<filename>");
let contents = dedent(contents);
test_contents(
&mut SourceKind::Python(contents.to_string()),
path,
settings,
)
test_contents(&contents, &mut SourceKind::Python, path, settings)
}

thread_local! {
Expand All @@ -102,11 +100,15 @@ pub(crate) fn max_iterations() -> usize {

/// A convenient wrapper around [`check_path`], that additionally
/// asserts that autofixes converge after a fixed number of iterations.
fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
let contents = source_kind.content().to_string();
fn test_contents(
contents: &str,
source_kind: &mut SourceKind,
path: &Path,
settings: &Settings,
) -> Vec<Message> {
let source_type = PySourceType::from(path);
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, source_type.as_mode());
let locator = Locator::new(&contents);
let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
let locator = Locator::new(contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
let directives = directives::extract_directives(
Expand Down Expand Up @@ -225,7 +227,7 @@ Source with applied fixes:

let source_code = SourceFileBuilder::new(
path.file_name().unwrap().to_string_lossy().as_ref(),
contents.as_str(),
contents,
)
.finish();

Expand Down
Loading

0 comments on commit 2aeb273

Please sign in to comment.