Skip to content

Commit

Permalink
Implement --diff for Jupyter Notebooks (#6149)
Browse files Browse the repository at this point in the history
## Summary

Implement `--diff` for Jupyter Notebooks

## Test Plan

1. Use `crates/ruff/resources/test/fixtures/jupyter/isort.ipynb` as a
test case
and add a markdown cell in between the code cells to check that the diff
   outputs the correct cell index.
2. Run the command:
`cargo run --bin ruff --package ruff_cli -- check --no-cache --isolated
--select=ALL crates/ruff/resources/test/fixtures/jupyter/isort.ipynb
--fix --diff`

<details><summary>Example output:</summary>
<p>

```diff
--- /Users/dhruv/playground/ruff/notebooks/test.ipynb:cell 0
+++ /Users/dhruv/playground/ruff/notebooks/test.ipynb:cell 0
@@ -1,3 +0,0 @@
-from pathlib import Path
-import random
-import math
--- /Users/dhruv/playground/ruff/notebooks/test.ipynb:cell 4
+++ /Users/dhruv/playground/ruff/notebooks/test.ipynb:cell 4
@@ -1,5 +1,3 @@
-from typing import Any
-import collections
 # Newline should be added here
 def foo():
     pass

--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 8
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 8
@@ -1,8 +1,7 @@
 import pprint
 import tempfile
 
-from IPython import display
 import matplotlib.pyplot as plt
-
 import tensorflow as tf
-import tensorflow_datasets as tfds
+import tensorflow_datasets as tfds
+from IPython import display
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 10
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 10
@@ -1,5 +1,4 @@
 import tensorflow_models as tfm
 
 # These are not in the tfm public API for v2.9. They will be available in v2.10
-from official.vision.serving import export_saved_model_lib
-import official.core.train_lib
+from official.vision.serving import export_saved_model_lib
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 13
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 13
@@ -1,5 +1,5 @@
-exp_config = tfm.core.exp_factory.get_exp_config('resnet_imagenet')
-tfds_name = 'cifar10'
+exp_config = tfm.core.exp_factory.get_exp_config("resnet_imagenet")
+tfds_name = "cifar10"
 ds,ds_info = tfds.load(
 tfds_name,
 with_info=True)
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 15
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 15
@@ -6,12 +6,12 @@
 # Configure training and testing data
 batch_size = 128
 
-exp_config.task.train_data.input_path = ''
+exp_config.task.train_data.input_path = ""
 exp_config.task.train_data.tfds_name = tfds_name
-exp_config.task.train_data.tfds_split = 'train'
+exp_config.task.train_data.tfds_split = "train"
 exp_config.task.train_data.global_batch_size = batch_size
 
-exp_config.task.validation_data.input_path = ''
+exp_config.task.validation_data.input_path = ""
 exp_config.task.validation_data.tfds_name = tfds_name
-exp_config.task.validation_data.tfds_split = 'test'
+exp_config.task.validation_data.tfds_split = "test"
 exp_config.task.validation_data.global_batch_size = batch_size
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 17
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 17
@@ -1,16 +1,16 @@
 logical_device_names = [logical_device.name for logical_device in tf.config.list_logical_devices()]
 
-if 'GPU' in ''.join(logical_device_names):
-  print('This may be broken in Colab.')
-  device = 'GPU'
-elif 'TPU' in ''.join(logical_device_names):
-  print('This may be broken in Colab.')
-  device = 'TPU'
+if "GPU" in "".join(logical_device_names):
+  print("This may be broken in Colab.")
+  device = "GPU"
+elif "TPU" in "".join(logical_device_names):
+  print("This may be broken in Colab.")
+  device = "TPU"
 else:
-  print('Running on CPU is slow, so only train for a few steps.')
-  device = 'CPU'
+  print("Running on CPU is slow, so only train for a few steps.")
+  device = "CPU"
 
-if device=='CPU':
+if device=="CPU":
   train_steps = 20
   exp_config.trainer.steps_per_loop = 5
 else:
@@ -20,9 +20,9 @@
 exp_config.trainer.summary_interval = 100
 exp_config.trainer.checkpoint_interval = train_steps
 exp_config.trainer.validation_interval = 1000
-exp_config.trainer.validation_steps =  ds_info.splits['test'].num_examples // batch_size
+exp_config.trainer.validation_steps =  ds_info.splits["test"].num_examples // batch_size
 exp_config.trainer.train_steps = train_steps
-exp_config.trainer.optimizer_config.learning_rate.type = 'cosine'
+exp_config.trainer.optimizer_config.learning_rate.type = "cosine"
 exp_config.trainer.optimizer_config.learning_rate.cosine.decay_steps = train_steps
 exp_config.trainer.optimizer_config.learning_rate.cosine.initial_learning_rate = 0.1
 exp_config.trainer.optimizer_config.warmup.linear.warmup_steps = 100
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 21
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 21
@@ -1,14 +1,14 @@
 logical_device_names = [logical_device.name for logical_device in tf.config.list_logical_devices()]
 
 if exp_config.runtime.mixed_precision_dtype == tf.float16:
-    tf.keras.mixed_precision.set_global_policy('mixed_float16')
+    tf.keras.mixed_precision.set_global_policy("mixed_float16")
 
-if 'GPU' in ''.join(logical_device_names):
+if "GPU" in "".join(logical_device_names):
   distribution_strategy = tf.distribute.MirroredStrategy()
-elif 'TPU' in ''.join(logical_device_names):
+elif "TPU" in "".join(logical_device_names):
   tf.tpu.experimental.initialize_tpu_system()
-  tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='/device:TPU_SYSTEM:0')
+  tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="/device:TPU_SYSTEM:0")
   distribution_strategy = tf.distribute.experimental.TPUStrategy(tpu)
 else:
-  print('Warning: this will be really slow.')
+  print("Warning: this will be really slow.")
   distribution_strategy = tf.distribute.OneDeviceStrategy(logical_device_names[0])
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 23
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 23
@@ -1,5 +1,3 @@
 with distribution_strategy.scope():
   model_dir = tempfile.mkdtemp()
   task = tfm.core.task_factory.get_task(exp_config.task, logging_dir=model_dir)
-
-#  tf.keras.utils.plot_model(task.build_model(), show_shapes=True)
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 24
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 24
@@ -1,4 +1,4 @@
 for images, labels in task.build_inputs(exp_config.task.train_data).take(1):
   print()
-  print(f'images.shape: {str(images.shape):16}  images.dtype: {images.dtype!r}')
-  print(f'labels.shape: {str(labels.shape):16}  labels.dtype: {labels.dtype!r}')
+  print(f"images.shape: {images.shape!s:16}  images.dtype: {images.dtype!r}")
+  print(f"labels.shape: {labels.shape!s:16}  labels.dtype: {labels.dtype!r}")
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 27
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 27
@@ -1 +1 @@
-plt.hist(images.numpy().flatten());
+plt.hist(images.numpy().flatten())
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 29
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 29
@@ -1,2 +1,2 @@
-label_info = ds_info.features['label']
+label_info = ds_info.features["label"]
 label_info.int2str(1)
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 31
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 31
@@ -10,9 +10,6 @@
     if predictions is None:
       plt.title(label_info.int2str(labels[i]))
     else:
-      if labels[i] == predictions[i]:
-        color = 'g'
-      else:
-        color = 'r'
+      color = "g" if labels[i] == predictions[i] else "r"
       plt.title(label_info.int2str(predictions[i]), color=color)
     plt.axis("off")
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 35
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 35
@@ -1,3 +1,3 @@
-plt.figure(figsize=(10, 10));
+plt.figure(figsize=(10, 10))
 for images, labels in task.build_inputs(exp_config.task.validation_data).take(1):
   show_batch(images, labels)
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 37
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 37
@@ -1,7 +1,7 @@
 model, eval_logs = tfm.core.train_lib.run_experiment(
     distribution_strategy=distribution_strategy,
     task=task,
-    mode='train_and_eval',
+    mode="train_and_eval",
     params=exp_config,
     model_dir=model_dir,
     run_post_eval=True)
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 38
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 38
@@ -1 +0,0 @@
-#  tf.keras.utils.plot_model(model, show_shapes=True)
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 40
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 40
@@ -1,4 +1,4 @@
 for key, value in eval_logs.items():
     if isinstance(value, tf.Tensor):
       value = value.numpy()
-    print(f'{key:20}: {value:.3f}')
+    print(f"{key:20}: {value:.3f}")
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 42
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 42
@@ -4,5 +4,5 @@
 
 show_batch(images, labels, tf.cast(predictions, tf.int32))
 
-if device=='CPU':
-  plt.suptitle('The model was only trained for a few steps, it is not expected to do well.')
+if device=="CPU":
+  plt.suptitle("The model was only trained for a few steps, it is not expected to do well.")
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 45
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 45
@@ -1,8 +1,8 @@
 # Saving and exporting the trained model
 export_saved_model_lib.export_inference_graph(
-    input_type='image_tensor',
+    input_type="image_tensor",
     batch_size=1,
     input_image_size=[32, 32],
     params=exp_config,
     checkpoint_path=tf.train.latest_checkpoint(model_dir),
-    export_dir='./export/')
+    export_dir="./export/")
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 47
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 47
@@ -1,3 +1,3 @@
 # Importing SavedModel
-imported = tf.saved_model.load('./export/')
-model_fn = imported.signatures['serving_default']
+imported = tf.saved_model.load("./export/")
+model_fn = imported.signatures["serving_default"]
--- /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 49
+++ /Users/dhruv/playground/ruff/notebooks/image_classification.ipynb:cell 49
@@ -1,10 +1,10 @@
 plt.figure(figsize=(10, 10))
-for data in tfds.load('cifar10', split='test').batch(12).take(1):
+for data in tfds.load("cifar10", split="test").batch(12).take(1):
   predictions = []
-  for image in data['image']:
-    index = tf.argmax(model_fn(image[tf.newaxis, ...])['logits'], axis=1)[0]
+  for image in data["image"]:
+    index = tf.argmax(model_fn(image[tf.newaxis, ...])["logits"], axis=1)[0]
     predictions.append(index)
-  show_batch(data['image'], data['label'], predictions)
+  show_batch(data["image"], data["label"], predictions)
 
-  if device=='CPU':
-    plt.suptitle('The model was only trained for a few steps, it is not expected to do better than random.')
+  if device=="CPU":
+    plt.suptitle("The model was only trained for a few steps, it is not expected to do better than random.")

Would fix 61 errors.
```

</p>
</details> 

resolves: #4727
  • Loading branch information
dhruvmanila committed Jul 29, 2023
1 parent 4802c7c commit 3c99fbf
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 8 deletions.
20 changes: 20 additions & 0 deletions crates/ruff/src/jupyter/notebook.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::cmp::Ordering;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write};
use std::iter;
Expand Down Expand Up @@ -41,6 +42,20 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
Ok(String::from_utf8(writer)?)
}

impl Display for SourceValue {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
SourceValue::String(string) => f.write_str(string),
SourceValue::StringArray(string_array) => {
for string in string_array {
f.write_str(string)?;
}
Ok(())
}
}
}
}

impl Cell {
/// Return the [`SourceValue`] of the cell.
fn source(&self) -> &SourceValue {
Expand Down Expand Up @@ -407,6 +422,11 @@ impl Notebook {
self.content = transformed.to_string();
}

/// Return a slice of [`Cell`] in the Jupyter notebook.
pub fn cells(&self) -> &[Cell] {
&self.raw.cells
}

/// Return `true` if the notebook is a Python notebook, `false` otherwise.
pub fn is_python_notebook(&self) -> bool {
self.raw
Expand Down
67 changes: 59 additions & 8 deletions crates/ruff_cli/src/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use similar::TextDiff;
use std::os::unix::fs::PermissionsExt;

use crate::cache::Cache;
use ruff::jupyter::Notebook;
use ruff::jupyter::{Cell, Notebook};
use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
use ruff::logging::DisplayParseError;
use ruff::message::Message;
Expand Down Expand Up @@ -261,13 +261,64 @@ pub(crate) fn lint_path(
}
},
flags::FixMode::Diff => {
let mut stdout = io::stdout().lock();
TextDiff::from_lines(contents.as_str(), &transformed)
.unified_diff()
.header(&fs::relativize_path(path), &fs::relativize_path(path))
.to_writer(&mut stdout)?;
stdout.write_all(b"\n")?;
stdout.flush()?;
match &source_kind {
SourceKind::Python(_) => {
let mut stdout = io::stdout().lock();
TextDiff::from_lines(contents.as_str(), &transformed)
.unified_diff()
.header(&fs::relativize_path(path), &fs::relativize_path(path))
.to_writer(&mut stdout)?;
stdout.write_all(b"\n")?;
stdout.flush()?;
}
SourceKind::Jupyter(dest_notebook) => {
// We need to load the notebook again, since we might've
// mutated it.
let src_notebook = match load_jupyter_notebook(path) {
Ok(notebook) => notebook,
Err(diagnostic) => return Ok(*diagnostic),
};
let mut stdout = io::stdout().lock();
for ((idx, src_cell), dest_cell) in src_notebook
.cells()
.iter()
.enumerate()
.zip(dest_notebook.cells().iter())
{
let (Cell::Code(src_code_cell), Cell::Code(dest_code_cell)) = (src_cell, dest_cell) else {
continue;
};
TextDiff::from_lines(
&src_code_cell.source.to_string(),
&dest_code_cell.source.to_string(),
)
.unified_diff()
// Jupyter notebook cells don't necessarily have a newline
// at the end. For example,
//
// ```python
// print("hello")
// ```
//
// For a cell containing the above code, there'll only be one line,
// and it won't have a newline at the end. If it did, there'd be
// two lines, and the second line would be empty:
//
// ```python
// print("hello")
//
// ```
.missing_newline_hint(false)
.header(
&format!("{}:cell {}", &fs::relativize_path(path), idx),
&format!("{}:cell {}", &fs::relativize_path(path), idx),
)
.to_writer(&mut stdout)?;
}
stdout.write_all(b"\n")?;
stdout.flush()?;
}
}
}
flags::FixMode::Generate => {}
}
Expand Down

0 comments on commit 3c99fbf

Please sign in to comment.