In [17]:
import numpy as np
import nbformat  # read notebooks
from nbconvert.preprocessors import ExecutePreprocessor  # execute notebooks

# Inference After Drop the Losers Selection

https://nbconvert.readthedocs.io/en/latest/api/preprocessors.html

https://github.com/jupyter/nbconvert/blob/master/nbconvert/preprocessors/base.py

This notebooks aims to apply selective inference to `drop-the-losers.ipynb` by parsing notebook cell metadata and using a custom sublass of `nbconvert.preprocessors.ExecutePreprocessor`.

## Custom Preprocessor Class Definition

https://github.com/jupyter/nbconvert/blob/master/nbconvert/preprocessors/execute.py

Subclassed from `ExecutePreprocessor`

### Prototype from meeting

In [18]:
class MyExecutePreprocessor(ExecutePreprocessor):
    """Notebook preprocessor for selective inference.
    """
    def run_cell(self, cell, cell_index):
        """Run the specified notebook cell. Note: `cell` and `cell_index` generally
        aren't given by the user, but are automatically passed in as arguments.
        """
        print('cell metadata', cell.source, cell_index)
        cell.source = 'x=3\n' + cell.source
        return ExecutePreprocessor.run_cell(self, cell, cell_index)

### Updated Version

This version of the preprocessor instead focuses on modifying `preprocess_cell()`.

The goal here is to save certain outputs into the `MyExecutePreprocessor` object's `resources` dictionary, based on the metadata of each cell.

In [19]:
class MyExecutePreprocessor(ExecutePreprocessor):
    """Notebook preprocessor for selective inference.
    """
    def preprocess_cell(self, cell, resources, cell_index):
        """Executes a single code cell. Must return modified cell and resource dictionary.
        
        Parameters
        ----------
        cell : NotebookNode cell
            Notebook cell being processed
        resources : dictionary
            Additional resources used in the conversion process.  Allows
            preprocessors to pass variables into the Jinja engine.
        index : int
            Index of the cell being processed
        """
        # Original code from execute.py
        if cell.cell_type != 'code' or not cell.source.strip():
            return cell, resources

        reply, outputs = self.run_cell(cell, cell_index)  # main info from cell
        
        # NEW CODE: updating `resources` based on metadata
        if cell.metadata == 'selected_vars':
            resources.update({'selected_vars': outputs[0]})
        
        # Backwards compatibility for processes that wrap run_cell
        cell.outputs = outputs

        cell_allows_errors = (self.allow_errors or "raises-exception"
                              in cell.metadata.get("tags", []))

        if self.force_raise_errors or not cell_allows_errors:
            for out in cell.outputs:
                if out.output_type == 'error':
                    raise CellExecutionError.from_cell_and_msg(cell, out)
            if (reply is not None) and reply['content']['status'] == 'error':
                raise CellExecutionError.from_cell_and_msg(cell, reply['content'])

        return cell, resources

## Custom Preprocessor Tests

https://github.com/jupyter/nbconvert/blob/master/nbconvert/preprocessors/base.py

According to the docstring for `preprocess()` in `nbconvert.preprocessors.base`,

 > If you wish to apply your preprocessing to each cell, you might want to override preprocess_cell method instead.

Therefore, we focus on writing a custom `preprocess_cell()` function in our subclass.

In [20]:
# Read the drop the losers notebook
nbpath = 'drop-the-losers.ipynb'
nb = nbformat.read(nbpath, nbformat.NO_CONVERT)

# Loop over notebook cells
for cell in nb['cells']:
    print(cell['metadata'])

{}
{}
{}
{'save_output': True}
{}
{}


### Preprocessing a Single Cell

In [21]:
# Docstring for preprocess_cell()
"""
Parameters
----------
cell : NotebookNode cell
    Notebook cell being processed
resources : dictionary
    Additional resources used in the conversion process.  Allows
    preprocessors to pass variables into the Jinja engine.
index : int
    Index of the cell being processed
"""

'\nParameters\n----------\ncell : NotebookNode cell\n    Notebook cell being processed\nresources : dictionary\n    Additional resources used in the conversion process.  Allows\n    preprocessors to pass variables into the Jinja engine.\nindex : int\n    Index of the cell being processed\n'

In [22]:
mypp = MyExecutePreprocessor(timeout=600, kernel_name='python3')
#mypp.preprocess(nb, resources={})
my_cell = nb['cells'][0]
mypp.preprocess_cell(my_cell, {}, 0)

AttributeError: 'MyExecutePreprocessor' object has no attribute 'kc'