# Splitting Jupyter Notebooks

Scenario: split [nbs/05_by_example.ipynb](https://github.com/AnswerDotAI/fasthtml/commit/a8bf08c1f83004dd16f68bb3bd160af87275f900) into multiple notebooks:
* Intro
* Full Examples 1-4, each in its own notebook

I tried copy-pasting the cells into new notebooks using VS Code's Jupyter extension, but selection was buggy and base64 images embedded in the notebook were not copied over. 

I also tried viewing the source as text and manually constructing the JSON, but it was slow and error-prone.

Ultimately I had GPT-4o read the notebook from the GitHub URL and generate these scripts.

To create the intro notebook:

In [None]:
import json
import os
import re

# Notebook to split (path inside the local repository checkout)
notebook_path = "nbs/05_by_example.ipynb"

# Folder that receives the generated notebooks
output_dir = "fh_by_example"

# Parse the notebook's JSON into a plain dict
with open(notebook_path, mode="r", encoding="utf-8") as nb_file:
    notebook_content = json.loads(nb_file.read())

# Create the output folder if needed; an existing folder is left as is
os.makedirs(output_dir, exist_ok=True)

# Cells gathered for the intro, plus a flag that flips once the first
# "Full Example" header has been seen
intro_cells, found_first_example = [], False


def is_full_example_header(line):
    """Return True when *line* begins with "full example", ignoring case."""
    lowered = line.lower()
    return lowered.startswith("full example")


def create_notebook(cells):
    """Wrap *cells* in a notebook dict with empty metadata, marked nbformat 4.2."""
    notebook = {"cells": cells, "metadata": {}}
    # Format version fields: major 4, minor 2
    notebook["nbformat"] = 4
    notebook["nbformat_minor"] = 2
    return notebook


def save_notebook(cells, filename):
    """Write *cells* as a notebook named *filename* inside ``output_dir``.

    Does nothing (and prints nothing) when *cells* is empty.
    """
    if cells:
        target = os.path.join(output_dir, filename)

        # Serialize the wrapped cells straight into the target file
        with open(target, "w", encoding="utf-8") as out:
            json.dump(create_notebook(cells), out, indent=2)

        print(f"Saved {filename}")


# Collect every cell that precedes the first "Full Example" header.
for cell in notebook_content["cells"]:
    # The notebook format allows "source" to be either a list of lines or a
    # single string. Normalize to a list of lines (line endings kept) so a
    # string source is not walked character by character.
    raw_source = cell["source"]
    lines = (
        raw_source.splitlines(keepends=True)
        if isinstance(raw_source, str)
        else raw_source
    )

    # Only markdown cells are inspected for section headers. The header cell
    # itself belongs to the first example, so it is not added to the intro.
    if cell["cell_type"] == "markdown" and any(
        line.strip().startswith("#")
        and is_full_example_header(line.strip("# ").strip())
        for line in lines
    ):
        found_first_example = True
        break

    intro_cells.append(cell)

# Save the intro section before the first "Full Example"
# (save_notebook skips the write when no cells were collected)
save_notebook(intro_cells, "00_intro.ipynb")

print("Intro section has been saved as 00_intro.ipynb.")

This is in scripts/split_fbhe_intro.py.

To create the example notebooks:

In [None]:
import json
import os
import re

# Notebook to split (path inside the local repository checkout)
notebook_path = "nbs/05_by_example.ipynb"

# Folder that receives the generated notebooks
output_dir = "fh_by_example"

# Parse the notebook's JSON into a plain dict
with open(notebook_path, mode="r", encoding="utf-8") as nb_file:
    notebook_content = json.loads(nb_file.read())

# Create the output folder if needed; an existing folder is left as is
os.makedirs(output_dir, exist_ok=True)

# State for the section currently being collected: its title, its cells,
# and the running number used in output filenames
section_name, section_cells = None, []
example_counter = 1


def is_full_example_header(line):
    """Return True when *line* begins with "full example", ignoring case."""
    lowered = line.lower()
    return lowered.startswith("full example")


def sanitize_filename(name):
    """Turn a section title into a lowercase slug usable in a filename.

    Each run of non-word characters becomes a single underscore, so only
    letters, digits and underscores remain.
    """
    # Splitting on the separator runs and re-joining with "_" is the same
    # as substituting each run with "_"
    pieces = re.split(r"\W+", name)
    return "_".join(pieces).lower()


def create_notebook(cells):
    """Wrap *cells* in a notebook dict with empty metadata, marked nbformat 4.2."""
    notebook = {"cells": cells, "metadata": {}}
    # Format version fields: major 4, minor 2
    notebook["nbformat"] = 4
    notebook["nbformat_minor"] = 2
    return notebook


def save_section(section_name, section_cells, example_counter):
    """Write one section's cells to a numbered notebook inside ``output_dir``.

    The filename is the zero-padded *example_counter*, the literal ``_ex_``,
    and the sanitized *section_name*. Does nothing when the name or the cell
    list is empty.
    """
    # A section needs both a title and at least one cell to be written
    if not (section_name and section_cells):
        return

    short_name = sanitize_filename(section_name)
    filename = f"{example_counter:02d}_ex_{short_name}.ipynb"
    target = os.path.join(output_dir, filename)

    # Serialize the wrapped cells straight into the target file
    with open(target, "w", encoding="utf-8") as out:
        json.dump(create_notebook(section_cells), out, indent=2)

    print(f"Saved {filename}")


# Walk the cells, starting a new section at every "Full Example" header.
for cell in notebook_content["cells"]:
    # The notebook format allows "source" to be either a list of lines or a
    # single string. Normalize to a list of lines (line endings kept) so a
    # string source is not walked character by character.
    raw_source = cell["source"]
    lines = (
        raw_source.splitlines(keepends=True)
        if isinstance(raw_source, str)
        else raw_source
    )

    # Only markdown cells are inspected for section headers; the first
    # matching heading line in the cell provides the section title.
    header = None
    if cell["cell_type"] == "markdown":
        for line in lines:
            if line.strip().startswith("#") and is_full_example_header(
                line.strip("# ").strip()
            ):
                header = line.strip("# ").strip()
                break

    if header is not None:
        # Flush the section that just ended. The counter advances only after
        # a section has actually been saved, so the first example is written
        # with the initial counter value instead of skipping a number.
        if section_name:
            save_section(section_name, section_cells, example_counter)
            example_counter += 1

        # Start a new section; the header cell is its first cell
        section_name = header
        section_cells = [cell]
    elif section_name:
        # Markdown without a header, or a non-markdown cell: it belongs to
        # the current section. Cells before the first header are skipped.
        section_cells.append(cell)

# Save the last section if any
save_section(section_name, section_cells, example_counter)

print("Notebook has been split into separate files.")

This is in scripts/split_fbhe_examples.py.

Jeremy's tips:

FYI, you might find our little API useful: https://fastai.github.io/execnb/nbio.html

Also if you use nbclassic and the collapsible headings extension there’s some nice shortcuts for selecting whole sections. Then you can just copy a notebook and delete bits from each. 

* [nbclassic](https://github.com/jupyter/nbclassic)
* [Collapsible Headings](https://jupyter-contrib-nbextensions.readthedocs.io/en/latest/nbextensions/collapsible_headings/readme.html)

In [None]:
# Shell commands: run in a terminal, or prefix each with `!` in a notebook cell.
# Install nbclassic and the contrib nbextensions package, which the
# Collapsible Headings link above belongs to.
pip install nbclassic jupyter_contrib_nbextensions
# Run the contrib package's install step with the --user flag.
jupyter contrib nbextension install --user