# Build a Carpentries lesson using Jupyter Notebooks

This notebook creates R Markdown files from Jupyter Notebooks that can be used with the Carpentries Workbench. See the README for guidance about formatting the notebooks.

In [None]:
import glob
import json
import os
import re
from textwrap import wrap

import yaml

In [None]:
#: Set of fence names recognized by Cell.fence, which matches them against a
#: cell's metadata tags and against the lowercased text of a markdown header.
#: "continue" is special: Cell.is_part_of() always attaches a cell with that
#: fence to the preceding block.
FENCES = {
    "callout",
    "challenge",
    "checklist",
    "discussion",
    "instructor",
    "keypoints",
    "objectives",
    "prereq",
    "questions",
    "solution",
    "testimonial",
    "continue",
}


class Cell:
    """Formats a notebook cell as R Markdown

    Parameters
    ----------
    cell : dict
        a cell from the "cells" list of a notebook's JSON. The keys
        "cell_type", "source", and "metadata" are read. Both "source" and
        "metadata.tags" may be modified in place (see source and fence).
    """

    def __init__(self, cell):
        self.cell = cell

    def __str__(self):
        # When the header only names the fence (for example, a "## Challenge"
        # header on a challenge cell), leave that first line out of the body
        if self.fence == self.title.lower():
            return "\n".join(self.text.splitlines()[1:]).strip()
        return self.text.strip()

    @property
    def cell_type(self):
        """Gets the value for cell_type from the cell dict"""
        return self.cell["cell_type"]

    @property
    def source(self):
        """Gets and trims the value for source from the cell dict

        Leading blank lines are removed from the cell dict itself, so the
        trimmed source is what gets written back by Notebook.to_notebook().

        Returns
        -------
        list of str
            the lines of the cell, each keeping its line ending
        """
        source = self.cell["source"]

        # The notebook format allows source to be either a list of lines or
        # a single string. Normalize to a list so indexing yields lines, not
        # characters, and so blank lines can be deleted in place.
        if isinstance(source, str):
            source = source.splitlines(keepends=True)
            self.cell["source"] = source

        while source and not source[0].strip():
            del source[0]
        return source

    @property
    def tags(self):
        """Gets the value for metadata.tags from the cell dict"""
        return self.cell["metadata"].get("tags", [])

    @tags.setter
    def tags(self, tags):
        self.cell["metadata"]["tags"] = tags

    @property
    def title(self):
        """Gets the text of a markdown header without the leading hashes"""
        return self.header.lstrip("# ")

    @property
    def header(self):
        """Gets the header of a markdown cell

        Returns
        -------
        str
            the first line of the cell if the cell is markdown and that
            line starts with "#", otherwise an empty string
        """
        if (
            self.cell_type == "markdown"
            and self.source
            and self.source[0].startswith("#")
        ):
            return self.source[0].rstrip()
        return ""

    @property
    def header_level(self):
        """Gets the header level (0 if the cell has no header)"""
        return len(re.match("#*", self.header).group())

    @property
    def text(self):
        """Gets the text of the cell

        Code cells are returned unchanged. Markdown cells are re-wrapped to
        72 characters with normalized list markers. Tables, headers, and
        fenced code blocks are never wrapped because breaking those lines
        changes what they mean.
        """
        text = "".join(self.source)
        if self.cell_type == "code":
            return text

        list_pattern = r"^( *)(\d+\.|\+|\-|\*) "
        header_pattern = r"#{1,6} "

        # Wrap markdown
        num_chars = 72
        lines = []
        in_code_block = False
        for line in text.splitlines():

            # Copy fenced code blocks through untouched. A delimiter is a
            # line that starts with ``` and has an odd number of them, so
            # an inline ```snippet``` on one line does not toggle the state.
            is_delimiter = (
                line.lstrip().startswith("```") and line.count("```") % 2 == 1
            )
            if in_code_block or is_delimiter:
                lines.append(line.rstrip())
                if is_delimiter:
                    in_code_block = not in_code_block
                    if not in_code_block:
                        # Separate the closed block from what follows
                        lines.append("")
                continue

            # Wrap lists
            match = re.match(list_pattern, line)
            if match:

                indent, bullet = match.groups()
                if bullet in "+*":
                    bullet = "-"

                # Pad the marker to four columns but always keep at least
                # one space after it, so markers like "100." stay separated
                # from the item text
                bullet = bullet.ljust(3) + " "

                init_indent = indent + bullet
                sub_indent = indent + " " * len(bullet)

                lines.extend(
                    wrap(
                        line[match.end() :],
                        num_chars - len(sub_indent),
                        initial_indent=init_indent,
                        subsequent_indent=sub_indent,
                    )
                )

            # Do not wrap tables
            elif line.startswith("|"):
                lines.append(line)

            # Do not wrap headers. A wrapped header becomes a short header
            # followed by a stray paragraph.
            elif re.match(header_pattern, line):
                lines.extend([line.rstrip(), ""])

            # Wrap all other markdown
            else:
                lines.extend(wrap(line, num_chars) + [""])

        text = "\n".join(lines)

        # Single space code blocks (kept so existing output stays the same)
        for code_block in re.findall("```.*?```", text, flags=re.DOTALL):
            text = text.replace(code_block, re.sub(r"\n+", "\n", code_block), 1)

        return re.sub(r"\n{2,}", "\n\n", text)

    @property
    def fence(self):
        """Gets the name of the fence

        If the cell has no fence tag but its header names a fence, the fence
        name is added as the first tag of the cell as a side effect.

        Returns
        -------
        str or None
            the first tag that is a fence name, the lowercased header text
            if that is a fence name, or None
        """
        tags = self.tags

        # Keep the order the tags have in the notebook. Going through a set
        # would make both the chosen fence and the order of the tags written
        # back depend on string hashing, which changes between runs.
        fences = [tag for tag in tags if tag in FENCES]
        other = list(dict.fromkeys(tag for tag in tags if tag not in FENCES))

        title = self.title.lower()
        if not fences and title in FENCES:
            self.tags = [title] + other
            return title
        return fences[0] if fences else None

    def is_part_of(self, block, always_part=None):
        """Checks if a cell is part of another block

        Parameters
        ----------
        block : Block
            the preceding block in the notebook
        always_part : list
            fence names that are always part of the preceding block

        Returns
        -------
        bool
            True if cell is part of the preceding block
        """
        return (
            # Explicit continuation of whatever came before
            self.fence == "continue"
            # Fences the caller wants attached to the preceding block
            or always_part
            and self.fence in always_part
            # Consecutive unfenced cells of the same type
            or not block.fence
            and not self.fence
            and block.cell_type == self.cell_type
        )

    def to_markdown(self, include_fence=True):
        """Converts the cell to markdown

        Currently assumes all code cells are python.

        Parameters
        ----------
        include_fence : bool
            whether to wrap the cell in its fence. Pass False when the
            caller applies the fence itself, as Block.to_markdown() does.

        Returns
        -------
        str
            the cell formatted in Markdown, or an empty string if the cell
            has no content
        """
        # Render once; str(self) re-wraps the whole cell on every call
        content = str(self)
        if not content:
            return ""

        if self.cell_type == "code":
            text = f"```{{python, error=TRUE}}\n{content}\n```"
        else:
            text = content

        if self.fence and include_fence:
            return fence(text, self.fence)
        return text.rstrip() + "\n"


class Block:
    """Formats a group of related notebook cells as R Markdown

    Parameters
    ----------
    cells : list of Cell
        the cells that make up the block. The first cell determines the
        cell type and fence of the whole block.
    """

    def __init__(self, cells):
        self.cells = cells

    def __iter__(self):
        return iter(self.cells)

    def __str__(self):
        return "\n".join(str(cell) for cell in self.cells)

    @property
    def fence(self):
        """The fence type of the first cell in the block"""
        first = self.cells[0]
        return first.fence

    @property
    def cell_type(self):
        """The cell_type of the first cell in the block"""
        first = self.cells[0]
        return first.cell_type

    def to_markdown(self, include_fence=True):
        """Converts the block to markdown

        Parameters
        ----------
        include_fence : bool
            whether to wrap a block without nested solutions in its fence

        Returns
        -------
        str
            the block formatted in Markdown
        """
        has_nested_solution = self.fence != "solution" and "solution" in [
            cell.fence for cell in self.cells
        ]

        # Simple case: render the cells unfenced, then fence them as a unit
        if not has_nested_solution:
            rendered = "\n".join(cell.to_markdown(False) for cell in self.cells)
            if self.fence and include_fence:
                rendered = fence(rendered, self.fence)
            return rendered.rstrip() + "\n"

        # Regroup the cells so each run of solution cells gets its own fence
        # inside the fence of the block
        parts = []
        for sub_block in cells_to_blocks(self.cells):
            part = "\n".join(cell.to_markdown(False) for cell in sub_block.cells)
            if sub_block.fence == "solution":
                part = fence(part, sub_block.fence)
            parts.append(part)
        return fence("\n".join(parts), self.fence)


class Notebook:
    """Formats a notebook as R Markdown

    Parameters
    ----------
    path : str
        path to the notebook file
    kwargs :
        keyword arguments to pass to open(). Text-mode reads use UTF-8
        unless an encoding is given.
    """

    def __init__(self, path, **kwargs):
        self.path = path

        # JSON text is UTF-8, so do not fall back to the platform's default
        # encoding. Binary mode does not accept an encoding argument.
        if "b" not in kwargs.get("mode", "r"):
            kwargs.setdefault("encoding", "utf-8")

        with open(path, **kwargs) as f:
            self.json = json.load(f)
        self.blocks = cells_to_blocks(self.json["cells"], always_part=["solution"])

    def __str__(self):
        content = "\n".join([b.to_markdown().rstrip("\n") + "\n" for b in self.blocks])

        # No re.MULTILINE flag, so this only drops a level-1 header that is
        # the very first line. The title goes in the YAML front matter.
        content = re.sub("^# .*", "", content).strip()
        return f"---\n{yaml.dump(self.metadata)}---\n\n" + content.rstrip() + "\n"

    def __iter__(self):
        return iter(self.blocks)

    @property
    def metadata(self):
        """Extracts metadata about the episode

        Returns
        -------
        dict
            the title of the episode under the key "title", taken from the
            last level-1 markdown header in the notebook. Empty if the
            notebook has no level-1 header.
        """
        metadata = {}
        for block in self.blocks:
            for cell in block:
                if cell.cell_type == "markdown" and cell.header_level == 1:
                    metadata["title"] = cell.title
        return metadata

    def to_markdown(self, path=None):
        """Writes notebook to markdown

        Parameters
        ----------
        path : str
            path for the markdown file. If omitted, nothing is written and
            the markdown is only returned.

        Returns
        -------
        str
            notebook content as markdown
        """
        # Render once and reuse the result for both the file and the return
        content = str(self)
        if path is not None:
            with open(path, "w", encoding="utf-8") as f:
                f.write(content)
        return content

    def to_notebook(self, path):
        """Writes notebook to notebook file

        The cells are written as they currently are in memory, including
        any tags added or blank lines trimmed while the cells were read.

        Parameters
        ----------
        path : str
            path for the notebook file
        """
        cells = []
        for block in self.blocks:
            cells.extend([c.cell for c in block.cells])

        # Shallow copy so replacing "cells" leaves self.json untouched
        obj = self.json.copy()
        obj["cells"] = cells

        # ensure_ascii=False writes non-ASCII characters as-is, so the file
        # must be opened as UTF-8 explicitly
        with open(path, "w", encoding="utf-8") as f:
            json.dump(obj, f, indent=1, ensure_ascii=False)


def cells_to_blocks(cells, **kwargs):
    """Groups a flat sequence of cells into blocks of related cells

    Each cell either joins the most recent block or starts a new one,
    as decided by Cell.is_part_of().

    Parameters
    ----------
    cells : list
        cells as either Cell objects or raw cell dicts. Dicts are wrapped
        in Cell before grouping.
    kwargs :
        keyword arguments to pass to cell.is_part_of()

    Returns
    -------
    list of Block
        the cells grouped into blocks, in their original order
    """
    grouped = []
    for item in cells:
        cell = item if isinstance(item, Cell) else Cell(item)

        # The first cell, or one that does not belong with the previous
        # block, opens a new block
        if not grouped or not cell.is_part_of(grouped[-1], **kwargs):
            grouped.append(Block([cell]))
            continue

        grouped[-1].cells.append(cell)
    return grouped


def fence(text, fence, length=80):
    """Wraps text in a pretty-printed pandoc fence

    Parameters
    ----------
    text : str
        content of the fence. Trailing whitespace is removed.
    fence : str
        name of the fence
    length : int
        total length of the fence divider

    Returns
    -------
    str
        the opening divider with the fence name, the text, and the closing
        divider, separated by blank lines and ending with a newline
    """
    # Pad the label with colons out to the full divider length. A label
    # that is already longer than that is left as is.
    opener = f"::: {fence} ".ljust(length, ":")
    closer = ":" * length
    return "\n".join([opener, "", text.rstrip(), "", closer, ""])

In [None]:
# Remove markdown files in the episodes. Useful if you change a filename or need
# to refresh everything. Change the condition to False to keep existing files
# and only rebuild notebooks that are newer than their markdown.
if True:
    for ext in ("*.md", "*.Rmd"):
        for dirname in ("episodes", "learners"):
            for path in glob.iglob(os.path.join(dirname, ext)):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    # Already gone, which is the desired end state
                    pass

# Create new markdown files from the notebooks folder
for nb_path in sorted(glob.iglob(os.path.join("notebooks", "*.ipynb"))):

    root = os.path.splitext(os.path.basename(nb_path))[0]

    # The setup file lives in a different directory and has to be plain markdown
    if root == "setup":
        md_path = os.path.join("learners", f"{root}.md")
    else:
        md_path = os.path.join("episodes", f"{root}.Rmd")

    # Rebuild if the markdown file is missing or older than the notebook
    try:
        stale = os.path.getmtime(nb_path) > os.path.getmtime(md_path)
    except FileNotFoundError:
        stale = True
    if not stale:
        continue

    # Only parse notebooks that actually need to be rebuilt
    nb = Notebook(nb_path)

    # Write the tagged notebook back to the original path. Reading the
    # notebook adds fence tags to its cells, so it does not need to be
    # parsed a second time.
    nb.to_notebook(nb_path)

    # Write the markdown file
    print(f"Writing {md_path}")
    nb.to_markdown(md_path)