# Locate and Parse notebooks

Homemade Jupyter notebook parser + helper to locate notebooks to parse

In [1]:
//| export

import path from "node:path";
import { z } from "zod";

In [2]:
//| export

const cellOutputDataSchema = z.object({
  "text/markdown": z.array(z.string()).optional(),
  "text/plain": z.array(z.string()).optional(),
});

const cellOutputSchema = z.object({
  text: z.array(z.string()).optional(),
  data: cellOutputDataSchema.optional(),
});

const cellSchema = z.object({
  cell_type: z.enum(["code", "markdown"]),
  source: z.array(z.string()),
  outputs: z.array(cellOutputSchema).optional(),
});
const nbSchema = z.object({ filename: z.string(), cells: z.array(cellSchema) });

export type Cell = z.infer<typeof cellSchema>;
export type Nb = z.infer<typeof nbSchema>;

export const loadNb = async (nbPath: string): Promise<Nb> =>
  nbSchema.parse(
    Object.assign(
      { filename: nbPath },
      JSON.parse(await Deno.readTextFile(nbPath)),
    ),
  );

# Get notebook title

Try to get a human readable title for a notebook using the following approach:

- grab the first md cell in the notebook, and return first h1 inside of
- if this fails, return notebook filename

In [3]:
//| export

export const getNbTitle = (nb: Nb): string => {
  const mds = nb.cells.length > 0 && nb.cells[0].cell_type === "markdown"
    ? nb.cells[0].source
    : null;
  const md = mds && mds.length > 0 && mds[0].trim().startsWith("# ")
    ? mds[0]
    : null;
  return md
    ? md.replace(/^# /, "").replaceAll("\n", "").trim()
    : path.basename(nb.filename);
};

In [4]:
getNbTitle(await loadNb("./notebooks.ipynb"));

[32m"Locate and Parse notebooks"[39m

# Parse cell output

Jurassic needs to be able to convert individual cell output to test that can be
displayed inside documentation

In [5]:
//| export

export const getCellOutput = (cell: Cell): string => {
  let result = "";
  if (!cell.outputs) return result;
  for (const output of cell.outputs) {
    if (output.text) {
      result += output.text.join("\n");
    }
    if (output.data) {
      const c = output.data["text/markdown"] || output.data["text/plain"] || [];
      for (const line of c) {
        result += line;
      }
    }
  }
  return result;
};

Some cells don't contain any output - return empty strings for those

In [6]:
getCellOutput({ cell_type: "code", source: ["//| export"] });

[32m""[39m

In [7]:
getCellOutput({ cell_type: "code", source: [] });

[32m""[39m

Cells can output text

In [8]:
getCellOutput({
  cell_type: "code",
  source: [
    'import { assertEquals } from "jsr:@std/assert";\n',
    "\n",
    'Deno.test("isDirective", () => {\n',
    '  assertEquals(isDirective("//| export"), true);\n',
    '  assertEquals(isDirective("const c = 1;"), false);\n',
    '  assertEquals(isDirective("// | export"), true);\n',
    '  assertEquals(isDirective("// |    export"), true);\n',
    "});\n",
  ],
  outputs: [
    {
      text: [
        "isDirective ... \x1b[0m\x1b[32mok\x1b[0m \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n",
        "\n",
        "\x1b[0m\x1b[32mok\x1b[0m | 1 passed | 0 failed \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n",
      ],
    },
  ],
});

[32m"isDirective ... \x1b[0m\x1b[32mok\x1b[0m \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n"[39m +
  [32m"\n"[39m +
  [32m"\n"[39m +
  [32m"\n"[39m +
  [32m"\x1b[0m\x1b[32mok\x1b[0m | 1 passed | 0 failed \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n"[39m

Cells can output markdown

In [9]:
getCellOutput({
  cell_type: "code",
  source: [
    "await Deno.jupyter.display(\n",
    "  {\n",
    '    "text/markdown": "```ts\\n\\n" +\n',
    '      (await processNb(path.resolve("./export.ipynb"), "export.ipynb")) +\n',
    '      "\\n```",\n',
    "  },\n",
    "  { raw: true },\n",
    ");",
  ],
  outputs: [
    {
      data: {
        "text/markdown": [
          "```ts\n",
          "\n",
          "// 🦕 AUTOGENERATED! DO NOT EDIT! File to edit: export.ipynb\n",
          "\n",
          "\n",
          'import path from "node:path";\n',
          'import { getNotebooksToProcess, loadNb } from "jurassic/notebooks.ts";\n',
          'import type { Config } from "jurassic/config.ts";\n',
          'import type { Cell } from "jurassic/notebooks.ts";\n',
        ],
      },
    },
  ],
});

[32m"```ts\n"[39m +
  [32m"\n"[39m +
  [32m"// 🦕 AUTOGENERATED! DO NOT EDIT! File to edit: export.ipynb\n"[39m +
  [32m"\n"[39m +
  [32m"\n"[39m +
  [32m'import path from "node:path";\n'[39m +
  [32m'import { getNotebooksToProcess, loadNb } from "jurassic/notebooks.ts";\n'[39m +
  [32m'import type { Config } from "jurassic/config.ts";\n'[39m +
  [32m'import type { Cell } from "jurassic/notebooks.ts";\n'[39m