# Locate and Parse notebooks

Homemade Jupyter notebook parser + helper to locate notebooks to parse

In [66]:
//| export

import path from "node:path";
import { z } from "zod";
import type { Config } from "jurassic/config.ts";

In [67]:
//| export

const cellOutputDataSchema = z.object({
  "text/markdown": z.array(z.string()).optional(),
  "text/plain": z.array(z.string()).optional(),
});

const cellOutputSchema = z.object({
  text: z.array(z.string()).optional(),
  data: cellOutputDataSchema.optional(),
});

const cellSchema = z.object({
  cell_type: z.enum(["code", "markdown"]),
  source: z.array(z.string()),
  outputs: z.array(cellOutputSchema).optional(),
});
const nbSchema = z.object({ cells: z.array(cellSchema) });

export type Cell = z.infer<typeof cellSchema>;
export type Nb = z.infer<typeof nbSchema>;

export const loadNb = async (nbPath: string): Promise<Nb> =>
  nbSchema.parse(JSON.parse(await Deno.readTextFile(nbPath)));

# Parse cell output

Jurassic needs to be able to convert individual cell output to test that can be
displayed inside documentation

In [68]:
//| export

export const getCellOutput = (cell: Cell): string => {
  let result = "";
  if (!cell.outputs) return result;
  for (const output of cell.outputs) {
    if (output.text) {
      result += output.text.join("\n");
    }
    if (output.data) {
      const c = output.data["text/markdown"] || output.data["text/plain"] || [];
      for (const line of c) {
        result += line;
      }
    }
  }
  return result;
};

Some cells don't contain any output - return empty strings for those

In [69]:
getCellOutput({ cell_type: "code", source: ["//| export"] });

[32m""[39m

In [70]:
getCellOutput({ cell_type: "code", source: [] });

[32m""[39m

Cells can output text

In [71]:
getCellOutput({
  cell_type: "code",
  source: [
    'import { assertEquals } from "jsr:@std/assert";\n',
    "\n",
    'Deno.test("isDirective", () => {\n',
    '  assertEquals(isDirective("//| export"), true);\n',
    '  assertEquals(isDirective("const c = 1;"), false);\n',
    '  assertEquals(isDirective("// | export"), true);\n',
    '  assertEquals(isDirective("// |    export"), true);\n',
    "});\n",
  ],
  outputs: [
    {
      text: [
        "isDirective ... \x1b[0m\x1b[32mok\x1b[0m \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n",
        "\n",
        "\x1b[0m\x1b[32mok\x1b[0m | 1 passed | 0 failed \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n",
      ],
    },
  ],
});

[32m"isDirective ... \x1b[0m\x1b[32mok\x1b[0m \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n"[39m +
  [32m"\n"[39m +
  [32m"\n"[39m +
  [32m"\n"[39m +
  [32m"\x1b[0m\x1b[32mok\x1b[0m | 1 passed | 0 failed \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n"[39m

Cells can output markdown

In [72]:
getCellOutput({
  cell_type: "code",
  source: [
    "await Deno.jupyter.display(\n",
    "  {\n",
    '    "text/markdown": "```ts\\n\\n" +\n',
    '      (await processNb(path.resolve("./export.ipynb"), "export.ipynb")) +\n',
    '      "\\n```",\n',
    "  },\n",
    "  { raw: true },\n",
    ");",
  ],
  outputs: [
    {
      data: {
        "text/markdown": [
          "```ts\n",
          "\n",
          "// 🦕 AUTOGENERATED! DO NOT EDIT! File to edit: export.ipynb\n",
          "\n",
          "\n",
          'import path from "node:path";\n',
          'import { getNotebooksToProcess, loadNb } from "jurassic/notebooks.ts";\n',
          'import type { Config } from "jurassic/config.ts";\n',
          'import type { Cell } from "jurassic/notebooks.ts";\n',
        ],
      },
    },
  ],
});

[32m"```ts\n"[39m +
  [32m"\n"[39m +
  [32m"// 🦕 AUTOGENERATED! DO NOT EDIT! File to edit: export.ipynb\n"[39m +
  [32m"\n"[39m +
  [32m"\n"[39m +
  [32m'import path from "node:path";\n'[39m +
  [32m'import { getNotebooksToProcess, loadNb } from "jurassic/notebooks.ts";\n'[39m +
  [32m'import type { Config } from "jurassic/config.ts";\n'[39m +
  [32m'import type { Cell } from "jurassic/notebooks.ts";\n'[39m

In [73]:
// const nb = await loadNb("./export.ipynb");
// nb.cells[20]

In [74]:
//| export

export const getNotebooksToProcess = async (
  notebookPath: string,
  config: Config,
): Promise<string[]> => {
  const fullPath = path.join(config.nbsPath, notebookPath);
  const fileInfo = await Deno.stat(fullPath);
  const notebooksToProcess: string[] = [];

  if (fileInfo.isDirectory) {
    // if target is a directory, let's go through all files/directories inside
    for await (const file of await Deno.readDir(fullPath)) {
      if (file.isDirectory) {
        // got another directory? delegate to another getNotebooksToProcess
        const childNbs = await getNotebooksToProcess(
          path.join(notebookPath, file.name),
          config,
        );
        for (const nb of childNbs) {
          notebooksToProcess.push(nb);
        }
        continue;
      }

      // we are only interested in notebooks
      if (!file.name.endsWith(".ipynb")) continue;

      // relative path only, puhleeze
      notebooksToProcess.push(
        path.relative(config.nbsPath, path.join(fullPath, file.name)),
      );
    }
  }

  return notebooksToProcess;
};

Let's see what `getNotebooksToProcess` looks like for the current project:

In [75]:
import { getTestConfig } from "jurassic/config.ts";
await getNotebooksToProcess(".", getTestConfig("../"));


[
  [32m"utils.ipynb"[39m,
  [32m"submodule/hello.ipynb"[39m,
  [32m"docs.ipynb"[39m,
  [32m"notebooks.ipynb"[39m,
  [32m"config.ipynb"[39m,
  [32m"export.ipynb"[39m
]