# Locate and Parse notebooks

Homemade Jupyter notebook parser + helper to locate notebooks to parse

In [ ]:
//| export

import path from "node:path";
import { z } from "zod";
import { findDenoTests } from "jurassic/utils.ts";

In [ ]:
//| export

// Shape of a single entry in a cell's `outputs` array; every field is optional.
// NOTE(review): zod objects appear to drop keys that are not declared here when
// parsing (e.g. `output_type`) — confirm this is intended before round-tripping
// a notebook through loadNb/saveNb.
const cellOutputSchema = z.object({
  // Keyed payloads; getCellOutput reads "text/markdown" and "text/plain" from here.
  data: z.record(z.any()).optional(),
  execution_count: z.number().nullable().optional(),
  // Either one string or a list of string fragments.
  text: z.union([z.string(), z.array(z.string())]).optional(),
  metadata: z.record(z.any()).optional(),
});

/**
 * Tells whether a source line is a `//|` directive.
 *
 * Space characters anywhere in the line are ignored, so `// | export` counts
 * as a directive just like `//| export`. Only plain spaces are removed; other
 * whitespace (e.g. tabs) is left in place.
 *
 * @param ln A single line of cell source.
 * @returns `true` when the space-stripped line begins with `//|`.
 */
export const isDirective = (ln: string): boolean => {
  const withoutSpaces = ln.split(" ").join("");
  return withoutSpaces.startsWith("//|");
};

// Schema for one notebook cell. After validation the cell is augmented with
// two derived flags:
//   - isTestCell:   code cell in which findDenoTests finds at least one test
//   - isExportable: code cell whose first source line is a `//|` directive
//                   containing the word "export"
const cellSchema = z
  .object({
    cell_type: z.enum(["code", "markdown"]),
    source: z.array(z.string()),
    outputs: z.array(cellOutputSchema).optional(),
    metadata: z.record(z.any()).optional(),
  })
  .transform((cell) => {
    const isCode = cell.cell_type === "code";
    const [firstLine] = cell.source;

    // Markdown cells are never scanned for tests.
    const isTestCell = isCode &&
      findDenoTests(cell.source.join("\n")).length > 0;

    // The length check guards against empty cells, where firstLine is undefined.
    const isExportable = isCode && cell.source.length > 0 &&
      isDirective(firstLine) && firstLine.includes("export");

    return Object.assign(cell, { isTestCell, isExportable });
  });
// Schema for a whole notebook: its cells plus the path it was loaded from.
// NOTE(review): top-level keys not declared here (e.g. `nbformat`) look like
// they are dropped by zod on parse — confirm before relying on saveNb output.
const nbSchema = z.object({
  // loadNb seeds this with the path it was given; saveNb writes back to it.
  filename: z.string(),
  metadata: z.record(z.any()).optional(),
  cells: z.array(cellSchema),
});

// A parsed cell, as produced by cellSchema (includes the derived
// isTestCell / isExportable flags added by its transform).
export type Cell = z.infer<typeof cellSchema>;
// A parsed notebook, as produced by nbSchema.
export type Nb = z.infer<typeof nbSchema>;

/**
 * Reads a notebook file, parses it as JSON and validates it with nbSchema.
 *
 * The path is recorded as `filename` on the result. Because the parsed JSON is
 * merged on top, a `filename` key inside the file itself would take precedence.
 *
 * @param nbPath Path of the notebook file to read.
 * @returns The validated notebook.
 */
export const loadNb = async (nbPath: string): Promise<Nb> => {
  const rawText = await Deno.readTextFile(nbPath);
  const candidate = Object.assign({ filename: nbPath }, JSON.parse(rawText));
  return nbSchema.parse(candidate);
};

/**
 * Writes a notebook back to the file named by its `filename` field.
 *
 * `filename` itself is not serialized; everything else is written as JSON
 * indented with two spaces.
 *
 * @param nb Notebook to persist.
 */
export const saveNb = async (nb: Nb): Promise<void> => {
  const { filename: targetPath, ...notebook } = nb;
  const serialized = JSON.stringify(notebook, null, 2);
  await Deno.writeTextFile(targetPath, serialized);
};

In [ ]:
import { assertEquals } from "jsr:@std/assert";

Deno.test("isDirective", () => {
  // [input line, expected verdict]
  const cases: [string, boolean][] = [
    ["//| export", true],
    ["const c = 1;", false],
    ["// | export", true],
    ["// |    export", true],
  ];

  for (const [line, expected] of cases) {
    assertEquals(isDirective(line), expected);
  }
});


In [ ]:
import { assertEquals } from "jsr:@std/assert";

Deno.test("isExportable", () => {
  // Parses a minimal cell and returns its derived isExportable flag.
  const exportable = (
    cell_type: "code" | "markdown",
    source: string[],
  ): boolean => cellSchema.parse({ cell_type, source }).isExportable;

  // Directive with a space and a trailing newline.
  assertEquals(exportable("code", ["//| export\n"]), true);
  // Ordinary code is not exportable.
  assertEquals(exportable("code", ["const c = 1;"]), false);
  // Directive without a space.
  assertEquals(exportable("code", ["//|export\n"]), true);
  // Markdown cells are never exportable, even when they mention the directive.
  assertEquals(exportable("markdown", ["# showing //| export\n"]), false);
  // Directive without a trailing newline.
  assertEquals(exportable("code", ["//|export"]), true);
});

# Get notebook title

Try to get a human readable title for a notebook using the following approach:

- grab the first markdown cell in the notebook and return the first h1 inside it
- if this fails, return notebook filename

In [ ]:
//| export

/**
 * Returns a human readable title for a notebook.
 *
 * When the notebook's first cell is a markdown cell whose first source line is
 * an h1 (`# ...`), the heading text is returned. Otherwise the base name of
 * the notebook file is used.
 *
 * @param nb Parsed notebook.
 * @returns The h1 text without the leading `# `, or the notebook's file name.
 */
export const getNbTitle = (nb: Nb): string => {
  const firstCell = nb.cells.length > 0 ? nb.cells[0] : null;
  const firstLine =
    firstCell && firstCell.cell_type === "markdown" &&
      firstCell.source.length > 0
      ? firstCell.source[0].trim()
      : null;

  if (firstLine === null || !firstLine.startsWith("# ")) {
    return path.basename(nb.filename);
  }

  // Strip the marker from the *trimmed* line: the h1 check above is done on
  // the trimmed text, so stripping the raw line would leave "# " in the title
  // whenever the heading is preceded by whitespace.
  return firstLine.slice(2).replace(/\n/g, "").trim();
};

In [ ]:
// Demo: load ./notebooks.ipynb (resolved against the current working directory)
// and show the title derived from it.
getNbTitle(await loadNb("./notebooks.ipynb"));

"Locate and Parse notebooks"

# Parse cell output

Jurassic needs to be able to convert individual cell output to text that can be
displayed inside documentation

In [ ]:
//| export

/**
 * Flattens the outputs of a cell into a single string.
 *
 * For every output, its `text` is appended first, followed by the
 * `text/markdown` payload of `data` (or `text/plain` when no markdown payload
 * is present). Values stored as arrays are concatenated without a separator:
 * the fragments already carry their own line endings, so inserting "\n"
 * between them would double every line break.
 *
 * @param cell Cell whose outputs should be rendered.
 * @returns The concatenated output, or "" when the cell has no outputs.
 */
export const getCellOutput = (cell: Cell): string => {
  // A value is either one string or a list of fragments; anything else
  // contributes nothing instead of throwing.
  const flatten = (value: unknown): string => {
    if (typeof value === "string") return value;
    return Array.isArray(value) ? value.join("") : "";
  };

  let result = "";
  for (const output of cell.outputs ?? []) {
    result += flatten(output.text);
    if (output.data) {
      // Markdown wins over plain text when both representations exist.
      result += flatten(
        output.data["text/markdown"] || output.data["text/plain"],
      );
    }
  }
  return result;
};

Some cells don't contain any output - return an empty string for those

In [ ]:
// Demo: a cell without an `outputs` field yields "".
getCellOutput({ cell_type: "code", source: ["//| export"] });

""

In [ ]:
// Demo: an empty cell (no source, no outputs) also yields "".
getCellOutput({ cell_type: "code", source: [] });

""

Cells can output text

In [ ]:
// Demo: a test cell whose single output carries `text` fragments
// (ANSI-coloured test runner output).
getCellOutput({
  cell_type: "code",
  source: [
    'import { assertEquals } from "jsr:@std/assert";\n',
    "\n",
    'Deno.test("isDirective", () => {\n',
    '  assertEquals(isDirective("//| export"), true);\n',
    '  assertEquals(isDirective("const c = 1;"), false);\n',
    '  assertEquals(isDirective("// | export"), true);\n',
    '  assertEquals(isDirective("// |    export"), true);\n',
    "});\n",
  ],
  outputs: [
    {
      text: [
        "isDirective ... \x1b[0m\x1b[32mok\x1b[0m \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n",
        "\n",
        "\x1b[0m\x1b[32mok\x1b[0m | 1 passed | 0 failed \x1b[0m\x1b[38;5;245m(0ms)\x1b[0m\n",
      ],
    },
  ],
});

Cells can output markdown

In [ ]:
// Demo: a cell whose single output carries a "text/markdown" payload in `data`.
getCellOutput({
  cell_type: "code",
  source: [
    "await Deno.jupyter.display(\n",
    "  {\n",
    '    "text/markdown": "```ts\\n\\n" +\n',
    '      (await processNb(path.resolve("./export.ipynb"), "export.ipynb")) +\n',
    '      "\\n```",\n',
    "  },\n",
    "  { raw: true },\n",
    ");",
  ],
  outputs: [
    {
      data: {
        "text/markdown": [
          "```ts\n",
          "\n",
          "// 🦕 AUTOGENERATED! DO NOT EDIT! File to edit: export.ipynb\n",
          "\n",
          "\n",
          'import path from "node:path";\n',
          'import { getNotebooksToProcess, loadNb } from "jurassic/notebooks.ts";\n',
          'import type { Config } from "jurassic/config.ts";\n',
          'import type { Cell } from "jurassic/notebooks.ts";\n',
        ],
      },
    },
  ],
});

"```ts\n" +
  "\n" +
  "// 🦕 AUTOGENERATED! DO NOT EDIT! File to edit: export.ipynb\n" +
  "\n" +
  "\n" +
  'import path from "node:path";\n' +
  'import { getNotebooksToProcess, loadNb } from "jurassic/notebooks.ts";\n' +
  'import type { Config } from "jurassic/config.ts";\n' +
  'import type { Cell } from "jurassic/notebooks.ts";\n'

In [ ]:
//| export

/**
 * Rewrites a notebook file in place with the outputs of its test cells removed.
 *
 * A cell is treated as a test cell when it is a code cell whose source
 * contains the text "Deno.test". All other cells are written back untouched.
 * The file is read and written synchronously, and the JSON is written without
 * indentation.
 *
 * @param nbPath Path of the notebook file to clean.
 */
export const cleanNb = (nbPath: string) => {
  const notebook = JSON.parse(Deno.readTextFileSync(nbPath));

  const hasDenoTest = (cell: Cell): boolean =>
    cell.cell_type === "code" &&
    cell.source.join("\n").includes("Deno.test");

  // Copy test cells with their outputs emptied; pass the rest through as-is.
  const cells = notebook.cells.map((cell: Cell) =>
    hasDenoTest(cell) ? Object.assign({}, cell, { outputs: [] }) : cell
  );

  const cleaned = Object.assign({}, notebook, { cells });
  Deno.writeTextFileSync(nbPath, JSON.stringify(cleaned));
};

In [ ]:
import { assert } from "jsr:@std/assert";
import { getProjectRoot } from "jurassic/utils.ts";

// Runs cleanNb against a throwaway copy of nbs/notebooks.ipynb and checks that
// every code cell mentioning Deno.test ends up without outputs.
Deno.test("cleanNb", async (t) => {
  const td = await Deno.makeTempDir({});

  try {
    await Deno.mkdir(`${td}/nbs`);
    Deno.copyFileSync(
      path.resolve(getProjectRoot(), "nbs/notebooks.ipynb"),
      `${td}/nbs/notebooks.ipynb`,
    );

    await t.step("clean Nb", () => {
      const nbPath = `${td}/nbs/notebooks.ipynb`;
      cleanNb(nbPath);

      const d = JSON.parse(Deno.readTextFileSync(nbPath));
      const cells: Cell[] = d.cells;

      for (const c of cells) {
        if (
          c.cell_type === "code" && c.source.join("\n").includes("Deno.test")
        ) {
          assert((c.outputs || []).length === 0);
        }
      }
    });
  } finally {
    // Remove the scratch directory even when the step above fails, so test
    // runs do not accumulate temp directories.
    await Deno.remove(td, { recursive: true });
  }
});