# Locate and Parse notebooks

A homemade Jupyter notebook parser, plus a helper that locates the notebooks to parse.

In [16]:
//| export

import path from "node:path";
import { z } from "zod";
import type { Config } from "jurassic/config.ts";

In [17]:
//| export

/**
 * Shape of a single notebook cell as far as this module cares:
 * the cell kind and its source lines.
 */
const cellSchema = z.object({
  // only code and markdown cells are accepted
  cell_type: z.enum(["code", "markdown"]),
  // cell content, stored as a list of strings
  source: z.string().array(),
});

/** Shape of a notebook document: a list of cells. */
const nbSchema = z.object({
  cells: cellSchema.array(),
});

/** A validated notebook cell. */
export type Cell = z.infer<typeof cellSchema>;

/** A validated notebook. */
export type Nb = z.infer<typeof nbSchema>;

/**
 * Loads a notebook from disk.
 *
 * Reads the file at `nbPath` as text, parses it as JSON and validates the
 * result against `nbSchema`.
 *
 * @param nbPath Path of the notebook file to read.
 * @returns The validated notebook.
 */
export const loadNb = async (nbPath: string): Promise<Nb> => {
  const rawText = await Deno.readTextFile(nbPath);
  const parsed: unknown = JSON.parse(rawText);
  return nbSchema.parse(parsed);
};

In [18]:
//| export

/**
 * Collects the notebooks (`.ipynb` files) found at `notebookPath`.
 *
 * `notebookPath` is resolved against `config.nbsPath`. When it points to a
 * directory, the directory is walked recursively. When it points to a single
 * file, that file is returned if it is a notebook.
 *
 * @param notebookPath File or directory to inspect, relative to `config.nbsPath`.
 * @param config Project configuration; only `nbsPath` is read here.
 * @returns Notebook paths relative to `config.nbsPath`. Entries within a
 *   directory appear in the order `Deno.readDir` yields them.
 */
export const getNotebooksToProcess = async (
  notebookPath: string,
  config: Config,
): Promise<string[]> => {
  const fullPath = path.join(config.nbsPath, notebookPath);
  const fileInfo = await Deno.stat(fullPath);

  if (!fileInfo.isDirectory) {
    // target is a single file: keep it only if it is a notebook
    return fullPath.endsWith(".ipynb")
      ? [path.relative(config.nbsPath, fullPath)]
      : [];
  }

  const notebooksToProcess: string[] = [];

  // target is a directory, let's go through all files/directories inside
  for await (const file of Deno.readDir(fullPath)) {
    if (file.isDirectory) {
      // got another directory? delegate to another getNotebooksToProcess
      const childNbs = await getNotebooksToProcess(
        path.join(notebookPath, file.name),
        config,
      );
      notebooksToProcess.push(...childNbs);
      continue;
    }

    // we are only interested in notebooks
    if (!file.name.endsWith(".ipynb")) continue;

    // relative path only, puhleeze
    notebooksToProcess.push(
      path.relative(config.nbsPath, path.join(fullPath, file.name)),
    );
  }

  return notebooksToProcess;
};

Let's see what `getNotebooksToProcess` looks like for the current project:

In [19]:
// Demo cell: the awaited array is the cell's last expression, so it is what
// the notebook displays as output.
// NOTE(review): getTestConfig("../") presumably builds a config for the
// project one directory up — confirm in config.ts.
import { getTestConfig } from "jurassic/config.ts";
// "." is joined onto the config's nbsPath, i.e. scan the whole notebooks dir
await getNotebooksToProcess(".", getTestConfig("../"));


[
  "submodule/hello.ipynb",
  "docs.ipynb",
  "notebooks.ipynb",
  "config.ipynb",
  "export.ipynb"
]