# yak

> an opinionated, but configurable, task generator for building JupyterLite sites

In [None]:
import graphlib
import json
import os
import re
import sys
from copy import deepcopy
from pathlib import Path
from typing import Any, Callable

import importnb
import traitlets as T
import yaml
from traitlets.utils.nested_update import nested_update

import jupyak

try:
    import tomllib
except ImportError:
    # no stdlib ``tomllib`` available: fall back to the ``tomli`` package,
    # which is used under the same name for the rest of this module
    import tomli as tomllib

with importnb.Notebook():
    from jupyak.tasks import _well_known as W

## types

In [None]:
# a callable that is handed a `Yak` and returns a dict; `Yak.__init__` merges
# that dict with the user's configuration and expands it into `Repo(**...)`
TRepoFactory = Callable[["Yak"], dict]

## traits

> lifted from `ipywidgets.trait_types`

In [None]:
class InstanceDict(T.Instance):

    """An ``Instance`` trait that also accepts a plain ``dict``.

    A ``dict`` value is expanded as keyword arguments to the trait's ``klass``,
    and the resulting object is then validated like any other value.

    The dynamic default is always a fresh ``klass`` instance, built from
    ``default_args``/``default_kwargs`` when they are set and from no
    arguments at all when they are not.
    """

    def validate(self, obj, value):
        # coerce first, so there is a single hand-off to the parent validation
        candidate = self.klass(**value) if isinstance(value, dict) else value
        return super().validate(obj, candidate)

    def make_dynamic_default(self):
        positional = self.default_args or ()
        named = self.default_kwargs or {}
        return self.klass(*positional, **named)


class TypedTuple(T.Container):

    """A trait for a tuple of any length with type-checked elements."""

    # the container type this trait holds
    klass = tuple
    # NOTE(review): presumably the extra types `T.Container` will coerce into
    # `klass` (so a `list` is accepted and stored as a `tuple`) — confirm
    # against the traitlets `Container` implementation
    _cast_types = (list,)

In [None]:
class UnicodeWithRegex(T.Unicode):

    """A regex-constrained string.

    Takes the same arguments as ``T.Unicode`` plus one required keyword,
    ``pattern``: a regular expression (a string or a compiled pattern) that
    must be found somewhere in the value. This is a *search*, not a full
    match, so anchor the pattern if the whole value has to conform.
    """

    pattern: "str | re.Pattern"

    def __init__(
        self,
        default_value: Any = None,
        allow_none: bool = False,
        read_only: "bool | None" = None,
        help: "str | None" = None,
        config: Any = None,
        **kwargs: Any,
    ) -> None:
        # ``pattern`` belongs to this class: take it out of ``kwargs`` before
        # the remainder is forwarded to ``T.Unicode`` (``KeyError`` if missing)
        pattern = kwargs.pop("pattern")
        super().__init__(
            default_value=default_value,
            allow_none=allow_none,
            read_only=read_only,
            help=help,
            config=config,
            **kwargs,
        )
        self.pattern = pattern

    @property
    def info_text(self):
        """The description of acceptable values used in validation errors."""
        return f"a string matching {self.pattern}"

    def validate(self, obj: Any, value: Any) -> str | None:
        """Return ``value`` if it is a string containing a match for ``pattern``.

        Anything else is reported through ``self.error``.
        """
        # apply the parent's own type validation first, so a value of the
        # wrong type is reported as a trait error instead of escaping as a
        # ``TypeError`` from the ``re`` module
        value = super().validate(obj, value)
        # only the existence of a match matters, so there is no need to
        # collect every match
        if re.search(self.pattern, value):
            return value
        self.error(obj, value)

The pull request metadata.

In [None]:
class PullRequest(T.HasTraits):
    """The human-readable metadata of a pull request: a title and a description."""

    # back-reference to the owning `Yak`; assigned in `Yak.__init__`
    parent = T.Instance("jupyak.tasks._yak.Yak")
    title = T.Unicode("Untitled PR")
    description = T.Unicode("An undescribed PR")

Eject to the `pip` ecosystem.

In [None]:
class PipDeps(T.HasTraits):
    """A group of dependencies expressed as ``pip`` requirement strings."""

    # NOTE(review): every other trait in this file documents itself with
    # `help=`; `description=` here may be unintentional — confirm before changing
    pip = TypedTuple(
        T.Unicode(),
        description="a PEP 508 description of a pip dependency",
    ).tag(sync=True)

A description of the conda environment for doing work.

In [None]:
class CondaEnv(T.HasTraits):
    """The conda environment in which work is done, and the paths derived from it.

    Every path property is rooted at the ``work_path`` of the owning ``Yak``
    (``parent``), so ``parent`` must be assigned before any of them is read.
    """

    # back-reference to the owning `Yak`; assigned in `Yak.__init__`
    parent = T.Instance("jupyak.tasks._yak.Yak")
    channels = TypedTuple(
        T.Unicode(),
        ("conda-forge", "nodefaults"),
        help="the priority-ordered set of conda URLs of channels to get packages",
    )
    variables = T.Dict(
        T.Unicode(),
        T.Unicode(),
        help="environment variables to set when the work environment is activated",
    )
    dependencies = TypedTuple(
        T.Union([T.Unicode(), T.Instance(PipDeps)]),
        (
            "python ==3.11.*",
            "nodejs ==20.*",
            "yarn ==3.6.*",
            "pip",
            "hatch",
            "hatch-jupyter-builder",
            "jupyter-packaging",
            "mypy",
            "python-build",
        ),
        help="the names of conda (or pip) packages to install",
    )
    pypi_to_conda = T.Dict().tag(sync=True)

    @T.default("pypi_to_conda")
    def _default_pypi_to_conda(self):
        """The built-in table of names that differ between PyPI (keys) and conda (values)."""
        return {
            "prometheus-client": "prometheus_client",
            "fastjsonschema": "python-fastjsonschema",
            "jupyterlab-pygments": "jupyterlab_pygments",
            "stack-data": "stack_data",
            "build": "python-build",
        }

    @property
    def environment_yml(self):
        """The path of the environment description file in the work directory."""
        return self.parent.work_path / W.ENVIRONMENT_YML

    @property
    def venv(self) -> Path:
        """The environment prefix, ``.venv`` in the work directory."""
        return self.parent.work_path / ".venv"

    @property
    def lab_share(self):
        """The ``share/jupyter/lab`` directory inside the environment."""
        return self.venv / "share/jupyter/lab"

    @property
    def venv_history(self) -> Path:
        """The path of the conda history file inside the environment."""
        return self.venv / W.CONDA_META_HISTORY

    @property
    def run_args(self) -> list[str]:
        """The argument prefix that runs a command inside the environment."""
        return ["conda", "run", "--prefix", str(self.venv), "--live-stream"]

    @property
    def py_version_major(self):
        """The ``major.minor`` python version pinned in ``dependencies``, e.g. ``3.11``.

        Only string dependencies that start with exactly ``python ==`` are
        considered; the first such entry wins.

        Raises:
            ValueError: if no dependency pins python that way.
        """
        py_str = "python =="
        for dep in self.dependencies:
            if isinstance(dep, str) and dep.startswith(py_str):
                # "python ==3.11.*" -> "3.11.*" -> ["3", "11", "*"] -> "3.11"
                return ".".join(dep.split(py_str)[1].strip().split(".")[:2])
        # `dependencies` is an attribute, not a local: the bare name used to
        # raise `NameError` here instead of the intended `ValueError`
        raise ValueError(f"A python version not found in {self.dependencies}")

    @property
    def py_site_packages(self) -> Path:
        """The ``site-packages`` directory of the environment's python."""
        return self.venv / f"lib/python{self.py_version_major}/site-packages"

Information for how to clone the baseline and PRs for this repo

In [None]:
class GitHub(T.HasTraits):
    """The GitHub URLs of a repo: the repo itself, its baseline, and what to merge."""

    # the pattern handed to every `UnicodeWithRegex` trait below
    _re_github = r"https://github\.com/.*"

    parent = T.Instance("jupyak.tasks._yak.Yak")
    url = UnicodeWithRegex(
        pattern=_re_github,
        help="the URL of the repo",
        allow_none=False,
    )
    baseline = UnicodeWithRegex(
        pattern=_re_github,
        help="the URL of the baseline HEAD",
        allow_none=False,
    )
    merge_with = TypedTuple(
        UnicodeWithRegex(
            pattern=_re_github,
            allow_none=False,
        ),
        help="an optional, ordered list of branches to merge into the `baseline`",
    )

    @T.default("baseline")
    def _default_baseline(self):
        """Default the baseline to ``<url>/tree/main``."""
        return f"{self.url}/tree/main"

JavaScript-related provisioning, building, and linking.

In [None]:
class JSOptions(T.HasTraits):
    """How the JavaScript side of a repo is installed, built, and linked.

    The path properties are rooted at the ``work_path`` of the owning ``Repo``
    (``parent``).
    """

    parent = T.Instance("jupyak.tasks._yak.Repo")
    dependencies = TypedTuple(
        T.Unicode(),
        help="the names of other members of `repos` that need to built and linked into this repo in the JS environment",
    )
    link_exclude_patterns = TypedTuple(
        T.Unicode(),
        help="regular expressions for the npm `@org/pkg` names that should _not_ be linked into this repo",
    )
    dist_exclude_patterns = TypedTuple(
        T.Unicode(),
        help="regular expressions for the paths in this repo that should _not_ be built and linked in other repos",
    )
    tasks = T.Dict(help="task templates required to fulfill various js needs")
    install_exclude_resolutions = TypedTuple(
        T.Unicode(),
        help="regular expressions for the npm `@org/pkg` names that should _not_ be installed, ever, in this repo",
    )

    @property
    def package_jsons(self):
        """The package manifests of this repo: currently only the root one."""
        return [self.root_package_json]

    @property
    def root_package_json(self):
        """The package manifest at the top of the repo."""
        return self.parent.work_path.joinpath(W.PACKAGE_JSON)

    @property
    def yarn_state(self):
        """The yarn install state file of the repo."""
        return self.parent.work_path.joinpath(W.YARN_STATE)

    @property
    def yarn_lock(self):
        """The yarn lock file of the repo."""
        return self.parent.work_path.joinpath(W.YARN_LOCK)

    @property
    def all_install_exclude_resolutions(self):
        """The configured exclusions plus the built-in ones, de-duplicated and sorted."""
        always_excluded = {
            # test stuff
            "@stdlib/stats",
            "canvas",
            "playwright",
            "jest",
            # build stuff
            "verdaccio",
            # linters
            "@typescript-eslint/eslint-plugin",
            "@typescript-eslint/parser",
            "eslint",
            "eslint-config-prettier",
            "eslint-plugin-jest",
            "eslint-plugin-prettier",
            "eslint-plugin-react",
            "prettier",
            "stylelint",
            "stylelint-config-prettier",
            "stylelint-config-recommended",
            "stylelint-config-standard",
            "stylelint-csstree-validator",
            "stylelint-prettier",
        }
        return sorted(always_excluded.union(self.install_exclude_resolutions))

Python-related provisioning, building, and linking.

In [None]:
class PythonOptions(T.HasTraits):
    """How the Python side of a repo is installed, built, and linked."""

    # back-reference to the owning `Repo`; assigned in `Repo.__init__`
    parent: "Repo" = T.Instance("jupyak.tasks._yak.Repo")
    modules = TypedTuple(
        T.Unicode(),
        help="the importable python names provided by this repo",
    )
    dependencies = TypedTuple(
        T.Unicode(),
        help="the names of other members of `repos` that need to built and linked into this repo in the Python environment",
    )
    file_dep = TypedTuple(
        T.Unicode(),
        help="files needed (usually created by js tasks) before and editable python install",
    )
    pyproject_tomls = TypedTuple(
        T.Unicode(),
        help="pyproject toml files for installable packages",
    )
    lab_extensions = T.Dict(
        help="paths with extra file depenendencies needed to build an extension in this repo",
    )

    @T.default("pyproject_tomls")
    def _default_pyproject_tomls(self):
        """Default to the single well-known ``pyproject`` file name."""
        return [W.PYPROJECT_TOML]

    @property
    def labextension_script(self):
        """The path of ``scripts/labextension.py`` in the work directory of the `Yak`."""
        # `parent` is the `Repo`, `parent.parent` is the `Yak` that owns it
        return self.parent.parent.work_path / "scripts/labextension.py"

In [None]:
class RepoLiteOptions(T.HasTraits):
    """Per-repo settings for the lite site build (wheels and install ordering)."""

    wheel = T.Bool(False, help="whether to build and ship a noarch wheel for pyodide")
    wheel_file_dep = T.Dict(help="extra files needed to build a given wheel").tag(
        sync=True,
    )
    needs_pth = TypedTuple(
        T.Unicode(),
        help="names of packages this repo provides that need to be installed before a lite site build",
    )
    skip_wheel_patterns = TypedTuple(T.Unicode(), help="paths to skip building wheels")

A description of a repo.

In [None]:
class Repo(T.HasTraits):
    """One repository: where it comes from, how it is built, and how commands run in it."""

    parent = T.Instance("jupyak.tasks._yak.Yak")
    name = T.Unicode()
    github = InstanceDict(GitHub)
    js = InstanceDict(JSOptions, allow_none=True)
    py = InstanceDict(PythonOptions, allow_none=True)
    lite = InstanceDict(RepoLiteOptions, allow_none=True)
    variables = T.Dict(
        T.Unicode(),
        T.Unicode(),
        help="environment variables to set when the work environment is activated",
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # give the language-specific options, where present, a way back here
        for options in filter(None, (self.js, self.py)):
            options.parent = self

    @property
    def work_path(self):
        """The checkout of this repo: ``repos/<name>`` in the parent's work directory."""
        return self.parent.work_path.joinpath("repos", self.name)

    @property
    def run_context(self):
        """A ``(cwd, kwargs)`` pair; ``kwargs`` holds that ``cwd`` and the ``env`` from ``run_env``."""
        cwd = self.work_path
        return cwd, dict(cwd=cwd, env=self.run_env)

    @property
    def run_env(self):
        """The environment for commands run in this repo, with every value a string.

        Later layers win: the process environment, then ``variables``, then
        the fixed overrides below.
        """
        yak_work = self.parent.work_path
        layered = {
            **os.environ,
            **self.variables,
            # use a single global yarn config...
            "YARNRC": self.parent.yarnrc_path,
            # ... but per-project nx junk...
            "NX_CACHE_DIRECTORY": yak_work / ".cache/nx" / self.name,
            "NX_PROJECT_GRAPH_CACHE_DIRECTORY": yak_work / ".cache/nx-graph" / self.name,
            # avoid expensive pep 517 behavior
            "SKIP_JUPYTER_BUILDER": 1,
            "HATCH_JUPYTER_BUILDER_SKIP_NPM": 1,
            "JUPYTER_PACKAGING_SKIP_NPM": 1,
        }
        return {key: str(value) for key, value in layered.items()}

In [None]:
class LiteOptions(T.HasTraits):
    """Site-wide JupyterLite settings, and the paths the lite build reads and writes.

    All paths are rooted at the ``work_path`` or ``dist_path`` of the owning
    ``Yak`` (``parent``).
    """

    parent = T.Instance("jupyak.tasks._yak.Yak")

    gist = T.Unicode(
        help="a URL for a gist on GitHub to use as jupyterlite contents and config",
        allow_none=True,
    )

    @property
    def work_path(self) -> Path:
        """The ``lite`` directory in the parent's work directory."""
        return self.parent.work_path.joinpath("lite")

    @property
    def gist_path(self) -> Path:
        """The ``repos/_lite_gist`` directory in the parent's work directory."""
        return self.parent.work_path.joinpath("repos", "_lite_gist")

    @property
    def build_config_path(self) -> Path:
        """The well-known lite config file inside ``work_path``."""
        return self.work_path.joinpath(W.JUPYTER_LITE_CONFIG)

    @property
    def run_config_path(self) -> Path:
        """The well-known lite JSON file inside ``work_path``."""
        return self.work_path.joinpath(W.JUPYTER_LITE_JSON)

    @property
    def app_path(self) -> Path:
        """The ``lite`` directory in the parent's dist directory."""
        return self.parent.dist_path.joinpath("lite")

    @property
    def app_shasums_path(self) -> Path:
        """The well-known checksum file inside ``app_path``."""
        return self.app_path.joinpath(W.SHA256SUMS)

A description of a `jupyak` pull request.

In [None]:
class Yak(T.HasTraits):
    """The root of the configuration: a work directory, a PR, an environment, and repos.

    The raw configuration (the "issue") is either passed in or discovered by
    ``find_config``. Its ``pr``, ``env`` and ``lite`` sections become the
    traits of the same name, and its ``repos`` section is layered over the
    defaults produced by the factories registered with ``Yak.repo``.
    """

    work_dir = T.Unicode().tag(sync=True)
    pr = InstanceDict(PullRequest)
    repos = T.Dict(value_trait=InstanceDict(Repo))
    env = InstanceDict(CondaEnv)
    issue = T.Dict()
    lite = InstanceDict(LiteOptions)

    # registry of default repo factories, keyed by `(name, names-it-needs)`;
    # filled by the `Yak.repo` decorator and shared at class level
    _default_repos: dict[tuple[str, tuple[str, ...]], TRepoFactory] = {}

    def __init__(self, issue: dict | None = None, **kwargs):
        """Build the object tree from ``issue``, or from the discovered config.

        Args:
            issue: the raw configuration; when empty or ``None`` the result of
                ``find_config`` is used instead.
            **kwargs: further trait values; ``issue``, ``pr``, ``env`` and
                ``lite`` are always overwritten from the configuration.
        """
        # a config file may legitimately parse to an empty value (even `None`):
        # treat that as "no settings" instead of crashing in `nested_update`
        issue = issue or Yak.find_config() or {}

        # guarantee the top-level sections exist, and detach from the caller's data
        kwargs["issue"] = deepcopy(
            nested_update({"repos": {}, "pr": {}, "env": {}, "lite": {}}, issue)
        )
        kwargs["pr"] = kwargs["issue"]["pr"]
        kwargs["env"] = kwargs["issue"]["env"]
        kwargs["lite"] = kwargs["issue"]["lite"]
        super().__init__(**kwargs)
        for child in [self.pr, self.env, self.lite]:
            child.parent = self
        # factories run in dependency order, each one seeing the repos built so far
        for name, factory in self._sorted_default_repos():
            defaults = deepcopy(factory(self))
            issue_repo = deepcopy(self.issue["repos"].get(name, {}))
            self.repos[name] = Repo(**nested_update(defaults, issue_repo))
            self.repos[name].parent = self

    @property
    def work_path(self) -> Path:
        """``work_dir`` as a ``Path``."""
        return Path(self.work_dir)

    @property
    def cache_path(self) -> Path:
        """The ``.cache`` directory in the work directory."""
        return self.work_path / ".cache"

    @property
    def yarn_cache_path(self) -> Path:
        """The ``yarn`` directory inside ``cache_path``."""
        return self.cache_path / "yarn"

    @property
    def yarnrc_path(self) -> Path:
        """The well-known yarn config file in the work directory."""
        return self.work_path / W.YARNRC

    @property
    def build_path(self) -> Path:
        """The ``build`` directory in the work directory."""
        return self.work_path / "build"

    @property
    def dist_path(self) -> Path:
        """The ``dist`` directory in the work directory."""
        return self.work_path / "dist"

    @property
    def py_repos(self):
        """The repos that have python options, by name."""
        return {name: repo for name, repo in self.repos.items() if repo.py}

    @property
    def js_repos(self):
        """The repos that have js options, by name."""
        return {name: repo for name, repo in self.repos.items() if repo.js}

    @property
    def not_a_package_json(self):
        """The path ``build/not-a-package/package.json`` in the work directory."""
        return self.work_path / "build/not-a-package/package.json"

    @T.default("work_dir")
    def _default_work_dir(self):
        """Use ``$JPYK_WORK_DIR`` if set, else ``work`` three levels above the ``jupyak`` module file."""
        work_dir = os.environ.get("JPYK_WORK_DIR")
        if work_dir is None:
            work_dir = Path(jupyak.__file__).parent.parent.parent / "work"
        return str(work_dir)

    @classmethod
    def _sorted_default_repos(cls):
        """Yield ``(name, factory)`` for every registered repo, dependencies first.

        A needed name that was never registered surfaces as a ``KeyError``.
        """
        graph = {}
        repos = cls._default_repos
        named_factories = {}
        for (name, deps), factory in repos.items():
            named_factories[name] = factory
            graph[name] = set(deps)
        for name in graphlib.TopologicalSorter(graph).static_order():
            yield name, named_factories[name]

    @classmethod
    def repo(cls, name: str, needs: tuple[str, ...] | None = None):
        """Return a decorator that registers a default factory for the repo ``name``.

        Args:
            name: the key of the repo in ``repos``.
            needs: names of repos whose factories must run before this one.

        The first factory registered for a given ``(name, needs)`` pair is
        kept; later registrations for the same pair are ignored. The decorator
        returns the factory unchanged, so the decorated name stays usable.
        """
        # normalize to a tuple so a list of names still makes a hashable key
        key = (name, tuple(needs or ()))

        def _ensure(default_factory: TRepoFactory):
            cls._default_repos.setdefault(key, default_factory)
            return default_factory

        return _ensure

    @classmethod
    def find_config(cls):
        """Locate and parse the configuration file.

        Candidates are tried in priority order: the path named by the
        ``W.ENV_VAR_CONFIG`` environment variable first, then each entry of
        ``W.JPYK_CONFIGS``. The first one that exists is parsed according to
        its suffix (``.json``, ``.yml``/``.yaml`` or ``.toml``).

        Returns:
            The parsed configuration, or ``{}`` when no file exists and the
            ``W.ENV_VAR_ALLOW_NO_CONFIG`` environment variable is truthy.

        Exits the process with status 1 when the file found has an unsupported
        suffix, or when no file exists and a missing config is not allowed.
        """
        issue_path = None
        for candidate in [os.environ.get(W.ENV_VAR_CONFIG), *W.JPYK_CONFIGS]:
            if not candidate:
                continue
            if Path(candidate).exists():
                issue_path = Path(candidate).resolve()
                # stop at the first hit: without this the *last* existing
                # candidate won, and the explicit env var lost to any default
                break
        if issue_path and issue_path.exists():
            issue_text = issue_path.read_text(encoding="utf-8")
            suffix = issue_path.suffix
            if suffix in [".json"]:
                return json.loads(issue_text)
            if suffix in [".yml", ".yaml"]:
                return yaml.safe_load(issue_text)
            if suffix in [".toml"]:
                return tomllib.loads(issue_text)

            msg = f"{issue_path} exists, but could not be parsed"
            print(msg, file=sys.stderr)
            sys.exit(1)

        # the flag is read as a JSON literal, e.g. "true", "false" or "1"
        allow_no_config = json.loads(
            os.environ.get(W.ENV_VAR_ALLOW_NO_CONFIG, "false").lower()
        )
        if not allow_no_config:
            msg = f"{W.ENV_VAR_CONFIG} resolved to missing file: {issue_path}"
            print(msg, file=sys.stderr)
            sys.exit(1)
        return {}