In [16]:
#| default_exp configuration

In [17]:
#| hide
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
#| export

from typing import Literal, Any
from typing_extensions import Self 
from pathlib import Path
from importlib.metadata import version
from uuid import uuid4
from glob import glob

from pydantic import (
    Field, BaseModel,
    model_validator, field_validator, ValidationInfo
)
from pydantic_settings import (
    BaseSettings,
    SettingsConfigDict,
    TomlConfigSettingsSource,
    CliApp,
    CliSuppress
)

import clip_plot # for version

In [19]:
#| export

class Paths(BaseModel):
    # Input and output locations for a run.
    #
    # `images`, `tables` and `metadata` each accept a single glob pattern, a
    # single directory, a single file path, or an explicit list of paths; the
    # `expand_paths` validator below normalises single values to a list of
    # `Path` objects before pydantic's own validation runs.
    #
    # (Plain comments are used instead of a class docstring so the model's
    # schema / CLI description is left untouched.)
    images: list[Path] = Field([], description="Path to folder, image dir, or glob")
    tables: list[Path] | None = Field(None,
                               description="Glob of table(s) with image_path, embed_path cols")
    # NOTE(review): this description is identical to the one on `tables` --
    # confirm whether `metadata` should describe something different.
    metadata: list[Path] | None = Field(None,
                               description="Glob of table(s) with image_path, embed_path cols")
    table_id: str = Field(default_factory=lambda: str(uuid4()), description="identifier for table output")
    # The default is resolved against the working directory at class-definition time.
    output_dir: Path = Field((Path()/"clipplot_output").resolve(),
            description="Directory for output data files and viewer")
    table_format: Literal["parquet", "csv"] = Field("parquet", description="Format for table, `csv` or `parquet`")


    @field_validator("images", "tables", "metadata", mode="before")
    @classmethod
    def expand_paths(cls, value: Any, info: ValidationInfo) -> Any:
        """Normalise a raw path input into a list of paths.

        Args:
            value: Raw input for the field: ``None``, a single path-like or
                glob string, or a list of them.
            info: Validation context; ``info.field_name`` selects which file
                extensions are collected when ``value`` is a directory.

        Returns:
            ``None`` if ``value`` is ``None``; the list unchanged if it is
            empty or has more than one item; otherwise a list of ``Path``
            objects obtained by expanding the glob, walking the directory, or
            wrapping the single path.
        """
        if value is None:
            return None

        if isinstance(value, list):
            if len(value) != 1:
                # Empty list, or an explicit multi-item list: nothing to
                # expand, let pydantic validate the items as they are.
                return value
            # A one-element list is treated like a bare value so that a lone
            # glob or directory still gets expanded.
            value = value[0]

        if "*" in str(value):
            return [Path(p) for p in glob(str(value), recursive=True)]

        if Path(value).is_dir():
            if info.field_name == "images":
                exts = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif'}
            else:
                exts = {'.json', '.csv', '.parquet'}
            return [p for p in Path(value).rglob('*') if p.suffix.lower() in exts]

        # Plain (non-glob, non-directory) path: keep it as a one-item list
        # rather than falling through and implicitly returning None, which
        # would silently discard the user's input.
        return [Path(value)]

    @model_validator(mode='after')
    def check_table_vs_meta(self) -> Self:
        """Reject configurations that set both ``metadata`` and ``tables``.

        Raises:
            ValueError: if both fields are not ``None``.
        """
        if self.metadata is not None and self.tables is not None:
            raise ValueError("'metadata' and 'tables' are mutually exclusive.")
        return self

In [20]:
#| export

class UmapSpec(BaseModel):
    # UMAP settings. `n_neighbors` and `min_dist` are list-valued.
    # (Comments, not a docstring, so the model description stays unset.)
    n_neighbors: list[int] = Field(
        default=[15],
        description="Number of neighbors in UMAP",
    )
    min_dist: list[float] = Field(
        default=[0.1],
        description="Minimum distance in UMAP",
    )

    # Marked with CliSuppress, unlike the two fields above.
    metric: CliSuppress[str] = Field(default="correlation")
    umap_on_full_dims: CliSuppress[bool] = Field(default=True)

In [21]:
#| export

class ClusterSpec(BaseModel):
    # Clustering settings; every field is marked with CliSuppress.
    # NOTE(review): the default n_clusters (12) is larger than the default
    # max_clusters (10) -- confirm how the consumer reconciles the two.
    n_clusters: CliSuppress[int] = Field(default=12)
    max_clusters: CliSuppress[int] = Field(default=10)
    min_cluster_size: CliSuppress[int] = Field(default=20)

In [22]:
#| export

class Cfg(BaseSettings):
    # Top-level settings object. Per `model_config` below, values are read
    # from environment variables prefixed `CLIPPLOT_` and from command-line
    # arguments (program name `clipplot`); unknown CLI arguments are ignored.
    #
    # (Plain comments are used instead of a class docstring so the CLI help
    # description is left untouched.)
    thumbnail_size: int = Field(128, description="Size of images in main bedmap view")
    model: str = Field("timm/convnext_tiny.dinov3_lvd1689m",
                            description="Model name on huggingface.co/models")
    # NOTE(review): `UmapSpec()` and `Paths()` below are instantiated once,
    # when this class body executes, so `Paths.table_id`'s default factory
    # runs once here -- confirm a shared default id is intended.
    umap_spec: UmapSpec = UmapSpec()
    clipplot_version: str = Field(version(clip_plot.__name__), description="Version of clipplot")
    plot_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for plot")
    paths: Paths = Paths()

    # excluded from CLI - it's a hairball
    seed: CliSuppress[int] = Field(42)
    geojson: CliSuppress[None | Path] = Field(None)
    shuffle: CliSuppress[None | bool] = Field(False)
    copy_web_only: CliSuppress[bool] = Field(False)
    use_cache: CliSuppress[bool] = Field(False)
    encoding: CliSuppress[str] = Field("utf8")
    pointgrid_fill: CliSuppress[float] = Field(0.05)
    cell_size: CliSuppress[int] = Field(32)
    lod_cell_height: CliSuppress[int] = Field(128)
    min_size: CliSuppress[int] = Field(100, description="min edge for image")
    gzip: CliSuppress[bool] = Field(False)
    logo: CliSuppress[None | Path] = Field(None)
    # The whole `None | str` union sits inside CliSuppress so the suppress
    # marker applies to the field itself, matching `geojson` and `logo`.
    tagline: CliSuppress[None | str] = Field(None)

    model_config = SettingsConfigDict(
        env_prefix = "CLIPPLOT_",
        cli_parse_args = True,
        use_attribute_docstrings = True,
        cli_prog_name = "clipplot",
        cli_hide_none_type = True,
        cli_ignore_unknown_args=True,
        # pyproject_toml_table_header=(),
    )

In [23]:
#| hide

# Smoke check: build the settings with defaults. `Cfg` sets
# `cli_parse_args=True`, so construction also parses the process's
# command-line arguments; `cli_ignore_unknown_args=True` lets unrecognised
# arguments pass.
cfg = Cfg()

In [24]:
#|hide
# Run nbdev's export step (this notebook declares `#| default_exp configuration`).
import nbdev; nbdev.nbdev_export()