In [None]:
#| default_exp configuration

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
#| export

from typing import Optional
from typing_extensions import Self
from pathlib import Path
from importlib.metadata import version
from uuid import uuid4

from pydantic import Field, BaseModel, model_validator
from pydantic_settings import (
    BaseSettings,
    SettingsConfigDict,
    CliMutuallyExclusiveGroup,
    TomlConfigSettingsSource,
    CliApp,
    CliSuppress
)

import clip_plot # for version

In [None]:
#| export

class Paths(BaseModel):
    """Input and output locations for a clip-plot run."""

    # Each input source (images, table, metadata) can be given either as a
    # concrete path or as a glob pattern; check_source_exclusion rejects
    # configurations that set both forms for the same source.
    image_dir: Path | None = Field(None, description="Directory for input images")
    image_glob: str | None = Field(None, description="glob pattern for input images")
    table_file: Path | None = Field(None,
                               description="Path to csv or JSON table w/ image_path and embed_path columns (optionally: metadata)")
    table_glob: str | None = Field(None,
                               description="glob to csv or JSON table w/ image_path and embed_path columns (optionally: metadata)")
    meta_file: Path | None = Field(None, description="path to csv or JSON table with metadata")
    meta_glob: str | None = Field(None, description="glob pattern for csv or JSON table with metadata")
    # NOTE: this default is computed once, when the class body is executed
    # (i.e. at import), so it is relative to the working directory at that
    # moment rather than at instantiation time.
    output_dir: Path | None = Field((Path()/"clipplot_output").resolve(),
            description="Directory for output files")

    @model_validator(mode='after')
    def check_source_exclusion(self) -> Self:
        """Reject configurations that give both a path and a glob for one source.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if both members of any (path, glob) pair are set.
        """
        # Pairs are checked in this order, so the first conflicting pair
        # determines which error is reported.
        exclusive_pairs = (
            ("image_dir", "image_glob"),
            ("table_file", "table_glob"),
            ("meta_file", "meta_glob"),
        )
        for path_field, glob_field in exclusive_pairs:
            both_set = (
                getattr(self, path_field) is not None
                and getattr(self, glob_field) is not None
            )
            if both_set:
                raise ValueError(f"'{path_field}' and '{glob_field}' are mutually exclusive.")
        return self

class UmapSpec(BaseModel):
    # UMAP parameters. n_neighbors and min_dist are lists of values.
    n_neighbors: list[int] = Field(
        default=[15],
        description="Number of neighbors in UMAP",
    )
    min_dist: list[float] = Field(
        default=[0.1],
        description="Minimum distance in UMAP",
    )

    # The fields below are marked exclude=True, so they are left out of
    # model_dump() output.
    metric: str = Field(default="correlation", exclude=True)
    umap_on_full_dims: bool = Field(default=True, exclude=True)

class ClusterSpec(BaseModel):
    # Clustering parameters. Every field is marked exclude=True, so none of
    # them appear in model_dump() output.
    # NOTE(review): the default n_clusters (12) is larger than the default
    # max_clusters (10) — confirm this is intended.
    n_clusters: int = Field(default=12, exclude=True)
    max_clusters: int = Field(default=10, exclude=True)
    min_cluster_size: int = Field(default=20, exclude=True)
    # NOTE(review): -1 looks like a sentinel value — confirm its meaning
    # against the code that consumes it.
    cluster_preproc_dims: int = Field(default=-1, exclude=True)

class Cfg(BaseSettings):
    """Settings for building a clip-plot visualization."""

    # --- options exposed on the command line ---
    thumbnail_size: int = Field(128, description="Size of images in main bedmap view")
    model_name: str = Field("timm/convnext_tiny.dinov3_lvd1689m",
                            description="Model name on huggingface.co/models")
    umap_spec: UmapSpec = UmapSpec()
    # Version string is looked up from installed package metadata when the
    # class body is executed.
    clipplot_version: str = Field(version(clip_plot.__name__), description="Version of clipplot")
    # default_factory gives every instance its own freshly generated UUID.
    plot_id: str = Field(default_factory=lambda: str(uuid4()), description="Unique identifier for plot")
    paths: Paths = Paths()

    # excluded from CLI - it's a hairball
    # CliSuppress must wrap the field's *entire* type so that the suppress
    # marker is attached to the field itself rather than to one union member.
    seed: CliSuppress[int] = Field(42)
    geojson: CliSuppress[None | Path] = Field(None)
    shuffle: CliSuppress[None | bool] = Field(False)
    copy_web_only: CliSuppress[bool] = Field(False)
    use_cache: CliSuppress[bool] = Field(False)
    encoding: CliSuppress[str] = Field("utf8")
    pointgrid_fill: CliSuppress[float] = Field(0.05)
    cell_size: CliSuppress[int] = Field(32)
    lod_cell_height: CliSuppress[int] = Field(128)
    min_size: CliSuppress[int] = Field(100, description="min edge for image")
    gzip: CliSuppress[bool] = Field(False)
    logo: CliSuppress[None | Path] = Field(None)
    tagline: CliSuppress[None | str] = Field(None)
    # "min_score": 0.3,
    # "min_vertices": 18,


    model_config = SettingsConfigDict(
        # Environment variables are read with this prefix.
        env_prefix = "CLIPPLOT_",
        # Instantiating Cfg also parses command-line arguments.
        cli_parse_args = True,
        use_attribute_docstrings = True,
        cli_prog_name = "clipplot",
        cli_hide_none_type = True,
        # Unrecognised command-line arguments are ignored instead of raising.
        cli_ignore_unknown_args=True,
        pyproject_toml_table_header=(),
    )

In [None]:
#| hide

# Build a settings object using the declared defaults.
# NOTE(review): Cfg sets cli_parse_args=True, so this presumably also reads
# sys.argv — confirm it behaves as expected under a notebook kernel.
cfg = Cfg()

In [None]:
# Dump the settings object to a plain dict and display it.
old_version = cfg.model_dump()
old_version

{'thumbnail_size': 128,
 'model_name': 'timm/convnext_tiny.dinov3_lvd1689m',
 'umap_spec': {'n_neighbors': [15], 'min_dist': [0.1]},
 'clipplot_version': '0.1.1a7',
 'plot_id': 'd173b3f6-4a6e-4fa1-8c47-45ca264a51d4',
 'paths': {'image_dir': None,
  'image_glob': None,
  'table_file': None,
  'table_glob': None,
  'meta_file': None,
  'meta_glob': None,
  'output_dir': Path('/home/wsanger/git/clip-plot/nbs/clipplot_output')},
 'seed': 42,
 'geojson': None,
 'shuffle': False,
 'copy_web_only': False,
 'use_cache': False,
 'encoding': 'utf8',
 'pointgrid_fill': 0.05,
 'cell_size': 32,
 'lod_cell_height': 128,
 'min_size': 100,
 'gzip': False,
 'logo': None,
 'tagline': None}

In [None]:
#|hide
# Run nbdev's export step (this notebook declares `#| default_exp configuration`).
import nbdev; nbdev.nbdev_export()