## Toy example

In [1]:
from dataclasses import dataclass
from omegaconf import MISSING

class Optimizer:
    """Toy optimizer: a plain holder for the hyperparameters it was built with."""

    def __init__(self, algo: str, lr: float) -> None:
        # Both values are stored verbatim; no validation happens here.
        self.algo, self.lr = algo, lr

@dataclass
class OptimizerConfig:
    """Structured config whose fields mirror the arguments of `Optimizer.__init__`."""

    algo: str = "SGD"
    # No usable default: MISSING is OmegaConf's placeholder for a value that must be supplied.
    lr: float = MISSING
    # Dotted path to the `Optimizer` class, built from the current module's name.
    _target_: str = f"{__name__}.Optimizer"

class Dataset:
    """Toy dataset descriptor: remembers a dataset name and where its file lives."""

    def __init__(self, name: str, path: str) -> None:
        # Nothing is opened or read here; the values are only stored.
        self.name, self.path = name, path

@dataclass
class DatasetConfig:
    """Structured config whose fields mirror the arguments of `Dataset.__init__`."""

    name: str = "clinc"
    path: str = "./clinc.json"
    # Dotted path to the `Dataset` class, built from the current module's name.
    _target_: str = f"{__name__}.Dataset"



class Trainer:
    """Toy top-level object: keeps one optimizer and a list of datasets."""

    def __init__(self, optimizer: Optimizer, datasets: list[Dataset]) -> None:
        # The arguments are stored as given (the list is not copied).
        self.optimizer, self.datasets = optimizer, datasets


@dataclass
class TrainerConfig:
    """Structured config whose fields mirror the arguments of `Trainer.__init__`."""

    # Nested configs; both are MISSING (OmegaConf's "must be supplied" placeholder),
    # so the caller has to provide an optimizer section and a datasets list.
    optimizer: OptimizerConfig = MISSING
    datasets: list[DatasetConfig] = MISSING
    # Dotted path to the `Trainer` class, built from the current module's name.
    _target_: str = f"{__name__}.Trainer"

In [2]:
# Plain-Python values for the trainer. The `_target_` entries stay commented out:
# the config dataclasses already carry them as field defaults.
dct = dict(
    # _target_=f"{__name__}.Trainer",
    optimizer=dict(
        # _target_=f"{__name__}.Optimizer",
        algo="SGD",
        lr=1e-3,
    ),
    datasets=[
        dict(
            # _target_=f"{__name__}.Dataset",
            name="clinc",
            path="./clinc.json",
        ),
    ],
)

In [3]:
from hydra.utils import instantiate

# Build the object graph from the structured config, passing the entries of `dct`
# as keyword arguments; the `_target_` defaults on the dataclasses name the classes to build.
trainer: Trainer = instantiate(TrainerConfig(), **dct)
# Alternative: instantiate straight from the raw dict, which then needs the
# `_target_` keys in `dct` to be uncommented.
# trainer: Trainer = instantiate(dct)
trainer

<__main__.Trainer at 0x79f32bf76450>

## Pipeline

In [1]:
from hydra.utils import instantiate
from autointent import Context
from autointent.pipeline.optimization import get_db_dir, get_run_name, load_data

# NOTE(review): absolute path into a local checkout; point it at the repository
# location on your machine before running.
data_path = "/home/voorhs/repos/AutoIntent/tests/minimal-optimization/data/clinc_subset.json"

# The run name is reused below for the database directory and later when dumping results.
run_name = get_run_name("multiclass-cpu")
db_dir = get_db_dir("", run_name)

data = load_data(data_path, multilabel=False)

# Multiclass records are supplied, with the mode set to "multiclass_as_multilabel";
# the multilabel and test record lists are left empty.
context = Context(
    multiclass_intent_records=data,
    multilabel_utterance_records=[],
    test_utterance_records=[],
    device="cpu",
    mode="multiclass_as_multilabel",
    multilabel_generation_config="",
    db_dir=db_dir,
    regex_sampling=0,
    seed=0,
)

In [2]:
import importlib.resources as ires
from pathlib import Path
from logging import Logger
import yaml


def load_config(config_path: str, multilabel: bool, logger: Logger | None = None):
    """load config from the given path or load default config which is distributed along with the autointent package"""
    if config_path != "":
        if logger is not None:
            logger.debug("loading optimization search space config from %s...)", config_path)
        with Path(config_path).open() as file:
            file_content = file.read()
    else:
        if logger is not None:
            logger.debug("loading default optimization search space config...")
        config_name = "default-multilabel-config.yaml" if multilabel else "default-multiclass-config.yaml"
        with ires.files("autointent.datafiles").joinpath(config_name).open() as file:
            file_content = file.read()
    return yaml.safe_load(file_content)

In [3]:
# An empty path makes `load_config` read `default-multilabel-config.yaml` from the
# installed `autointent.datafiles` package, so no machine-specific absolute path is needed.
config = load_config("", multilabel=True)

In [4]:
from pprint import pprint
# Pretty-print the parsed config to inspect the nodes and their search spaces.
pprint(config)

{'nodes': [{'metric': 'retrieval_hit_rate_intersecting',
            'node_type': 'retrieval',
            'search_space': [{'k': [10],
                              'model_name': ['deepvk/USER-bge-m3'],
                              'module_type': 'vector_db'}]},
           {'metric': 'scoring_roc_auc',
            'node_type': 'scoring',
            'search_space': [{'k': [3],
                              'module_type': 'knn',
                              'weights': ['uniform', 'distance', 'closest']},
                             {'module_type': 'linear'}]},
           {'metric': 'prediction_accuracy',
            'node_type': 'prediction',
            'search_space': [{'module_type': 'threshold', 'thresh': [0.5]}]}]}


In [None]:
from autointent.pipeline.pipeline import Pipeline
from autointent.configs.pipeline import PipelineOptimizationConfig


# Build the pipeline from the structured config, passing the top-level keys of the
# parsed YAML (here just `nodes`) as keyword arguments.
pipeline: Pipeline = instantiate(PipelineOptimizationConfig(), **config)

In [6]:
# Sanity check: `instantiate` should have produced a `Pipeline` object rather than a config node.
type(pipeline)

autointent.pipeline.pipeline.Pipeline

In [7]:
# Run the optimization against the context built in the first cell of this section.
# NOTE(review): the cell displays nothing; presumably results are kept on `pipeline`/`context` — confirm.
pipeline.optimize(context)

In [8]:
# NOTE(review): the first argument is the same empty string passed to `get_db_dir` above;
# presumably it selects a default output location for this run — TODO confirm.
pipeline.dump("", run_name)