# Hydra configs for nodes

In [1]:
import logging

# Notebook-wide logger, registered under the name of the executing module.
logger = logging.getLogger(name=__name__)

In [2]:
import os

from autointent import Context
from autointent.pipeline.optimization.utils import get_run_name, load_data
from autointent.pipeline.utils import get_db_dir
from autointent.custom_types import ClassificationMode


# Root of the AutoIntent checkout. Set the AUTOINTENT_REPO environment variable
# to run this notebook on another machine; the fallback is the location the
# notebook was originally written against, so the default path is unchanged.
AUTOINTENT_REPO = os.environ.get("AUTOINTENT_REPO", "/home/voorhs/repos/AutoIntent")
TRAIN_DATA_PATH = os.path.join(
    AUTOINTENT_REPO, "tests", "minimal-optimization", "data", "clinc_subset.json"
)

run_name = get_run_name("multiclass-cpu")
db_dir = get_db_dir("", run_name)

# Records are loaded as multiclass data (multilabel=False) and handed to the
# context, which is then shared by every node optimizer fitted below.
data = load_data(TRAIN_DATA_PATH, multilabel=False)
context = Context(
    multiclass_intent_records=data,
    multilabel_utterance_records=[],  # no multilabel records supplied
    test_utterance_records=[],  # no separate test records supplied
    device="cpu",
    mode=ClassificationMode.multiclass_as_multilabel,
    multilabel_generation_config="",
    db_dir=db_dir,
    regex_sampling=0,
    seed=0,  # fixed seed
)

In [3]:
import os

from autointent.pipeline.optimization.utils import load_config

# Root of the AutoIntent checkout. Set the AUTOINTENT_REPO environment variable
# to run this notebook on another machine; the fallback is the location the
# notebook was originally written against, so the default path is unchanged.
AUTOINTENT_REPO = os.environ.get("AUTOINTENT_REPO", "/home/voorhs/repos/AutoIntent")
CONFIG_PATH = os.path.join(
    AUTOINTENT_REPO, "autointent", "datafiles", "default-multilabel-config.yaml"
)

# Load the multilabel default config that describes the nodes to optimize.
config = load_config(CONFIG_PATH, multilabel=True)

In [4]:
from pprint import pprint

# Pretty-print the loaded config to inspect each node's type, metric and search space.
pprint(config)

{'nodes': [{'metric': 'retrieval_hit_rate_intersecting',
            'node_type': 'retrieval',
            'search_space': [{'k': [10],
                              'model_name': ['deepvk/USER-bge-m3'],
                              'module_type': 'vector_db'}]},
           {'metric': 'scoring_roc_auc',
            'node_type': 'scoring',
            'search_space': [{'k': [3],
                              'module_type': 'knn',
                              'weights': ['uniform', 'distance', 'closest']},
                             {'module_type': 'linear'}]},
           {'metric': 'prediction_accuracy',
            'node_type': 'prediction',
            'search_space': [{'module_type': 'threshold', 'thresh': [0.5]}]}]}


## Retrieval

In [5]:
from autointent.nodes.optimization import NodeOptimizer

# The first entry of config["nodes"] is the retrieval node (see the printed config).
retrieval_node_config = config["nodes"][0]
retrieval_optimizer = NodeOptimizer.from_dict_config(retrieval_node_config)

In [6]:
# Fit the retrieval optimizer against the shared context built earlier in the notebook.
retrieval_optimizer.fit(context)

## Scoring

In [7]:
# The second entry of config["nodes"] is the scoring node (knn and linear search spaces).
scoring_node_config = config["nodes"][1]
scoring_optimizer = NodeOptimizer.from_dict_config(scoring_node_config)

In [8]:
# Fit the scoring optimizer against the same context object used for retrieval.
scoring_optimizer.fit(context)

In [9]:
# Bare expression: displays the optimizer object (only its default object repr is shown).
scoring_optimizer

<autointent.nodes.optimization.node_optimizer.NodeOptimizer at 0x77c0d7fe8c20>

## Prediction

In [10]:
# The third entry of config["nodes"] is the prediction node (threshold module).
prediction_node_config = config["nodes"][2]
prediction_optimizer = NodeOptimizer.from_dict_config(prediction_node_config)

In [11]:
# Fit the prediction optimizer against the same context object used for the earlier nodes.
prediction_optimizer.fit(context)

## Check results

In [12]:
# Collect the per-node metrics and configs held in context.optimization_info and display them.
evaluation_results = context.optimization_info.dump_evaluation_results()
evaluation_results

{'metrics': {'regexp': [],
  'retrieval': [1.0],
  'scoring': [1.0, 1.0, 1.0, 1.0],
  'prediction': [0.8333333333333334]},
 'configs': {'regexp': [],
  'retrieval': [{'module_type': 'vector_db',
    'module_params': {'k': 10, 'model_name': 'deepvk/USER-bge-m3'},
    'metric_name': 'retrieval_hit_rate_intersecting',
    'metric_value': 1.0}],
  'scoring': [{'module_type': 'knn',
    'module_params': {'k': 3, 'weights': 'uniform'},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0},
   {'module_type': 'knn',
    'module_params': {'k': 3, 'weights': 'distance'},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0},
   {'module_type': 'knn',
    'module_params': {'k': 3, 'weights': 'closest'},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0},
   {'module_type': 'linear',
    'module_params': {},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0}],
  'prediction': [{'module_type': 'threshold',
    'module_params': {'thresh': 0.5},
    'metric_