## Optimization

In [1]:
from autointent.nodes.optimization import NodeOptimizer

In [2]:
import logging

# Logger for this notebook, named after the current module via __name__.
logger = logging.getLogger(__name__)

In [3]:
import os

from autointent import Context
from autointent.pipeline.optimization.utils import get_run_name, load_data, get_db_dir


# Location of the intent records used for optimization.
# The original notebook hard-coded an absolute path inside one developer's home
# directory; it is kept as the default for backward compatibility, but can now be
# overridden without editing the notebook by exporting AUTOINTENT_DATA_PATH.
DATA_PATH = os.environ.get(
    "AUTOINTENT_DATA_PATH",
    "/home/voorhs/repos/AutoIntent/tests/minimal_optimization/data/clinc_subset.json",
)

run_name = get_run_name("multiclass-cpu")
db_dir = get_db_dir("", run_name)

data = load_data(DATA_PATH, multilabel=False)

# Shared context object: it is passed to every optimizer's `fit()` below and is
# later read back through `context.optimization_info`.
context = Context(
    multiclass_intent_records=data,
    multilabel_utterance_records=[],
    test_utterance_records=[],
    device="cpu",
    mode="multiclass_as_multilabel",
    multilabel_generation_config="",
    db_dir=db_dir,
    regex_sampling=0,
    seed=0,
    dump_dir="modules_dumps"
)

### Retrieval

In [4]:
# Configuration for the retrieval node optimizer: one `vector_db` candidate
# with a single value for each of its hyperparameters.
retrieval_optimizer_config = dict(
    metric="retrieval_hit_rate_intersecting",
    node_type="retrieval",
    search_space=[
        dict(
            k=[10],
            model_name=["deepvk/USER-bge-m3"],
            module_type="vector_db",
        ),
    ],
)

In [5]:
# Build the retrieval node optimizer from the dict config defined in the previous cell.
retrieval_optimizer = NodeOptimizer.from_dict_config(retrieval_optimizer_config)

In [6]:
# Run the retrieval search against the shared context.
# NOTE(review): presumably this records the trials later read from
# `context.optimization_info.trials.retrieval` — confirm against NodeOptimizer.
retrieval_optimizer.fit(context)

### Scoring

In [7]:
# Configuration for the scoring node optimizer: three candidate module types
# (knn with three weighting options, linear, mlknn).
scoring_optimizer_config = dict(
    metric="scoring_roc_auc",
    node_type="scoring",
    search_space=[
        dict(
            k=[3],
            module_type="knn",
            weights=["uniform", "distance", "closest"],
        ),
        dict(module_type="linear"),
        dict(
            module_type="mlknn",
            k=[5],
        ),
    ],
)

In [8]:
# Build the scoring node optimizer from the dict config defined in the previous cell.
scoring_optimizer = NodeOptimizer.from_dict_config(scoring_optimizer_config)

In [9]:
# Run the scoring search against the shared context.
# NOTE(review): presumably this records the trials later read from
# `context.optimization_info.trials.scoring` — confirm against NodeOptimizer.
scoring_optimizer.fit(context)

### Prediction

In [None]:
# Configuration for the prediction node optimizer: a single `threshold`
# candidate with one threshold value.
prediction_optimizer_config = dict(
    metric="prediction_accuracy",
    node_type="prediction",
    search_space=[
        dict(
            module_type="threshold",
            thresh=[0.5],
        ),
    ],
)

In [10]:
# Build the prediction node optimizer from the dict config defined in the previous cell.
prediction_optimizer = NodeOptimizer.from_dict_config(prediction_optimizer_config)

In [11]:
# Run the prediction search against the shared context.
# NOTE(review): presumably relies on the retrieval and scoring results already
# stored in `context` by the earlier fit() calls — confirm.
prediction_optimizer.fit(context)

### Check result

In [12]:
# Show the evaluation results held in the context's optimization_info; as the
# last expression of the cell, its return value is rendered as the cell output.
context.optimization_info.dump_evaluation_results()

{'metrics': {'regexp': [],
  'retrieval': [1.0],
  'scoring': [1.0, 1.0, 1.0, 1.0],
  'prediction': [0.8333333333333334]},
 'configs': {'regexp': [],
  'retrieval': [{'module_type': 'vector_db',
    'module_params': {'k': 10, 'model_name': 'deepvk/USER-bge-m3'},
    'metric_name': 'retrieval_hit_rate_intersecting',
    'metric_value': 1.0}],
  'scoring': [{'module_type': 'knn',
    'module_params': {'k': 3, 'weights': 'uniform'},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0},
   {'module_type': 'knn',
    'module_params': {'k': 3, 'weights': 'distance'},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0},
   {'module_type': 'knn',
    'module_params': {'k': 3, 'weights': 'closest'},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0},
   {'module_type': 'linear',
    'module_params': {},
    'metric_name': 'scoring_roc_auc',
    'metric_value': 1.0}],
  'prediction': [{'module_type': 'threshold',
    'module_params': {'thresh': 0.5},
    'metric_

## Inference

In [10]:
from autointent.nodes import InferenceNode

### Retrieval

In [11]:
import gc
import torch


# Smoke-test every retrieval trial: rebuild its module as an inference node from
# the dump directory and query it with two toy utterances.
for trial in context.optimization_info.trials.retrieval:
    print(f"\n==== {trial.module_type} ====\n")

    config = {
        "node_type": "retrieval",
        "module_type": trial.module_type,
        "module_config": trial.module_params,
        "load_path": trial.module_dump_dir,
    }
    node = InferenceNode(**config)

    labels, distances, texts = node.module.predict(["hello", "world"])
    # Only the results for the first query ("hello") are displayed.
    print(labels[0], distances[0], texts[0])

    # Free resources before the next trial: ask the module to clear its cache,
    # run the garbage collector, and release cached CUDA memory.
    node.module.clear_cache()
    gc.collect()
    torch.cuda.empty_cache()


==== vector_db ====

[[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 1, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [1, 0, 0]] [np.float32(0.477605), np.float32(0.49597514), np.float32(0.50022346), np.float32(0.5128485), np.float32(0.55573064), np.float32(0.5758226), np.float32(0.58933824), np.float32(0.6130637), np.float32(0.6174678), np.float32(0.63502705)] ['please set an alarm for mid day', 'make sure my alarm is set for three thirty in the morning', 'have an alarm set for three in the morning', 'wake me up at noon tomorrow', 'i think my account is blocked but i do not know the reason', 'can you tell me why is my bank account frozen', 'can i make a reservation for redrobin', 'why is there a hold on my capital one checking account', 'why is there a hold on my american saving bank account', 'are reservations taken at redrobin']


### Scoring

In [13]:
# Smoke-test every scoring trial: rebuild its module as an inference node from
# the dump directory and score two toy utterances.
for trial in context.optimization_info.trials.scoring:
    print(f"\n==== {trial.module_type} ====\n")

    config = {
        "node_type": "scoring",
        "module_type": trial.module_type,
        "module_config": trial.module_params,
        "load_path": trial.module_dump_dir,
    }
    node = InferenceNode(**config)

    scores = node.module.predict(["hello", "world"])
    print(scores)

    # Free resources before the next trial: ask the module to clear its cache,
    # run the garbage collector, and release cached CUDA memory.
    node.module.clear_cache()
    gc.collect()
    torch.cuda.empty_cache()


==== knn ====

[[0. 0. 1.]
 [0. 0. 1.]]

==== knn ====

[[0.         0.         1.        ]
 [0.         0.         0.99999998]]

==== knn ====

[[0.         0.         0.7611975 ]
 [0.         0.         0.73897344]]

==== linear ====

[[0.27486506 0.31681463 0.37459106]
 [0.2769358  0.31536099 0.37366978]]

==== mlknn ====

[[0.08860759 0.1147541  0.79545455]
 [0.08860759 0.1147541  0.79545455]]


### Prediction

In [None]:
# Keyword arguments for the prediction inference node (threshold module, 0.5 cut-off).
prediction_config = {
    "node_type": "prediction",
    "module_type": "threshold",
    "module_config": {"thresh": 0.5},
    "load_path": ".",
}

In [None]:
# Instantiate the prediction inference node from the keyword config defined in the previous cell.
prediction = InferenceNode(**prediction_config)

In [None]:
# NOTE(review): `...` (Ellipsis) is passed as the second argument — this looks like an
# unfinished placeholder, and the cell has no execution count. Confirm the intended
# arguments before relying on this cell.
prediction.module.predict(context, ...)