# 3.1 Convolutio - Complete pipeline with legacy fields

This notebook extends the full pipeline to include the legacy field extractor and logs all steps with MLflow.

In [None]:
# Reload edited project modules automatically before each cell execution,
# so changes to the imported package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from archaeo_super_prompt.dataset import MagohDataset, SamplingParams
import archaeo_super_prompt.modeling.train as training
import archaeo_super_prompt.modeling.predict as infering
import mlflow
import pandas as pd
from archaeo_super_prompt.visualization import mlflow_logging as mmlflow
from archaeo_super_prompt.config.env import getenv_or_throw
from sklearn import set_config


In [None]:
# Name of the MLflow experiment that the runs of this notebook are filed under.
EXP_NAME = "Legacy field extraction"

# The tracking server address is read from the environment rather than hardcoded.
# NOTE(review): getenv_or_throw presumably raises when the variable is unset — confirm.
mlflow_host = getenv_or_throw('MLFLOW_HOST')
mlflow_port = getenv_or_throw('MLFLOW_PORT')
mlflow.set_tracking_uri(f"http://{mlflow_host}:{mlflow_port}")
mlflow.set_experiment(EXP_NAME)

# Turn on MLflow's DSPy autologging, including compile and evaluation logging.
mlflow.dspy.autolog(
    log_compiles=True,
    log_evals=True,
    log_traces_from_compile=True,
)

# Display settings: no cap on the number of DataFrame columns shown, and
# scikit-learn estimators rendered as diagrams.
pd.set_option('display.max_columns', None)
set_config(display="diagram")


In [None]:
# Request the training DAG parts with include_legacy=True, then assemble the
# complete inference DAG from those parts.
dag_parts = training.get_training_dag(include_legacy=True)
expected_final_pipeline = infering.build_complete_inference_dag(dag_parts)
# Bare last expression so the notebook renders the assembled pipeline.
expected_final_pipeline


In [None]:
# Sizes of the train / eval subsets and the seed for the pandas draw below.
# Without a fixed random_state, every "Restart & Run All" picked different
# files, so the scores logged to MLflow were not comparable between runs.
N_TRAIN = 3
N_EVAL = 3
SPLIT_SEED = 0

# Load a sampled dataset (sampling parameters unchanged from the original cell).
ds = MagohDataset(SamplingParams(size=20, seed=0.1, only_recent_entries=False))

# Draw N_TRAIN + N_EVAL files reproducibly, then split them positionally.
# NOTE(review): assumes ds.files is a pandas DataFrame (it is sliced with
# .iloc below), so .sample accepts random_state — confirm.
inputs = ds.files.sample(N_TRAIN + N_EVAL, random_state=SPLIT_SEED)
train_inputs, eval_inputs = inputs.iloc[:N_TRAIN], inputs.iloc[N_TRAIN:]


In [None]:
# Training and scoring both happen inside one MLflow run context, so they
# belong to the same run.
with mlflow.start_run():
    # Fit the DAG parts on the training files, with include_legacy=True.
    trained_dag_parts = training.train_from_scratch(
        train_inputs,
        ds,
        include_legacy=True,
    )
    # Score the trained parts on the held-out files; score_dag returns a pair
    # that is unpacked into per-field scores and detailed results.
    (per_field_scores, detailed_results) = infering.score_dag(
        trained_dag_parts,
        eval_inputs,
        ds,
    )


In [None]:
# Preview the first rows of the detailed evaluation results instead of dumping the whole object.
detailed_results.head()
