In [1]:
import mlflow
from src.mlflow_utils import HyperNodeMLFlow, EnvironmentGenerator, get_code_paths
from src.hypernodes import HyperNode
import pandas as pd

In [2]:
# Make "hypernodes" the active MLflow experiment for the run started later in this notebook.
mlflow.set_experiment("hypernodes")

<Experiment: artifact_location='file:///c:/python_workspace/hypernodes-demo/mlruns/425819731381642893', creation_time=1724146491228, experiment_id='425819731381642893', last_update_time=1724146491228, lifecycle_stage='active', name='hypernodes', tags={}>

In [3]:
# Load the node stored under src/nodes/rag_qa.
rag_node = HyperNode.load("src/nodes/rag_qa")
# Instantiate its inputs with the "sonnet" selection for llm_model and
# ranker.top_k overridden to 3.
rag_node.instantiate_inputs(
    selections={"llm_model": "sonnet"},
    overrides={"ranker.top_k": 3},
)
# Smoke-test: execute the node for the "llm_response" output only.
# NOTE(review): this reads the private `_instantiated_inputs` attribute (hence the
# type-ignore) — presumably there is no public accessor; confirm against HyperNode.
rag_node.execute(["llm_response"], inputs=rag_node._instantiated_inputs)  # type: ignore

{'llm_response': 'Retrieval-augmented'}

In [4]:
# Settings handed to the HyperNodeMLFlow wrapper via its `overrides=` argument.
# NOTE(review): "llm_model" was supplied through `selections=` when the node was
# instantiated earlier, but is passed as an override here — confirm the wrapper
# interprets it as intended.
overrides = {"llm_model": "mini"}

In [5]:
# Wrap the loaded node for MLflow: "llm_response" is the requested final variable
# and the overrides defined above are applied by the wrapper.
model = HyperNodeMLFlow(
    rag_node,
    final_vars=["llm_response"],
    overrides=overrides,
)

# Define Artifacts

In [6]:
# Local paths passed to the model as named artifacts (exposed through
# `context.artifacts` at predict time and handed to `log_model` later).
# NOTE(review): "env_file" points at the local .env; files passed as MLflow
# artifacts are stored with the logged model, so make sure it contains nothing
# that must stay out of the tracking store / model registry.
artifacts = {
    "texts_path": "data/raw",
    "env_file": ".env",
}

# Test the Model Class Locally (Before Logging)

In [7]:
# Single-question input frame used to exercise the wrapper directly.
model_input = pd.DataFrame(
    {"query": ["What year was the transformer architecture introduced?"]}
)


class Context:
    """Minimal stand-in for the context object handed to ``predict``.

    It exposes a single ``artifacts`` attribute holding whatever mapping was
    passed to the constructor.
    """

    def __init__(self, artifacts):
        # Stored unchanged so it can be read back as ``context.artifacts``.
        self.artifacts = artifacts
        
# Build the stub context around the artifact paths and call predict directly,
# checking the wrapper end to end before it is logged to MLflow.
context = Context(artifacts)
model.predict(context=context, model_input=model_input)

'2017'

# Define Environment

In [8]:
# Build the environment description named "hypernodes" from requirements.txt.
env = EnvironmentGenerator(
    env_name="hypernodes",
    dependency_file="requirements.txt",
)
# Dict form of that environment; passed as `conda_env` when the model is logged.
conda_env = env.get_conda_environment_dict()

# Add Source Files

In [9]:
# Collect the .py files under src/ so they can be shipped with the model
# through the `code_paths` argument of log_model.
code_paths = get_code_paths(folders=["src"], suffix=".py")

In [10]:
# Display the collected paths to verify which source files will be bundled.
code_paths

['c:\\python_workspace\\hypernodes-demo\\src\\app.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\hypernodes.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\mlflow_utils.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\__init__.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\nodes\\batch_qa\\batch_qa_dag.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\nodes\\batch_qa\\batch_qa_hp_config.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\nodes\\rag_qa\\rag_qa_dag.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\nodes\\rag_qa\\rag_qa_hp_config.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\nodes\\sklearn_ranker\\sklearn_ranker_dag.py',
 'c:\\python_workspace\\hypernodes-demo\\src\\nodes\\sklearn_ranker\\sklearn_ranker_hp_config.py']

# Log Model

In [11]:
# Log the wrapper as a pyfunc model inside a new run, then register that run's
# model under the name "pdf-model". (`mlflow` is imported once in the first cell,
# so it is not re-imported here.)
with mlflow.start_run():
    log_res = mlflow.pyfunc.log_model(
        artifact_path="model",  # must match the suffix of model_uri below
        python_model=model,
        artifacts=artifacts,  # local files packaged alongside the model
        conda_env=conda_env,
        code_paths=code_paths,  # project sources shipped with the model
    )
    # URI of the model just logged; reused below to load it back.
    model_uri = f"runs:/{log_res.run_id}/model"
    # Registering under an existing name creates a new version (see the cell output).
    model_reg = mlflow.register_model(model_uri, "pdf-model")

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 992.97it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 1000.79it/s]
Registered model 'pdf-model' already exists. Creating a new version of this model...
Created version '2' of model 'pdf-model'.


# Test the Logged Model Locally

In [12]:
%%time
# Load the model logged above back through the pyfunc interface; the cell magic
# on the first line reports how long the load takes.
mlflow_model = mlflow.pyfunc.load_model(model_uri)

CPU times: total: 0 ns
Wall time: 152 ms


In [13]:
# Same single-question frame used for the direct predict call earlier, this
# time for the reloaded model.
input_example = pd.DataFrame(
    {"query": ["What year was the transformer architecture introduced?"]}
)

In [14]:
# Query the reloaded model; unlike the earlier direct call, no hand-built
# context is passed here.
mlflow_model.predict(input_example)

'2017'