In [22]:
import mlflow
from src.mlflow_utils import HyperNodeMLFlow, EnvironmentGenerator, get_code_paths
from src.hypernodes import HyperNode
import pandas as pd

In [23]:
# Make "hypernodes" the active MLflow experiment for the runs logged below.
mlflow.set_experiment(experiment_name="hypernodes")

<Experiment: artifact_location='file:///c:/python_workspace/pdf_extract/mlruns/920054831142091747', creation_time=1723992897632, experiment_id='920054831142091747', last_update_time=1723992897632, lifecycle_stage='active', name='hypernodes', tags={}>

In [24]:
# Load the node stored under src/nodes/rag_qa; it is wrapped for MLflow in a later cell.
rag_node = HyperNode.load("src/nodes/rag_qa")

In [25]:
# Overrides handed to HyperNodeMLFlow; sets `llm_model` to "mini".
overrides = dict(llm_model="mini")

In [26]:
# Wrap the loaded node for MLflow. `final_vars` requests the `llm_response`
# output (exact semantics are defined in src.mlflow_utils).
model = HyperNodeMLFlow(
    rag_node,
    final_vars=["llm_response"],
    overrides=overrides,
)

# Define Artifacts

In [27]:
# Local paths to bundle with the logged model, keyed by artifact name.
# NOTE(review): `.env` files usually hold credentials, and logging one as an
# artifact stores a copy with the model — confirm this is intended.
artifacts = dict(
    texts_path="data/raw",
    env_file=".env",
)

# Test the Model Class In-Process

In [38]:
class Context:
    """Minimal stand-in for the context object passed to `predict`.

    It only carries the `artifacts` mapping, which lets the wrapped model be
    exercised in-process before it is logged to MLflow.
    """

    def __init__(self, artifacts):
        self.artifacts = artifacts


# One-row frame holding the question to ask.
model_input = pd.DataFrame(
    {"query": ["What year was the transformer architecture introduced?"]}
)
context = Context(artifacts)
model.predict(context=context, model_input=model_input)

'2017'

# Define Environment

In [29]:
# Describe the runtime environment for the logged model, based on requirements.txt.
env = EnvironmentGenerator(
    env_name="hypernodes",
    dependency_file="requirements.txt",
)
conda_env = env.get_conda_environment_dict()

# Add Source Files

In [30]:
# Collect the .py files under src/ so they can be shipped alongside the model.
code_paths = get_code_paths(
    folders=["src"],
    suffix=".py",
)

In [31]:
# Display the collected files so the bundle contents can be checked before logging.
code_paths

['c:\\python_workspace\\pdf_extract\\src\\app.py',
 'c:\\python_workspace\\pdf_extract\\src\\hypernodes.py',
 'c:\\python_workspace\\pdf_extract\\src\\mlflow_utils.py',
 'c:\\python_workspace\\pdf_extract\\src\\nodes\\batch_qa\\batch_qa_dag.py',
 'c:\\python_workspace\\pdf_extract\\src\\nodes\\batch_qa\\batch_qa_hp_config.py',
 'c:\\python_workspace\\pdf_extract\\src\\nodes\\rag_qa\\rag_qa_dag.py',
 'c:\\python_workspace\\pdf_extract\\src\\nodes\\rag_qa\\rag_qa_hp_config.py',
 'c:\\python_workspace\\pdf_extract\\src\\nodes\\sklearn_ranker\\sklearn_ranker_dag.py',
 'c:\\python_workspace\\pdf_extract\\src\\nodes\\sklearn_ranker\\sklearn_ranker_hp_config.py']

# Log Model

In [32]:
# Log the wrapped model (with its artifacts, conda environment and source
# files) inside a new run, then register that run's model as "pdf-model".
# `mlflow` is already imported in the notebook's first cell, so it is not
# re-imported here.
with mlflow.start_run():
    log_res = mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=model,
        artifacts=artifacts,
        conda_env=conda_env,
        code_paths=code_paths,
    )
    # The trailing "model" segment must match `artifact_path` above.
    model_uri = f"runs:/{log_res.run_id}/model"
    model_reg = mlflow.register_model(model_uri, "pdf-model")

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 500.04it/s]
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 1000.79it/s]
Registered model 'pdf-model' already exists. Creating a new version of this model...
Created version '7' of model 'pdf-model'.


# Test Model Locally

In [33]:
%%time
mlflow_model = mlflow.pyfunc.load_model(model_uri)

CPU times: total: 0 ns
Wall time: 135 ms


In [36]:
# One-row frame with the question used to smoke-test the reloaded model.
input_example = pd.DataFrame(
    {"query": ["What year was the transformer architecture introduced?"]}
)

In [37]:
# Run the reloaded model on the example question.
mlflow_model.predict(data=input_example)

'2017'