# 

In [None]:
# datafabric/Mistral7B/mistral-7b-v0.1.Q4_K_M.gguf

In [1]:
!pwd

/home/jovyan/local


## Sample script provided by Z by HP documentation to register a `mistral-7b-v0.1.Q4_K_M.gguf` model
The model was successfully registered and deployed in MLflow (AIS built-in); however, inference attempts resulted in a "prediction:null" output.

In [1]:
# register Mistral gguf
import os
import mlflow
from mlflow.pyfunc import PythonModel
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec, ParamSchema, ParamSpec

class AIStudioChatbotService(PythonModel):
    @classmethod
    def log_model(cls, model_folder=None, demo_folder="demo"):
        # Ensure the demo folder exists
        if demo_folder and not os.path.exists(demo_folder):
            os.makedirs(demo_folder, exist_ok=True)

        # Define input schema for the model
        input_schema = Schema([
            ColSpec("string", "query"),
            ColSpec("string", "prompt"),
            ColSpec("string", "document")
        ])
        
        # Define output schema for the model
        output_schema = Schema([
            ColSpec("string", "chunks"),
            ColSpec("string", "history"),
            ColSpec("string", "prompt"),
            ColSpec("string", "output"),
            ColSpec("boolean", "success")
        ])
        
        # Define parameters schema for additional settings
        param_schema = ParamSchema([
            ParamSpec("add_pdf", "boolean", False),
            ParamSpec("get_prompt", "boolean", False),
            ParamSpec("set_prompt", "boolean", False),
            ParamSpec("reset_history", "boolean", False)
        ])
        
        # Combine schemas into a model signature
        signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=param_schema)

        # Define model artifacts
        artifacts = {"demo": demo_folder}
        if model_folder:
            artifacts["models"] = model_folder

        # Log the model in MLflow
        mlflow.pyfunc.log_model(
            artifact_path="aistudio_chatbot_service",
            python_model=cls(),
            artifacts=artifacts,
            signature=signature,
            pip_requirements=[
                "pyyaml",
                "tokenizers==0.20.3",
                "httpx==0.27.2",
            ]
        )
        print("Model and artifacts successfully registered in MLflow.")

# Initialize the MLflow experiment
print("Initializing experiment in MLflow.")
mlflow.set_experiment("AIStudioChatbot_Service")

# Define required paths
model_folder = "/home/jovyan/datafabric/Mistral7B/mistral-7b-v0.1.Q4_K_M.gguf"
demo_folder = "demo"   

# Ensure required directories exist before proceeding
if demo_folder and not os.path.exists(demo_folder):
    os.makedirs(demo_folder, exist_ok=True)

# Start an MLflow run and log the model
with mlflow.start_run(run_name="AIStudioChatbot_Service_Run") as run:
    AIStudioChatbotService.log_model(
        demo_folder=demo_folder,
        model_folder=model_folder
    )
    
    # Register the model in MLflow
    model_uri = f"runs:/{run.info.run_id}/aistudio_chatbot_service"
    mlflow.register_model(
        model_uri=model_uri,
        name="Mistral_Chatbot",
    )
    print(f"Registered model with execution ID: {run.info.run_id}")
    print(f"Model registered successfully. Run ID: {run.info.run_id}")


Initializing experiment in MLflow.


Downloading artifacts: 0it [00:00, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

 - tokenizers (current: uninstalled, required: tokenizers==0.20.3)
 - httpx (current: 0.28.1, required: httpx==0.27.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


Model and artifacts successfully registered in MLflow.


Registered model 'Mistral_Chatbot' already exists. Creating a new version of this model...


Registered model with execution ID: 5731041d9e384961bd1d1c09ba9a1488
Model registered successfully. Run ID: 5731041d9e384961bd1d1c09ba9a1488


Created version '3' of model 'Mistral_Chatbot'.


## Modified script to register the gemma-2B model (`gemma-2b-it.Q4_K_M.gguf`) using llama-python-cpp framework on MLflow (AIS built-in)
The model was successfully registered in MLflow (AIS built-in), and inference was performed successfully after loading the model from MLflow. However, deployment via Swagger was unsuccessful despite extensive troubleshooting efforts. As a result, we opted to deploy the pre-trained Gemma-2B model directly on the backend of our web application.

In [None]:
# Load the model from the model registry and score
model_uri = f"models:/{reg_model_name}/1"
loaded_model = mlflow.pyfunc.load_model(model_uri)
score_model(loaded_model)

In [None]:
# register Gemma gguf 0607
import os
import mlflow
from mlflow.pyfunc import PythonModel
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec, ParamSchema, ParamSpec
from llama_cpp import Llama


class AIStudioChatbotService(PythonModel):
    def __init__(self):
        super().__init__()
        self._llama = Llama(
            model_path="/home/jovyan/local/gemma-2b-it.Q4_K_M.gguf", #YOUR MODEL PATH HERE
            n_ctx=8192,
            verbose=False
        )

    def predict(self, context, model_input):
        prompt = model_input["prompt"][0]
        print(f"Type of prompt is: {type(prompt)}")
        result = self._llama(prompt, temperature=0.0, max_tokens=256, stop=["</s>"])
        text = result["choices"][0]["text"]
        outputs={
                "chunks": "",
                "history": "",
                "prompt": prompt,
                "output": text.strip(),
                "success": True
        }
        return outputs
        
    @classmethod
    def log_model(cls, model_folder=None, demo_folder="demo"):
        # Ensure the demo folder exists
        if demo_folder and not os.path.exists(demo_folder):
            os.makedirs(demo_folder, exist_ok=True)

        # Define input schema for the model
        input_schema = Schema([
            ColSpec("string", "query"),
            ColSpec("string", "prompt"),
            ColSpec("string", "document")
        ])
        
        # Define output schema for the model
        output_schema = Schema([
            ColSpec("string", "chunks"),
            ColSpec("string", "history"),
            ColSpec("string", "prompt"),
            ColSpec("string", "output"),
            ColSpec("boolean", "success")
        ])
        
        # Define parameters schema for additional settings
        param_schema = ParamSchema([
            ParamSpec("add_pdf", "boolean", False),
            ParamSpec("get_prompt", "boolean", False),
            ParamSpec("set_prompt", "boolean", False),
            ParamSpec("reset_history", "boolean", False)
        ])
        
        # Combine schemas into a model signature
        signature = ModelSignature(inputs=input_schema, outputs=output_schema, params=param_schema)

        # Define model artifacts
        artifacts = {"demo": demo_folder}
        if model_folder:
            artifacts["models"] = model_folder

        # Log the model in MLflow
        mlflow.pyfunc.log_model(
            artifact_path="aistudio_chatbot_service",
            python_model=cls(),
            artifacts=artifacts,
            signature=signature,
            pip_requirements=[
                "pyyaml",
                "tokenizers==0.20.3",
                "httpx==0.27.2",
            ]
        )
        print("Model and artifacts successfully registered in MLflow.")

# Initialize the MLflow experiment
print("Initializing experiment in MLflow.")
mlflow.set_experiment("AIStudioChatbot_Service")

# Define required paths
model_folder = "/home/jovyan/local/gemma-2b-it.Q4_K_M.gguf"
demo_folder = "demo"   

# Ensure required directories exist before proceeding
if demo_folder and not os.path.exists(demo_folder):
    os.makedirs(demo_folder, exist_ok=True)

# Start an MLflow run and log the model
with mlflow.start_run(run_name="Gemma_Test_Run_gguf_0607") as run:
    AIStudioChatbotService.log_model(
        demo_folder=demo_folder,
        model_folder=model_folder
    )
    
    # Register the model in MLflow
    model_uri = f"runs:/{run.info.run_id}/aistudio_chatbot_service"
    mlflow.register_model(
        model_uri=model_uri,
        name="Gemma_test_0607",
    )
    print(f"Registered model with execution ID: {run.info.run_id}")
    print(f"Model registered successfully. Run ID: {run.info.run_id}")

Initializing experiment in MLflow.


Downloading artifacts: 0it [00:00, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Model and artifacts successfully registered in MLflow.
Registered model with execution ID: 32a27d393d8b4c1c94c71b126a7982fb
Model registered successfully. Run ID: 32a27d393d8b4c1c94c71b126a7982fb


Registered model 'Gemma_test_0607' already exists. Creating a new version of this model...
Created version '4' of model 'Gemma_test_0607'.


In [None]:
# Load the model from the model registry and run inference
import pandas as pd

def run_model_inference(loaded_model, m_input):
    # Use inference to predict output from the customized PyFunc model
    scores = loaded_model.predict(m_input)
    print(scores)
    
# Load the model from the model registry and score

loaded_model = mlflow.pyfunc.load_model(model_uri) # example: model_uri = f"models:/{reg_model_name}/1"
m_input={'prompt':"What is MLflow?", "query": "", "document": ""}
run_model_inference(loaded_model, m_input)

 - tokenizers (current: uninstalled, required: tokenizers==0.20.3)
 - httpx (current: 0.28.1, required: httpx==0.27.2)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


Type of prompt is: <class 'str'>
{'chunks': '', 'history': '', 'prompt': 'What is MLflow?', 'output': "MLflow is an open-source platform for managing the entire machine learning lifecycle, from data preparation to model deployment. It provides a centralized repository for storing and tracking all the steps of the machine learning process, making it easier to track, reproduce, and improve the model's performance.\n\n**Key features of MLflow:**\n\n* **Data Management:** MLflow allows you to store and manage data in a central repository, ensuring data quality and version control.\n* **Model Tracking:** MLflow tracks the entire machine learning process, from data preparation to model deployment, making it easy to track and reproduce the model's performance.\n* **Collaboration:** MLflow provides a platform for collaboration among data scientists, engineers, and business users, facilitating knowledge sharing and reproducibility.\n* **Model Versioning:** MLflow allows you to version your mode

In [None]:
# Check input prompt for the model
m_input["prompt"][0]

'What is MLflow?'

In [None]:
# check model dependencies
model_uri = "runs:/9a853b804d004fc6b242706337371b85/aistudio_chatbot_service" ## !! REPLACE with your model URI
mlflow.pyfunc.get_model_dependencies(model_uri)

2025/06/07 21:25:20 INFO mlflow.pyfunc: To install the dependencies that were used to train the model, run the following command: '%pip install -r /phoenix/mlflow/172877855724559448/9a853b804d004fc6b242706337371b85/artifacts/aistudio_chatbot_service/requirements.txt'.


'/phoenix/mlflow/172877855724559448/9a853b804d004fc6b242706337371b85/artifacts/aistudio_chatbot_service/requirements.txt'