# Custom Model Multi-turn Evaluation Demo

**🎯 Goal**:
- Run a [multi-turn evaluation](https://docs.okareo.ai/docs/guides/multiturn_overview) in Okareo.
- Provide a simple introduction to Okareo evaluations.

**📋 Steps**:
1. Upload a multi-turn scenario.
2. Define a custom model to act as a Target in a multi-turn conversation.
3. Run the evaluation using the scenario (1.) + model (2.) and checks for measuring behavioral adherence.

In [None]:
%pip install --upgrade okareo

In [None]:
# get Okareo client
import os
from okareo import Okareo

# Credentials are read from the environment; the "<YOUR_...>" placeholders are
# only fallbacks and must be replaced (or the variables exported) before running.
OKAREO_API_KEY = os.environ.get("OKAREO_API_KEY", "<YOUR_OKAREO_API_KEY>")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "<YOUR_OPENAI_API_KEY>")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "<YOUR_GEMINI_API_KEY>")
# Print only a short prefix so the full secret never lands in saved notebook output.
print(f"Using Okareo API key: {OKAREO_API_KEY[:4]}...")
okareo = Okareo(OKAREO_API_KEY)

Upload a simple scenario. Each row of the `seed_data` should contain:

- `input_`: a prompt used to direct the Driver.
- `result`: a behavioral directive that we want the Target to adhere to.

In [None]:
import random
import string

from okareo_api_client.models.scenario_set_create import ScenarioSetCreate
from okareo_api_client.models.seed_data import SeedData

def random_string(length: int) -> str:
    """Return a string of `length` randomly chosen ASCII letters (upper and lower case)."""
    letters = random.choices(string.ascii_letters, k=length)
    return "".join(letters)

def persona_prompt(objective: str) -> str:
    """Build the persona prompt text with `objective` inserted as the problem statement.

    Args:
        objective: Problem statement text placed under the "Problem Statement:" heading.

    Returns:
        The full multi-line prompt string.
    """
    # Plain template filled with str.format; the substituted value itself is
    # inserted verbatim, so braces inside `objective` are left untouched.
    template = """You are a black-belt in 6-sigma. You are talking to a co-pilot who is an expert in 6-sigma problem-solving and process improvement.
Your goal is to have the co-pilot produce a clear and concise plan for each phase. Your approach is to provide the co-pilot with a clear problem statement.
If the co-pilot does not understand the problem, it will ask you for clarification. Answer the questions.
If the co-pilot provides a good starting point for the plan, then request the next phase of the plan.
The range of actions you can take are listed below.
When articulating the problem statement, always use the full problem statement provided below.
NEVER DO THE CO-PILOT'S WORK FOR IT.

Problem Statement:
    {objective}

Available Actions:
    1. Use the provided Problem Statement as written to clearly and concisely request a plan.
    2. Review each phase of the plan. If acceptable, request the next phase.
    3. If an element in the plan is not acceptable, ASK A CLARIFYING QUESTION to understand the issue, then request a modification to the plan.
    4. If the plan is complete, then say "Plan complete" and end the conversation.
"""
    return template.format(objective=objective)

# Single seed row: `input_` is the persona prompt built around the problem
# statement below; `result` is the expected-behavior text for the Target.
seeds = [
    SeedData(
        result="Clear guidance and a structured approach to address the problem statement.",
        input_=persona_prompt("""Our main competitor in the SaaS space recently announced their internal "AI-Assist" help desk resolved 60 percent of all IT issues without human intervention,
    leading to a 40-point increase in their employee Net Promoter Score (eNPS) for IT support. In contrast, a well-known tech firm suffered a major data breach and
    reputational damage last quarter when their GenAI support bot was tricked into leaking sensitive employee data. Our current IT help desk has a low 35 percent first-
    contact resolution rate and a negative eNPS score, frustrating employees and tying up expensive engineering resources on basic support tasks, leaving us
    inefficient and exposed to the same security vulnerabilities."""),
    ),
]

# Describe the scenario set (name + seed rows) and upload it to Okareo.
scenario_set_create = ScenarioSetCreate(
    name="AnalyticsAIML - v0.5",  # plain literal: the f-prefix had no placeholders
    seed_data=seeds,
)
scenario = okareo.create_scenario_set(scenario_set_create)

# Link to the uploaded scenario in the Okareo app.
print(scenario.app_link)

In [None]:
from okareo.model_under_test import CustomMultiturnTarget, ModelInvocation
import requests
import json

from openai import OpenAI

class BlackBeltDriver(CustomMultiturnTarget):
    """Custom multi-turn target that answers each turn by calling a backing chat service.

    Despite its name, this class subclasses ``CustomMultiturnTarget``; ``invoke``
    produces the reply for the latest message in the conversation. Three backends
    are available as methods (the 6-sigma HTTP endpoint, and OpenAI / Gemini
    models reached through the Okareo proxy); ``invoke`` currently uses the
    6-sigma endpoint.
    """

    # Upper bound, in seconds, for the 6-sigma HTTP call so a stalled endpoint
    # cannot hang the whole evaluation.
    _REQUEST_TIMEOUT_S = 60

    def __init__(self, name: str):
        self.name = name

    def use_6sigma(self, last_msg: str, msg_history: list) -> str:
        """Send the latest message and prior history to the 6-sigma chat endpoint.

        Args:
            last_msg: Content of the most recent message.
            msg_history: Earlier messages, sent as ``conversationHistory``.

        Returns:
            The ``"response"`` field of the endpoint's JSON reply.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
            requests.Timeout: If no answer arrives within the timeout.
        """
        print(f"6Sigma Message: {last_msg}")
        url = 'https://six-sigma-suite-frankshines.replit.app/api/chat/message'  # Example URL for testing
        payload = {
            "message": last_msg,
            "conversationHistory": msg_history,
        }
        # `json=` serializes the payload and sets the JSON content type.
        response = requests.post(url, json=payload, timeout=self._REQUEST_TIMEOUT_S)
        # Fail with a clear HTTP error instead of a KeyError on an error body.
        response.raise_for_status()

        result = response.json()["response"]
        print(f"6Sigma Response: {result}")
        return result

    def _chat_via_proxy(self, api_key: str, model: str, last_msg: str, msg_history: list) -> str:
        """Run one chat completion through the Okareo proxy and return the reply text."""
        client = OpenAI(
            base_url="https://proxy.okareo.com",
            default_headers={"api-key": OKAREO_API_KEY},
            api_key=api_key,
        )
        response = client.chat.completions.create(
            model=model,
            messages=msg_history + [{"role": "user", "content": last_msg}],
            max_tokens=150,
            temperature=0.7,
        )
        # `content` may be None (e.g. no text in the reply); treat that as empty.
        content = response.choices[0].message.content
        return (content or "").strip()

    def use_OpenAI(self, last_msg: str, msg_history: list) -> str:
        """Answer `last_msg` with ``gpt-4o-mini`` via the Okareo proxy.

        Args:
            last_msg: Content of the most recent message, sent as a user message.
            msg_history: Earlier messages, prepended to the request.

        Returns:
            The stripped reply text (empty string if the reply has no content).
        """
        print(f"OpenAI Message: {last_msg}")
        return self._chat_via_proxy(OPENAI_API_KEY, "gpt-4o-mini", last_msg, msg_history)

    def use_Gemini(self, last_msg: str, msg_history: list) -> str:
        """Answer `last_msg` with ``gemini/gemini-1.5-pro-002`` via the Okareo proxy.

        Args:
            last_msg: Content of the most recent message, sent as a user message.
            msg_history: Earlier messages, prepended to the request.

        Returns:
            The stripped reply text (empty string if the reply has no content).
        """
        print(f"Gemini Message: {last_msg}")
        return self._chat_via_proxy(
            GEMINI_API_KEY, "gemini/gemini-1.5-pro-002", last_msg, msg_history
        )

    def invoke(self, messages: list) -> ModelInvocation:
        """Produce the reply for the latest message in `messages`.

        Args:
            messages: Conversation so far; each item is indexed by ``"content"``.

        Returns:
            A ``ModelInvocation`` holding the reply, the input messages, and an
            empty metadata dict. With fewer than two messages no backend is
            called and the reply is an empty string.
        """
        if len(messages) < 2:
            # Previously this path fell through and returned None, contradicting
            # the declared return type; return an explicit empty reply instead.
            return ModelInvocation("", messages, {})

        last_msg = messages[-1]["content"]
        # Skip the first message and the one being answered.
        # NOTE(review): presumably messages[0] is a system/setup message — confirm.
        msg_history = messages[1:-1]

        result = self.use_6sigma(last_msg, msg_history)
        return ModelInvocation(result, messages, {})

# Instance handed to the MultiTurnDriver as its target.
blackbelt_driver = BlackBeltDriver(name="BlackBeltDriver")

Register a `MultiTurnDriver` using your CustomModel as the Target.

We use [checks](https://docs.okareo.ai/docs/getting-started/concepts/checks) to evaluate how well the Target adheres to its directive. Here the `stop_check` is `DMAIC Complete`; the per-phase checks are passed to `run_test` in the evaluation cell below.

In [None]:
from okareo.model_under_test import GenerationModel, MultiTurnDriver, StopConfig

# Register the multi-turn model: the driver speaks first, the custom target
# replies, and the conversation is capped at 7 turns with a stop check.
multiturn_model = okareo.register_model(
    name="Persona Behavior 2.3",
    update=True,
    model=MultiTurnDriver(
        target=blackbelt_driver,
        first_turn="driver",
        max_turns=7,
        repeats=1,
        driver_temperature=1,
        # Optional driver model override, left disabled:
        #driver_model_id="gemini/gemini-1.5-pro-002",
        stop_check=StopConfig(check_name="DMAIC Complete", stop_on=True),
    ),
)

Run a [Multiturn evaluation](https://docs.okareo.ai/docs/guides/multiturn_overview#step-5-run-an-evaluation) on the custom model. 

In [None]:
from okareo_api_client.models.test_run_type import TestRunType


# Run the multi-turn evaluation over the uploaded scenario, scoring each
# conversation with the listed checks.
evaluation = multiturn_model.run_test(
    name="Persona Behavior 2.3",
    scenario=scenario,
    test_run_type=TestRunType.MULTI_TURN,
    checks=[
        "P1 Define Complete",
        "P2 Measure Complete",
        "P3 Analyze Complete",
        "P4 Improve Complete",
        "P5 Control Complete",
        "DMAIC Planning Progress",
        "Planning Complete",
    ],
)

# Link to the evaluation results in the Okareo app.
print(f"See results in Okareo: {evaluation.app_link}")

In [None]:
# CLEAN UP
"""
{
        "id": "2628495a-9a22-47df-a2f4-22226aa96668",
        "project_id": "8c69ab40-7e5e-4449-ad8c-f1e271cf62e6",
        "mut_id": "4ae74abd-2b02-46d8-bc57-6d45674adf6d",
        "scenario_set_id": "e904e39c-02f6-4f21-a77e-8b4d8b4f72c4",
        "filter_group_id": "38e0d40c-931e-403a-9d97-5adc3d61e0fd",
        "name": "GUI Test 2",
        "tags": [
            "multi-turn-eval"
        ],
        "type": "MULTI_TURN",
        "start_time": "2025-06-04T05:16:43.203008",
        "end_time": "2025-06-04T05:16:43.601439",
        "test_data_point_count": null,
        "model_metrics": {},
        "error_matrix": [],
        "status": "FAILED",
        "progress": 0,
        "app_link": "https://app.okareo.com/project/8c69ab40-7e5e-4449-ad8c-f1e271cf62e6/eval/2628495a-9a22-47df-a2f4-22226aa96668"
    },
"""
import requests
import json

# Project whose failed multi-turn runs are cleaned up. Replace with your own
# project id; it is defined once so the lookup cannot drift out of sync.
PROJECT_ID = "8c69ab40-7e5e-4449-ad8c-f1e271cf62e6"
# Upper bound, in seconds, for each Okareo API call.
REQUEST_TIMEOUT_S = 30

okareo_headers = {
    'Content-type': 'application/json',
    "api-key": OKAREO_API_KEY,
}

# 1. List the project's test runs.
find_url = 'https://api.okareo.com/v0/find_test_runs'
find_response = requests.post(
    find_url,
    json={"project_id": PROJECT_ID},
    headers=okareo_headers,
    timeout=REQUEST_TIMEOUT_S,
)
# Stop here on an API error rather than iterating over an error payload.
find_response.raise_for_status()
result = find_response.json()

# 2. Keep only the multi-turn runs that failed.
failed_multi_turn_ids = [
    run["id"]
    for run in result
    if run["type"] == "MULTI_TURN" and run["status"] == "FAILED"
]
# Repeated `test_run_ids=<id>` query parameters, joined with "&".
failed_id_str = "&".join(f"test_run_ids={run_id}" for run_id in failed_multi_turn_ids)
print(failed_id_str)

# 3. Delete them — but only if there is something to delete. Without this
# guard an empty result would send DELETE /test_runs? with no id filter.
delete_response = None
if failed_multi_turn_ids:
    delete_url = f"https://api.okareo.com/v0/test_runs?{failed_id_str}"
    delete_response = requests.delete(
        delete_url, headers=okareo_headers, timeout=REQUEST_TIMEOUT_S
    )
    print(f"Response {delete_response}")
else:
    print("No failed multi-turn test runs to delete.")