## Install packages and set up imports

In [None]:
# Install the collinear package, upgrading it if an older version is already present.
!pip install collinear --upgrade

## Load model, setup client

In [None]:
# Config Variables (from configs/simulation_config.json and steering_config_*.json)
import json
from pathlib import Path

# All configuration lives under ./configs relative to the working directory.
CONFIG_DIR = Path('configs')
SIMULATION_CONFIG_FILE = CONFIG_DIR / 'simulation_config.json'
# Decode explicitly as UTF-8 so loading does not depend on the platform's
# default text encoding.
config_data = json.loads(SIMULATION_CONFIG_FILE.read_text(encoding='utf-8'))

# Pick the steering config named in the simulation config; a missing or empty
# entry falls back to the airline steering config.
steering_config_value = config_data.get('steering_config_file') or 'steering_config_airline.json'
steering_candidate = Path(steering_config_value)
if not steering_candidate.is_absolute():
    # Relative values are resolved inside CONFIG_DIR by file name only, so any
    # directory components in the configured value are dropped.
    steering_candidate = CONFIG_DIR / steering_candidate.name
STEERING_CONFIG_FILE = steering_candidate
STEER_CONFIG = json.loads(STEERING_CONFIG_FILE.read_text(encoding='utf-8'))
STEER_TASKS = list(STEER_CONFIG.get('tasks', []))

# Client options
# `or {}` also covers a section that is present but null/empty in the JSON.
client_settings = config_data.get('client', {}) or {}
CLIENT_ASSISTANT_MODEL_URL = client_settings.get('assistant_model_url', 'https://api.openai.com/v1')
# No default here: the value is None when the key is absent from the config.
CLIENT_ASSISTANT_MODEL_API_KEY = client_settings.get('assistant_model_api_key')
CLIENT_ASSISTANT_MODEL_NAME = client_settings.get('assistant_model_name', 'gpt-4o-mini')
CLIENT_STEER_API_KEY = client_settings.get('steer_api_key', 'demo-001')
# Coerce to numeric types so values written as strings in the JSON still work.
CLIENT_TIMEOUT = float(client_settings.get('timeout', 120))
CLIENT_MAX_RETRIES = int(client_settings.get('max_retries', 3))
CLIENT_RATE_LIMIT_RETRIES = int(client_settings.get('rate_limit_retries', 6))

# Simulation options
simulate_settings = config_data.get('simulate', {}) or {}
SIM_SAMPLES = simulate_settings.get('k', 1)
SIM_EXCHANGES = simulate_settings.get('num_exchanges', 2)
SIM_DELAY = simulate_settings.get('batch_delay', 0.2)
SIM_STEER_TEMPERATURE = simulate_settings.get('steer_temperature', 0.7)
SIM_STEER_MAX_TOKENS = simulate_settings.get('steer_max_tokens', 256)
SIM_MIX_TRAITS = bool(simulate_settings.get('mix_traits', False))

# Assessment options
# The three judge_* settings have no defaults and are None when not configured.
assess_settings = config_data.get('assess', {}) or {}
ASSESS_JUDGE_MODEL_URL = assess_settings.get('judge_model_url')
ASSESS_JUDGE_MODEL_API_KEY = assess_settings.get('judge_model_api_key')
ASSESS_JUDGE_MODEL_NAME = assess_settings.get('judge_model_name')
ASSESS_TEMPERATURE = assess_settings.get('temperature', 0.0)
ASSESS_MAX_TOKENS = assess_settings.get('max_tokens', 512)


# Summarise which files were loaded; an empty task list is shown as "<none>".
print(f'Loaded simulation: {SIMULATION_CONFIG_FILE} | steering: {STEERING_CONFIG_FILE} | tasks: {STEER_TASKS or "<none>"}')


## Client setup

In [None]:
# Client setup
from collinear.client import Client

# Stop with a clear error when no assistant API key was configured.
if not CLIENT_ASSISTANT_MODEL_API_KEY:
    raise RuntimeError('assistant_model_api_key must be set in configs/simulation_config.json')

# Gather the constructor arguments in one mapping, then build the client from it.
client_options = {
    'assistant_model_url': CLIENT_ASSISTANT_MODEL_URL,
    'assistant_model_api_key': CLIENT_ASSISTANT_MODEL_API_KEY,
    'assistant_model_name': CLIENT_ASSISTANT_MODEL_NAME,
    'steer_api_key': CLIENT_STEER_API_KEY,
    'timeout': CLIENT_TIMEOUT,
    'max_retries': CLIENT_MAX_RETRIES,
    'rate_limit_retries': CLIENT_RATE_LIMIT_RETRIES,
}
client = Client(**client_options)


## Simulate samples

In [None]:
# Run simulations using the steering config and the simulation options loaded
# from the config files.
simulations = client.simulate(
    steer_config=STEER_CONFIG,
    k=SIM_SAMPLES,
    num_exchanges=SIM_EXCHANGES,
    batch_delay=SIM_DELAY,
    steer_temperature=SIM_STEER_TEMPERATURE,
    steer_max_tokens=SIM_STEER_MAX_TOKENS,
    mix_traits=SIM_MIX_TRAITS,
)

# Pretty-print the simulated conversations and steer details.
for sim in simulations:
    persona = sim.steer
    if persona is not None:
        # Choose the trait line on its own. Adjacent string literals are joined
        # before a conditional expression is applied, so an inline
        # `... if persona.trait else ...` would replace the whole persona
        # header with just the `traits=` line whenever `trait` is empty.
        if persona.trait:
            trait_line = f"trait={persona.trait}"
        else:
            trait_line = f"traits={persona.traits}"
        print(
            "Steer Persona:\n"
            f"age={persona.age}\n"
            f"gender={persona.gender}\n"
            f"occupation={persona.occupation}\n"
            f"intent={persona.intent}\n"
            f"{trait_line}"
        )
    print("---")
    # Each prefix message is read with .get(), so missing keys become empty
    # strings; messages without content are skipped.
    for msg in sim.conv_prefix:
        role = str(msg.get('role', ''))
        content = str(msg.get('content', ''))
        if content:
            print(f"{role}: {content}")
    # The final assistant reply is stored separately from the conversation prefix.
    print(f"assistant: {sim.response}")
    print()
print('All simulations complete')


## Assess agent in multi-turn setting

In [None]:
# Score the simulated conversations with the configured judge model.
result = client.assess(
    dataset=simulations,
    judge_model_url=ASSESS_JUDGE_MODEL_URL,
    judge_model_api_key=ASSESS_JUDGE_MODEL_API_KEY,
    judge_model_name=ASSESS_JUDGE_MODEL_NAME,
    temperature=ASSESS_TEMPERATURE,
    max_tokens=ASSESS_MAX_TOKENS,
)

# Headline message, with a placeholder when the result carries none.
headline = result.message or '<no message>'
print(f"Assessment: {headline}")

# evaluation_result is iterated as a sequence of mappings; flatten their values
# lazily and report the score and rationale of each one in order.
judgements = (
    judged
    for per_sample in result.evaluation_result
    for judged in per_sample.values()
)
for entry in judgements:
    print(f"  Score: {entry.score}")
    print(f"  Rationale: {entry.rationale}")


## (Optional) Configure and push to Prime Intellect Hub

## Load tau-hard bench from Prime Intellect Hub

In [None]:
# Download the uv install script from astral.sh and pipe it straight into sh.
# NOTE(review): this executes a remotely fetched script; confirm that is acceptable here.
!curl -LsSf https://astral.sh/uv/install.sh | sh

In [None]:
# Install the `prime` CLI as a uv tool.
!uv tool install prime
# Run `prime` through uvx to install the tsach/tau-hard environment, pinned to version 0.1.0.
!uvx prime env install tsach/tau-hard@0.1.0

In [None]:
from verifiers import load_environment
# Load the environment registered under the name 'tau-hard'.
# NOTE(review): presumably this is the tsach/tau-hard package installed by the
# `prime env install` cell — confirm the name matches.
env = load_environment('tau-hard')

## analyze and summarize results