# Running the repo

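You might have to add a Hugging Face (HF) token: the first code cell reads it from the `HF_TOKEN` environment variable (which can be set in a `.env` file). Please don't steal my token!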

If you want to edit any of the .py files, like benchmark.py or prompts.py, you can edit them directly in Colab (Files [on the left bar] -> My Drive -> ...), but then you have to restart the runtime (Runtime -> Restart session). Note that you don't have to re-run the pip install cell afterwards: the runtime still exists, it just got reset. Only if you disconnect/delete the runtime (for example, if you turn your computer off and come back a few hours later) will you have to re-run the pip install cell.

In [2]:
from dotenv import load_dotenv
from huggingface_hub import login
import os

import importlib
import benchmark
import llm_agent
from llm_providers import create_llm
from persona_loader import list_persona_ids

# Reload the project modules so this cell works with their current source.
for _module in (benchmark, llm_agent):
    importlib.reload(_module)

# The Hugging Face token is taken from the HF_TOKEN environment variable;
# load_dotenv() runs first so a local .env file can supply it.
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
login(token=HF_TOKEN)


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


## Model and Persona Configuration

This cell sets up the LLM model and optional personas for both codemaster and guesser agents.

**To switch models:** Simply change `MODEL_CHOICE` to one of:
- `"llama31"` - LLaMA 3.1 8B Instruct
- `"gemma2"` - Gemma 2 9B Instruct (recommended)
- `"mistral"` - Mistral 7B Instruct v0.2

**To use personas:** Set `CODEMASTER_PERSONA` and `GUESSER_PERSONA` to persona IDs ("1" through "20")
- Use `None` for no persona (baseline)
- See personas.json for full persona descriptions
- Same persona space applies to both roles

**Key features:**
- Uses proper chat templates for each model (automatically detected)
- Supports 4-bit quantization to save memory
- Shared LLM instance for both agents (memory efficient)
- Persona injection follows NeurIPS 2024 trust game approach

In [3]:
# ===== MODEL CONFIGURATION =====
# MODEL_CHOICE names the entry of MODEL_CONFIGS that the later cells load.
# Valid keys: "llama31", "mistral", "gemma2"

MODEL_CHOICE = "gemma2"  # edit this value to switch models

# One settings dict per supported model, keyed by the short name used above.
MODEL_CONFIGS = dict(
    llama31=dict(
        type="local_hf",
        model_name="meta-llama/Llama-3.1-8B-Instruct",
        temperature=0.6,
        max_tokens=1024,
        # load_in_4bit=True  (optional; currently disabled)
    ),
    mistral=dict(
        type="local_hf",
        model_name="mistralai/Mistral-7B-Instruct-v0.2",
        temperature=0.7,
        # no max_tokens is set for this entry
        # load_in_4bit=True  (optional; currently disabled)
    ),
    gemma2=dict(
        type="local_hf",
        model_name="google/gemma-2-9b-it",
        temperature=0.7,
        max_tokens=2048,
    ),
)

In [None]:
# ===== PERSONA CONFIGURATION =====
# Each role takes a persona ID string, or None to play without a persona.
# Persona IDs run from "1" through "20"; personas.json holds the descriptions.
#
# The four possible setups:
#   baseline (no personas)   -> codemaster_persona, guesser_persona = None, None
#   codemaster only          -> codemaster_persona, guesser_persona = "1", None
#   guesser only             -> codemaster_persona, guesser_persona = None, "2"
#   both (same or different) -> codemaster_persona, guesser_persona = "1", "3"

codemaster_persona = "1"  # persona ID for the codemaster, or None for baseline
guesser_persona = "2"     # persona ID for the guesser, or None for baseline

# ===== PERSONA SHARING TOGGLE =====
# True:  agents know each other's personas and can adapt their communication
# False: personas remain isolated, agents unaware of partner's background
shared_persona = True  # set to False to disable persona sharing

# Show which persona IDs can be chosen
available_ids = ", ".join(list_persona_ids())
print("Available personas:", available_ids)

# Look up the settings for the chosen model and echo the run configuration
config = MODEL_CONFIGS[MODEL_CHOICE]
print(f"Loading model: {config['model_name']}")
print(f"Codemaster Persona: {codemaster_persona or 'None (baseline)'}")
print(f"Guesser Persona: {guesser_persona or 'None (baseline)'}")
print(f"Persona Sharing: {'Enabled' if shared_persona else 'Disabled'}")

# A single LLM instance is created once and handed to both agents
shared_llm_instance = create_llm(config)


def _make_agent(role, persona_id, partner_persona_id):
    """Create an LLMAgent on the shared LLM instance and initialize it as `role`."""
    agent = llm_agent.LLMAgent(model_config=config, llm_instance=shared_llm_instance)
    agent.initialize_role(
        role,
        persona_id=persona_id,
        partner_persona_id=partner_persona_id,
        shared_persona=shared_persona,
    )
    return agent


# Each agent receives its own persona plus its partner's persona ID
codemaster = _make_agent('codemaster', codemaster_persona, guesser_persona)
guesser = _make_agent('guesser', guesser_persona, codemaster_persona)

print(f"✓ Codemaster and Guesser initialized with {MODEL_CHOICE}")

In [None]:
bnch = benchmark.CodeNamesBenchmark()

# Banner announcing which model the collaborative game is about to use
print(f"\n{'='*60}")
print(f"Running game with {MODEL_CHOICE}: {config['model_name']}")
print(f"{'='*60}\n")

# Keyword options for the matchup, gathered in one place
matchup_options = dict(
    num_games=1,
    codemaster=codemaster,
    guesser=guesser,
    codemaster_persona_id=codemaster_persona,
    guesser_persona_id=guesser_persona,
    persona_sharing=shared_persona,
    save=True,  # Save detailed game logs to JSON files
)
results = bnch.run_collab_matchup(config, **matchup_options)

# Summary header
print(f"\n{'='*60}")
print(f"Results for {MODEL_CHOICE}")
print(f"{'='*60}")

# Experiment identity and persona setup, as reported back in the results dict
print(f"Experiment ID: {results['experiment_id']}")
print(f"Codemaster Persona: {results['codemaster_persona_id'] or 'None'}")
print(f"Guesser Persona: {results['guesser_persona_id'] or 'None'}")
print(f"Persona Sharing: {'Enabled' if results['persona_sharing_enabled'] else 'Disabled'}")

# Aggregate game statistics
print(f"Games played: {results['games_played']}")
print(f"Wins: {results['games_won']}")
print(f"Win rate: {results['win_rate']:.1%}")
print(f"Average turns per game: {results['average_turns']:.1f}")
print(f"Average words per clue: {results['average_words_per_clue']:.2f}")
print(f"Experiment duration: {results['experiment_duration']:.1f}s")

# `results` keeps the complete dict for further analysis
# print(results)  # Uncomment to see full results