In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import json
from datetime import datetime
import os
from typing import List, Dict, Tuple, Optional
from collections import defaultdict
from IPython.display import display, HTML
import pandas as pd
from dataclasses import dataclass
from enum import Enum, auto

In [2]:
class ModelType(Enum):
    PHI = "phi"
    LLAMA = "llama"

### Model Configs

In [8]:
@dataclass
class ModelConfig:
    model_path: str
    model_type: ModelType
    bias: str  # 'left' or 'right'
    
    @property
    def friendly_name(self) -> str:
        return f"{self.model_type.value.title()}-{self.bias}"
    
    @staticmethod
    def get_config(model_type: ModelType, bias: str) -> 'ModelConfig':
        #base_path = "/app/models"
        base_path = r"C:\Users\isaac\dev\LLM_Model_Bias\src\models"
        
        if model_type == ModelType.PHI:
            return ModelConfig(
                model_path=f"{base_path}/phi-1.5-{bias}",
                model_type=ModelType.PHI,
                bias=bias
            )
        elif model_type == ModelType.LLAMA:
            return ModelConfig(
                model_path=f"{base_path}/Llama-3.2-1B-{bias}",
                model_type=ModelType.LLAMA,
                bias=bias
            )
        else:
            raise ValueError(f"Unknown model type: {model_type}")

### Create Arena

In [4]:
class DebateArena:
    def __init__(
        self,
        model1_type: ModelType = ModelType.PHI,
        model1_bias: str = "left",
        model2_type: ModelType = ModelType.PHI,
        model2_bias: str = "right",
        max_turns: int = 10,
        max_length: int = 1024,
        max_new_tokens: int = 170,  
        temperature: float = 0.7,
        history_window: int = 2  
    ):
        self.max_turns = max_turns
        self.max_length = max_length
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.history_window = history_window
        
        self.model1_config = ModelConfig.get_config(model1_type, model1_bias)
        self.model2_config = ModelConfig.get_config(model2_type, model2_bias)
        
        self.current_debate = None
        self.debate_history = []
        
        self._load_tokenizers()
        print("Tokenizers loaded successfully!")

    def _load_tokenizers(self):
        tokenizer_kwargs = {"trust_remote_code": True} if self.model1_config.model_type == ModelType.PHI else {}
        self.tokenizer1 = AutoTokenizer.from_pretrained(
            self.model1_config.model_path,
            **tokenizer_kwargs
        )
        if self.tokenizer1.pad_token is None:
            self.tokenizer1.pad_token = self.tokenizer1.eos_token
            
        tokenizer_kwargs = {"trust_remote_code": True} if self.model2_config.model_type == ModelType.PHI else {}
        self.tokenizer2 = AutoTokenizer.from_pretrained(
            self.model2_config.model_path,
            **tokenizer_kwargs
        )
        if self.tokenizer2.pad_token is None:
            self.tokenizer2.pad_token = self.tokenizer2.eos_token

    def _load_model(self, config: ModelConfig):
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            
        model_kwargs = {
            "torch_dtype": torch.float16,
            "low_cpu_mem_usage": True
        }
        
        if config.model_type == ModelType.PHI:
            model_kwargs["trust_remote_code"] = True
            
        model = AutoModelForCausalLM.from_pretrained(
            config.model_path,
            **model_kwargs
        )
        
        if torch.cuda.is_available():
            model = model.half().cuda()
            
        return model

    def _create_pipeline(self, model, tokenizer):
        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=0 if torch.cuda.is_available() else -1
        )

    def _get_truncated_history(self, conversation_history: str, current_turn: int) -> str:
        lines = conversation_history.split('\n')
        
        # keep the topic and initial prompt
        context = lines[:2]
        
        # Get recent conversation turns based on window size
        turn_pairs = []
        turn_count = 0
        for line in reversed(lines[2:]):
            if line.strip():
                turn_pairs.append(line)
                turn_count += 1
                if turn_count >= self.history_window * 2:
                    break
                    
        context.extend(reversed(turn_pairs))
        
        return '\n'.join(context)

    def generate_response(
        self,
        config: ModelConfig,
        tokenizer,
        prompt: str,
        conversation_history: str,
        current_turn: int
    ) -> str:
        try:
            print(f"Loading {config.friendly_name} model...")
            model = self._load_model(config)
            pipeline = self._create_pipeline(model, tokenizer)
            
            truncated_history = self._get_truncated_history(conversation_history, current_turn)
            
            if config.model_type == ModelType.PHI:
                full_prompt = f"{truncated_history}\n\nBased on the recent conversation above, provide a focused response to the topic:"
            else:
                full_prompt = f"{truncated_history}\nRespond to the recent points in the discussion:"
            
            outputs = pipeline(
                full_prompt,
                max_new_tokens=self.max_new_tokens,
                min_new_tokens=50,
                temperature=self.temperature,
                do_sample=True,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                return_full_text=False,
                repetition_penalty=1.2,  
                no_repeat_ngram_size=3  
            )
            
            if isinstance(outputs, list) and len(outputs) > 0:
                response = outputs[0].get('generated_text', '').strip()
                
                if response:
                    sentences = response.split('.')
                    if len(sentences) > 1:
                        response = '.'.join(sentences[:-1]) + '.' # Filter for complete sentences
            else:
                response = "I apologize, but I couldn't generate a coherent response."
            
            # Free up memory
            del model
            del pipeline
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                
            return response
            
        except Exception as e:
            print(f"Error generating response: {str(e)}")
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            return "I apologize, but I encountered an error in generating a response."

    def conduct_debate(
        self,
        topic: str,
        initial_prompt: str,
        display_live: bool = True
    ) -> None:
        debate_history = []
        metadata = {
            "topic": topic,
            "initial_prompt": initial_prompt,
            "timestamp": datetime.now().isoformat(),
            "max_turns": self.max_turns,
            "temperature": self.temperature,
            "max_length": self.max_length,
            "max_new_tokens": self.max_new_tokens,
            "history_window": self.history_window,
            "model1": self.model1_config.friendly_name,
            "model2": self.model2_config.friendly_name
        }
        
        conversation = f"Topic: {topic}\nInitial Prompt: {initial_prompt}\n"
        
        if display_live:
            display(HTML(f"""
                <h3>Debate on: {topic}</h3>
                <p><strong>Initial Prompt:</strong> {initial_prompt}</p>
                <p><strong>Models:</strong> {self.model1_config.friendly_name} vs {self.model2_config.friendly_name}</p>
                <p><strong>Max tokens per response:</strong> {self.max_new_tokens}</p>
                <p><strong>History window:</strong> {self.history_window} turns</p>
            """))
        
        for turn in range(self.max_turns):
            if display_live:
                display(HTML(f"<h4>Turn {turn + 1}/{self.max_turns}</h4>"))
            
            # Model 1 turn
            response1 = self.generate_response(
                self.model1_config,
                self.tokenizer1,
                initial_prompt,
                conversation,
                turn
            )
            debate_history.append({
                "turn": turn + 1,
                "speaker": self.model1_config.friendly_name,
                "response": response1
            })
            conversation += f"\n{self.model1_config.friendly_name}: {response1}"
            if display_live:
                display(HTML(f"<p><strong>{self.model1_config.friendly_name}:</strong> {response1}</p>"))
            
            # Model 2 turn
            response2 = self.generate_response(
                self.model2_config,
                self.tokenizer2,
                initial_prompt,
                conversation,
                turn
            )
            debate_history.append({
                "turn": turn + 1,
                "speaker": self.model2_config.friendly_name,
                "response": response2
            })
            conversation += f"\n{self.model2_config.friendly_name}: {response2}"
            if display_live:
                display(HTML(f"<p><strong>{self.model2_config.friendly_name}:</strong> {response2}</p>"))
        
        self.current_debate = {
            "metadata": metadata,
            "debate_history": debate_history
        }
        self.debate_history.append(self.current_debate)
    
    def save_debate(self, output_dir: str = "/app/debate_results") -> str:
        if not self.current_debate:
            raise ValueError("No debate to save! Run conduct_debate first.")
        
        os.makedirs(output_dir, exist_ok=True)
        
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        topic_slug = self.current_debate["metadata"]["topic"].lower().replace(" ", "_")
        filename = f"{topic_slug}_debate_{timestamp}.json"
        
        output_path = os.path.join(output_dir, filename)
        with open(output_path, "w") as f:
            json.dump(self.current_debate, f, indent=2)
        
        print(f"\nDebate saved to: {output_path}")
        return output_path

    def get_debate_summary(self, debate_index: int = -1) -> HTML:
        if not self.debate_history:
            raise ValueError("No debates to analyze! Run conduct_debate first.")
        
        debate = self.debate_history[debate_index]
        metadata = debate["metadata"]
        history = debate["debate_history"]
        
        summary = f"""
        <h3>Debate Summary</h3>
        <p><strong>Topic:</strong> {metadata['topic']}</p>
        <p><strong>Initial Prompt:</strong> {metadata['initial_prompt']}</p>
        <p><strong>Models:</strong> {metadata['model1']} vs {metadata['model2']}</p>
        <p><strong>Number of Turns:</strong> {metadata['max_turns']}</p>
        <p><strong>Temperature:</strong> {metadata['temperature']}</p>
        <p><strong>Timestamp:</strong> {metadata['timestamp']}</p>
        """
        
        stats = defaultdict(lambda: {"responses": [], "word_counts": []})
        
        for entry in history:
            speaker = entry["speaker"]
            response = entry["response"]
            word_count = len(response.split())
            
            stats[speaker]["responses"].append(response)
            stats[speaker]["word_counts"].append(word_count)
        
        stats_html = "<h3>Response Statistics</h3><table><tr><th>Model</th><th>Avg Words</th><th>Total Responses</th></tr>"
        for speaker, data in stats.items():
            avg_words = sum(data["word_counts"]) / len(data["word_counts"])
            stats_html += f"<tr><td>{speaker}</td><td>{avg_words:.1f}</td><td>{len(data['responses'])}</td></tr>"
        stats_html += "</table>"
        
        return HTML(summary + stats_html)

    def get_content_analysis(self, debate_index: int = -1) -> HTML:
        if not self.debate_history:
            raise ValueError("No debates to analyze! Run conduct_debate first.")
        
        debate = self.debate_history[debate_index]
        history = debate["debate_history"]
        
        patterns = defaultdict(lambda: defaultdict(int))
        
        for entry in history:
            speaker = entry["speaker"]
            response = entry["response"].lower()
            
            patterns[speaker]["questions"] += response.count("?")
            
            if any(word in response for word in ["agree", "yes", "indeed"]):
                patterns[speaker]["agreement"] += 1
            if any(word in response for word in ["disagree", "no", "however"]):
                patterns[speaker]["disagreement"] += 1
            
            if any(word in response for word in ["data", "research", "study", "evidence"]):
                patterns[speaker]["evidence_based"] += 1
            
            if any(word in response for word in ["feel", "believe", "think", "opinion"]):
                patterns[speaker]["emotional"] += 1
        
        analysis = "<h3>Content Analysis</h3>"
        analysis += "<table><tr><th>Model</th><th>Pattern</th><th>Count</th></tr>"
        
        for speaker in patterns:
            for pattern, count in patterns[speaker].items():
                analysis += f"<tr><td>{speaker}</td><td>{pattern.replace('_', ' ').title()}</td><td>{count}</td></tr>"
        
        analysis += "</table>"
        
        return HTML(analysis)

    def get_full_transcript(self, debate_index: int = -1) -> HTML:
        if not self.debate_history:
            raise ValueError("No debates to analyze! Run conduct_debate first.")
        
        debate = self.debate_history[debate_index]
        metadata = debate["metadata"]
        history = debate["debate_history"]
        
        transcript = f"""
        <h3>Full Debate Transcript</h3>
        <p><strong>Topic:</strong> {metadata['topic']}</p>
        <p><strong>Initial Prompt:</strong> {metadata['initial_prompt']}</p>
        <p><strong>Models:</strong> {metadata['model1']} vs {metadata['model2']}</p>
        """
        
        current_turn = None
        for entry in history:
            if entry["turn"] != current_turn:
                current_turn = entry["turn"]
                transcript += f"<h4>Turn {current_turn}</h4>"
            
            transcript += f"<p><strong>{entry['speaker']}:</strong> {entry['response']}</p>"
        
        return HTML(transcript)

In [10]:
def run_debate(
    topic: str,
    prompt: str,
    model1_type: ModelType = ModelType.PHI,
    model1_bias: str = "left",
    model2_type: ModelType = ModelType.PHI,
    model2_bias: str = "right",
    max_turns: int = 3,
    max_new_tokens: int = 150, # Cap length of each response to keep even lengths
    temperature: float = 0.7,
    history_window: int = 2 # Number of previous turns to keep in context
) -> DebateArena:
    """Run a debate and return the arena instance"""
    arena = DebateArena(
        model1_type=model1_type,
        model1_bias=model1_bias,
        model2_type=model2_type,
        model2_bias=model2_bias,
        max_turns=max_turns,
        max_new_tokens = max_new_tokens,
        history_window=history_window,
        temperature=temperature,

    )
    
    arena.conduct_debate(topic, prompt)
    arena.save_debate(output_dir=r"C:\Users\isaac\dev\LLM_Model_Bias\src\debate_results")
    
    return arena

In [6]:
# 1. Phi vs Phi
arena = run_debate(
    topic="Climate Change",
    prompt="Is climate change a major issue? If so, how can we address it?",
    max_turns=3
)

Tokenizers loaded successfully!


Loading Phi-left model...


Loading Phi-right model...


Loading Phi-left model...


Loading Phi-right model...


Loading Phi-left model...


Loading Phi-right model...



Debate saved to: /app/debate_results/climate_change_debate_20241125_045629.json


In [7]:
display(arena.get_debate_summary())
display(arena.get_content_analysis())
# display(arena.get_full_transcript())

Model,Avg Words,Total Responses
Phi-left,82.0,3
Phi-right,78.3,3


Model,Pattern,Count
Phi-left,Questions,1
Phi-left,Disagreement,2
Phi-left,Agreement,1
Phi-right,Questions,0
Phi-right,Disagreement,2
Phi-right,Emotional,2


In [None]:
# 2. Phi Left vs LLaMA Right
# arena_mixed = run_debate(
#     topic="Gun Control",
#     prompt="What should be done about gun violence in America?",
#     model1_type=ModelType.PHI,
#     model1_bias="left",
#     model2_type=ModelType.LLAMA,
#     model2_bias="right",
#     max_turns=3
# )

In [22]:
# 3. Llama Left vs Llama Right
llama_arena = run_debate(
    topic="climate change",
    prompt="Should the US impose stricter regulations on carbon emissions and stop fracking to combat climate change, even at the cost of economic growth?",
    model1_type=ModelType.LLAMA,
    model1_bias="left",
    model2_type=ModelType.LLAMA,
    model2_bias="right",
    max_turns=3
)

Tokenizers loaded successfully!


Loading Llama-left model...


Loading Llama-right model...


Loading Llama-left model...


Loading Llama-right model...


Loading Llama-left model...


Loading Llama-right model...



Debate saved to: C:\Users\isaac\dev\LLM_Model_Bias\src\debate_results\climate_change_debate_20241203_164858.json


In [23]:
display(llama_arena.get_debate_summary())
display(llama_arena.get_content_analysis())

Model,Avg Words,Total Responses
Llama-left,115.7,3
Llama-right,95.7,3


Model,Pattern,Count
Llama-left,Questions,4
Llama-left,Disagreement,2
Llama-left,Evidence Based,2
Llama-left,Emotional,2
Llama-right,Questions,4
Llama-right,Agreement,2
Llama-right,Disagreement,3
Llama-right,Emotional,1
