In [1]:
import json
import numpy as np
import os
import pandas as pd
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import random
from itertools import product
import math
from utils import *
from eval.metrics import compute_pareto_frontier


def parse_values(line):
    """Parse the line containing private values"""
    # Format: "Your private values are 16 for item 1, 46 for item 2, 100 for item 3, 23 for item 4, 47 for item 5."
    values = []
    parts = line.split("are ")[1].split(", ")
    for part in parts:
        value = int(part.split(" for item")[0])
        values.append(value)
    return values

def parse_outside_offer(line):
    """Parse the line containing outside offer"""
    # Format: "Your outside offer value is 145. Your objective..."
    return int(line.split("value is ")[1].split(".")[0])

def calculate_value(items_received, values):
    """Calculate value of items received given the player's values"""
    return sum(items_received[i] * values[i] for i in range(len(values)))

def compute_max_nash_welfare(item_counts, p1_valuations, p2_valuations):
    if len(item_counts) != len(p1_valuations) or len(item_counts) != len(p2_valuations):
        raise ValueError("item_counts, p1_valuations, p2_valuations must have the same length.")

    K = len(item_counts)
    max_nash = -1.0
    best_alloc = None
    outside_offer_player1 = np.random.randint(1, np.dot(item_counts, p1_valuations))
    outside_offer_player2 = np.random.randint(1, np.dot(item_counts, p2_valuations))
    ranges = [range(n_i + 1) for n_i in item_counts] 
    for allocation in product(*ranges):
        p1_util = 0.0
        p2_util = 0.0
        for i in range(K):
            x_i = allocation[i]
            n_i = item_counts[i]
            p1_util += x_i * p1_valuations[i]
            p2_util += (n_i - x_i) * p2_valuations[i]

        w = math.sqrt(max(p1_util, 0) * max(p2_util, 0))

        if w > max_nash:
            max_nash = w
            best_alloc = allocation

        #outside offer check
        if max_nash < np.sqrt(outside_offer_player1 * outside_offer_player2):
            max_nash = np.sqrt(outside_offer_player1 * outside_offer_player2)
            best_alloc = [0, 0, 0, 0, 0]

    return max_nash, list(best_alloc)




        
def analyze_single_game(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)

    results = []
    for game in data['all_game_data']:
        agent1 = game['agent1']
        agent2 = game['agent2']
        
        # Remove Agent1_ and Agent2_ prefixes if they exist
        if agent1.startswith("Agent1_"):
            agent1 = agent1[7:]
        if agent2.startswith("Agent2_"):
            agent2 = agent2[7:]
        
        # Track the final state
        final_action = None
        final_round_index = len(game['round_data']) - 1
        # Convert to actual round number (2 turns = 1 round)
        final_round_number = (final_round_index // 2) + 1
        final_offer = None
        p1_outside_offer = None
        p2_outside_offer = None
        p1_values = None
        p2_values = None
        num_items = None
        full_items = None

        # First, gather all the necessary data from all rounds
        for round_idx, round_data in enumerate(game['round_data']):
            prompt = round_data['prompt']
            
            # Check if this is the final turn
            if round_idx == final_round_index:
                final_action = round_data['action']
            
            if "You are Player 1" in prompt:
                # Extract Player 1's values and outside offer
                for line in prompt.split('\n'):
                    if "Your private values are" in line:
                        p1_values = parse_values(line)
                    elif "Your outside offer value is" in line:
                        p1_outside_offer = parse_outside_offer(line)
                    elif "There are" in line and "units of item" in line:
                        if num_items is None:
                            # Extract numbers before "units of item"
                            nums = []
                            parts = line.split("There are ")[1].split(", ")
                            for part in parts:
                                num = int(part.split(" units")[0])
                                nums.append(num)
                            num_items = nums
            elif "You are Player 2" in prompt:
                # Extract Player 2's values and outside offer
                for line in prompt.split('\n'):
                    if "Your private values are" in line:
                        p2_values = parse_values(line)
                    elif "Your outside offer value is" in line:
                        p2_outside_offer = parse_outside_offer(line)
            
            # Track the current offer
            if "Current offer on the table" in prompt:
                offer_line = prompt.split("Current offer on the table")[1].split("\n")[0]
                if "None" not in offer_line and "[" in offer_line:
                    final_offer = eval(offer_line[offer_line.find("["):offer_line.find("]")+1])
        
        # Calculate final values
        p1_final_value = None
        p2_final_value = None
        
        # Calculate discount factor based on the round number (0.9^(round-1))
        discount_factor = 0.9 ** (final_round_number - 1)
        # discount_factor = 1
        p1_items = None
        p2_items = None
        if final_action == "WALK" or final_action == "INVALID WALK":
            # Check if this is player 1's first action
            if final_round_number == 1 and "You are Player 1" in game['round_data'][final_round_index]['prompt']:
                # Don't count the game if player 1 walks on their first action
                # p1_final_value = p1_outside_offer * discount_factor
                p1_final_value = None
                p2_final_value = None
            else:
                p1_final_value = p1_outside_offer * discount_factor
                # if p2_outside_offer is None:
                #     # Generate random values between 1 and 101 for each item
                #     random_values = [random.randint(1, 101) for _ in range(len(num_items))]
                #     # Calculate total value if player had all items
                #     total_possible_value = sum(random_values[i] * num_items[i] for i in range(len(num_items)))
                #     # Generate random outside offer between 1 and total value
                #     p2_outside_offer = random.randint(1, total_possible_value)
                p2_final_value = p2_outside_offer * discount_factor
        elif final_action == "ACCEPT":
            # Determine which player accepted the offer
            accepting_player = None
            for round_idx, round_data in enumerate(game['round_data']):
                if round_idx == final_round_index and round_data['action'] == "ACCEPT":
                    accepting_player = 1 if "You are Player 1" in round_data['prompt'] else 2
            
            if accepting_player == 2:
                # Player 2 accepted the offer
                # The offer represents what Player 2 receives
                p2_items = final_offer
                p1_items = [num_items[i] - final_offer[i] for i in range(len(final_offer))]
            else:
                # Player 1 accepted the offer
                # The offer represents what Player 1 receives
                p1_items = final_offer
                p2_items = [num_items[i] - final_offer[i] for i in range(len(final_offer))]
            
            p1_final_value = calculate_value(p1_items, p1_values) * discount_factor
            p2_final_value = calculate_value(p2_items, p2_values) * discount_factor
        try:
            circle_data = data['all_game_data'][0]['circle']
            if isinstance(circle_data, int):
                p1_circle = circle_data
                p2_circle = circle_data
            else:
                p1_circle = circle_data[0]
                p2_circle = circle_data[1]
        except (KeyError, TypeError, IndexError):
            # Handle missing or malformed data
            p1_circle = None
            p2_circle = None
        
        # Add circle values to agent names if available
        agent1_with_circle = f"{agent1}_circle_{p1_circle}" if p1_circle is not None else agent1
        agent2_with_circle = f"{agent2}_circle_{p2_circle}" if p2_circle is not None else agent2
      
        
        results.append({
            'agent1': agent1_with_circle,
            'agent2': agent2_with_circle,
            'final_action': final_action,
            'final_round': final_round_number,
            'discount_factor': discount_factor,
            'agent1_value': p1_final_value, #todo normalize to reduce variance? 
            'agent2_value': p2_final_value, #todo normalize to reduce variance? 
            'p1_values': p1_values,
            'p2_values': p2_values,
            'p1_items': p1_items,
            'p2_items': p2_items,
            'full_items': num_items,
            'num_items': len(p1_values),
            'p1_outside_offer': p1_outside_offer,
            'p1_outside_offer': p2_outside_offer

        })
    
    return results

# # Example usage:
file_path = "crossplay/game_matrix_1/4o_sonnet_3.7_circle_4_vs_circle_6/all_game_data_3_1_2025_20_openai_4o_vs_anthropic_3.7_sonnet_circle_p1_4_circle_p2_6.json"
results = analyze_single_game(file_path)

# Calculate average utility for each agent
agent1_values = [game['agent1_value'] for game in results if game['agent1_value'] is not None]
agent2_values = [game['agent2_value'] for game in results if game['agent2_value'] is not None]

agent1_avg = np.mean(agent1_values) if agent1_values else "N/A"
agent2_avg = np.mean(agent2_values) if agent2_values else "N/A"

# Print detailed information for each game
# print("Game Details:")
# for i, game in enumerate(results):
#     print(f"\nGame {i+1} between {game['agent1']} and {game['agent2']}")
#     print(f"Final action: {game['final_action']}")
#     print(f"Final round: {game['final_round']}")
#     print(f"Discount factor: {game['discount_factor']:.4f}")
#     print(f"Final values: Agent1: {game['agent1_value']:.2f}, Agent2: {game['agent2_value']:.2f}")

print("\nAverage Utilities:")
print(f"Agent1 ({results[0]['agent1']}): {agent1_avg}")
print(f"Agent2 ({results[0]['agent2']}): {agent2_avg}")

Matplotlib created a temporary cache directory at /var/folders/fh/fwc37qhn04d8sxp65hwv1kxm0000gn/T/matplotlib-90rovjz6 because the default path (/Users/gabesmithline/.matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.



Average Utilities:
Agent1 (openai_4o_circle_4): 532.54
Agent2 (anthropic_3.7_sonnet_circle_6): 700.0836842105264


In [2]:
def create_matrix_heatmap_with_marginals(matrix, title="Matrix Heatmap", cmap="coolwarm", 
                                         fmt=".2f", figsize=(14, 12), 
                                         vmin=None, vmax=None, center=None,
                                         annot=True, normalize_marginals=False):
    """
    Create a heatmap visualization of a matrix with row and column marginals.
    
    Parameters:
    - matrix: pandas DataFrame containing the matrix data
    - title: title for the plot
    - cmap: colormap to use
    - fmt: format string for annotations
    - figsize: figure size (width, height)
    - vmin, vmax, center: color scale parameters
    - annot: whether to annotate cells
    - normalize_marginals: whether to normalize marginals (mean=1)
    
    Returns:
    - fig: the matplotlib figure
    """
    import matplotlib.pyplot as plt
    import seaborn as sns
    import numpy as np
    import pandas as pd
    
    # Convert matrix to numeric, replacing non-numeric values with NaN
    numeric_matrix = matrix.apply(pd.to_numeric, errors='coerce')
    
    # Create mask for missing values
    mask = np.isnan(numeric_matrix)
    
    # Print verification of means calculation for debugging
    print(f"Calculating means for {title}:")
    print("Example row (openai_4o_circle_4 if available):")
    if 'openai_4o_circle_4' in numeric_matrix.index:
        row_vals = numeric_matrix.loc['openai_4o_circle_4'].dropna()
        print(f"Values: {row_vals.values}")
        print(f"Mean: {row_vals.mean()}")
    
    # Calculate row and column means (ignoring NaN values)
    row_means = numeric_matrix.mean(axis=1, skipna=True)
    col_means = numeric_matrix.mean(axis=0, skipna=True)
    
    # Normalize if requested
    if normalize_marginals:
        row_mean_value = row_means.mean()
        col_mean_value = col_means.mean()
        if not np.isnan(row_mean_value) and row_mean_value != 0:
            row_means = row_means / row_mean_value
        if not np.isnan(col_mean_value) and col_mean_value != 0:
            col_means = col_means / col_mean_value
    
    # Create a figure with gridspec for layout
    fig = plt.figure(figsize=figsize)
    gs = fig.add_gridspec(7, 6)  # Increased to 7 rows to accommodate column means at bottom
    
    # Main heatmap (central 4x4 grid)
    ax_heatmap = fig.add_subplot(gs[1:5, 0:4])
    
    # Row marginal (right side)
    ax_row = fig.add_subplot(gs[1:5, 4:6])
    
    # Column marginal (bottom)
    ax_col = fig.add_subplot(gs[5:6, 0:4])  # Moved to bottom
    
    # Title area
    ax_title = fig.add_subplot(gs[0:1, 0:6])  # Expanded to full width
    ax_title.axis('off')
    
    # Colorbar area
    ax_cbar = fig.add_subplot(gs[6:7, :])
    ax_cbar.axis('off')
    
    # Draw the heatmap with mask for NaN values
    hm = sns.heatmap(numeric_matrix, ax=ax_heatmap, cmap=cmap, annot=annot, fmt=fmt,
                cbar=False, vmin=vmin, vmax=vmax, center=center,
                linewidths=0.5, mask=mask)
    
    # Move x-axis labels to top
    ax_heatmap.xaxis.tick_top()
    
    # Make column labels bold and match size of row labels
    ax_heatmap.set_xticklabels(
        numeric_matrix.columns, 
        rotation=80, 
        ha='center', 
        rotation_mode='anchor', 
        fontweight='bold',
    )
    
    ax_heatmap.tick_params(axis='x', pad=90)  # Changed from 15 to 25 to move labels higher

    
    # Make row labels bold
    ax_heatmap.set_yticklabels(
        ax_heatmap.get_yticklabels(),
        fontweight='bold'
    )
    
    # Row marginal (horizontal bars)
    if not row_means.isna().all():  # Check if all values are NaN
        valid_row_means = row_means.dropna()
        if len(valid_row_means) > 0 and valid_row_means.max() > valid_row_means.min():
            color_norm = (valid_row_means - valid_row_means.min()) / (valid_row_means.max() - valid_row_means.min())
            colors = plt.cm.get_cmap(cmap)(color_norm)
        else:
            colors = plt.cm.get_cmap(cmap)(0.5 * np.ones_like(valid_row_means))
        
        y_pos = np.arange(len(row_means)) + 0.5  # Center on cells
        ax_row.barh(y_pos, row_means, color=colors, edgecolor='k', linewidth=0.5, height=0.8)
        
        # Add text labels with the actual values
        for i, value in enumerate(row_means):
            if not np.isnan(value):
                ax_row.text(value + (row_means.max() * 0.02), i + 0.5, f"{value:.2f}", 
                         va='center', fontsize=9)
    
    ax_row.set_yticks([])
    ax_row.set_xlabel('Row Mean')
    ax_row.spines['top'].set_visible(False)
    ax_row.spines['right'].set_visible(False)
    ax_row.spines['left'].set_visible(False)
    ax_row.set_ylim(ax_heatmap.get_ylim())  # Match heatmap y-limits
    
    # Column marginal (horizontal bars at bottom)
    if not col_means.isna().all():  # Check if all values are NaN
        valid_col_means = col_means.dropna()
        if len(valid_col_means) > 0 and valid_col_means.max() > valid_col_means.min():
            color_norm = (valid_col_means - valid_col_means.min()) / (valid_col_means.max() - valid_col_means.min())
            colors = plt.cm.get_cmap(cmap)(color_norm)
        else:
            colors = plt.cm.get_cmap(cmap)(0.5 * np.ones_like(valid_col_means))
        
        x_pos = np.arange(len(col_means)) + 0.5  # Center on cells
        ax_col.bar(x_pos, col_means, color=colors, edgecolor='k', linewidth=0.5, width=0.8)
        
        # Add text labels with the actual values
        for i, value in enumerate(col_means):
            if not np.isnan(value):
                ax_col.text(i + 0.5, value + (col_means.max() * 0.02), f"{value:.2f}", 
                         ha='center', fontsize=9, rotation=0)
    
    ax_col.set_xticks([])
    ax_col.set_ylabel('Column Mean')
    ax_col.spines['top'].set_visible(False)
    ax_col.spines['right'].set_visible(False)
    ax_col.spines['bottom'].set_visible(False)
    ax_col.set_xlim(ax_heatmap.get_xlim())  # Match heatmap x-limits
    
    # Add a colorbar at the bottom
    cbar_ax = fig.add_axes([0.15, 0.08, 0.7, 0.02])  # Adjusted position
    sm = plt.cm.ScalarMappable(cmap=cmap)
    sm.set_array([])
    
    # Set colorbar limits
    vmin_actual = vmin if vmin is not None else numeric_matrix.min().min()
    vmax_actual = vmax if vmax is not None else numeric_matrix.max().max()
    
    if center is not None:
        max_abs = max(abs(vmax_actual), abs(vmin_actual))
        sm.set_clim(-max_abs, max_abs)
    else:
        sm.set_clim(vmin_actual, vmax_actual)
        
    plt.colorbar(sm, cax=cbar_ax, orientation='horizontal')
    
    # Add title - centered and bolded
    ax_title.text(0.95, 0.5, title, 
              horizontalalignment='right',
              verticalalignment='center', 
              fontsize=16, 
              fontweight='bold',
              transform=ax_title.transAxes)
    
    plt.tight_layout()
    return fig


## Global Max Values for Comparison

In [3]:
global_max_nash_welfare = []
for i in range(1000):
    items = np.random.poisson(4, 5)

    player_values1 = np.random.randint(1, 101, 5) 
    player_values2 = np.random.randint(1, 101, 5)
  
    max_nash, _ = compute_max_nash_welfare(items, player_values1, player_values2)
    global_max_nash_welfare.append(max_nash)

global_max_nash_welfare = np.mean(global_max_nash_welfare)

global_stadard_max = []
for i in range(1000):
    items = np.random.poisson(4, 5)

    player_values = np.random.randint(1, 101, 5) 
    
    global_stadard_max.append(np.dot(items, player_values))

global_stadard_max = np.mean(global_max_nash_welfare)


In [4]:
# ... existing code ...

# Define bidirectional mappings with dated and clean versions
model_mapping = {
    # Map from original form to dated form (for data loading)
    'openai_4o_circle_4': 'openai_4o_2024-08-06_circle_4',
    'openai_4o_circle_5': 'openai_4o_2024-08-06_circle_5',
    'openai_4o_circle_6': 'openai_4o_2024-08-06_circle_6',
    
    'anthropic_3.7_sonnet_circle_5': 'anthropic_3.7_sonnet_2025-02-19_circle_5',
    'anthropic_3.7_sonnet_circle_6': 'anthropic_3.7_sonnet_2025-02-19_circle_6',
    
    'openai_o3_mini_circle_0': 'openai_o3_mini_2025-01-31_circle_0',
}

# Create reverse mapping (from dated to clean)
reverse_model_mapping = {v: k for k, v in model_mapping.items()}

def get_canonical_name(agent_name):
    """Convert agent names to canonical format for data loading"""
    return model_mapping.get(agent_name, agent_name)

def get_display_name(agent_name):
    """Convert dated agent names to clean display names without dates"""
    # First check if this is a dated name in our reverse mapping
    if agent_name in reverse_model_mapping:
        return reverse_model_mapping[agent_name]
    
    # Try to extract and remove date pattern
    import re
    date_pattern = r'_\d{4}-\d{2}-\d{2}'
    if re.search(date_pattern, agent_name):
        return re.sub(date_pattern, '', agent_name)
    
    # Return original if no transformation needed
    return agent_name

all_results = []
crossplay_dir = "crossplay/game_matrix_1"
agent_performance = defaultdict(lambda: defaultdict(list))
agent_final_rounds = defaultdict(list) 
agent_game_counts = defaultdict(lambda: defaultdict(int))  

for root, dirs, files in os.walk(crossplay_dir):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            try:
                game_results = analyze_single_game(file_path)
                
                if any('openai_4o_circle_2' in result['agent1'] or 'openai_4o_circle_2' in result['agent2'] 
                       for result in game_results):
                    print(f"Skipping {file_path} - contains openai_4o_circle_2")
                    continue
                
                for result in game_results:
                    result['agent1'] = get_canonical_name(result['agent1'])
                    result['agent2'] = get_canonical_name(result['agent2'])
                
                all_results.extend(game_results)
                
                for result in game_results:
                    agent1 = result['agent1'] 
                    agent2 = result['agent2']
                    
                    if result['agent1_value'] is not None:
                        agent_performance[agent1][agent2].append(result['agent1_value'])
                        agent_game_counts[agent1][agent2] += 1  
                    
                    if result['agent2_value'] is not None:
                        agent_performance[agent2][agent1].append(result['agent2_value'])
                        agent_game_counts[agent2][agent1] += 1  
                    
                    # Track final rounds for each agent
                    if result['final_round'] is not None:
                        agent_final_rounds[agent1].append(result['final_round'])
                        agent_final_rounds[agent2].append(result['final_round'])
                
                agent1_values = [r['agent1_value'] for r in game_results if r['agent1_value'] is not None]
                agent2_values = [r['agent2_value'] for r in game_results if r['agent2_value'] is not None]
                
                agent1_avg = np.mean(agent1_values) if agent1_values else "N/A"
                agent2_avg = np.mean(agent2_values) if agent2_values else "N/A"
                
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

df = pd.DataFrame(all_results)
print(df.columns)

agent_vs_opponent_performance = {}

for agent, opponents in agent_performance.items():
    for opponent, values in opponents.items():
        if values:
            key = f"{agent}_vs_{opponent}"
            agent_vs_opponent_performance[key] = np.mean(values)


overall_agent_performance = {}
for agent, opponents in agent_performance.items():
    all_values = []
    for values in opponents.values():
        all_values.extend(values)
    if all_values:
        overall_agent_performance[agent] = np.mean(all_values)

average_final_rounds = {}
for agent, rounds in agent_final_rounds.items():
    if rounds:
        average_final_rounds[agent] = np.mean(rounds)

print(f"\nTotal games analyzed: {len(all_results)}")
print(f"Unique agent types: {len(overall_agent_performance)}")
print(f"Unique matchups: {len(agent_vs_opponent_performance)}")

print("\nAverage Final Rounds by Agent:")
for agent, avg_round in sorted(average_final_rounds.items()):
    print(f"{agent}: {avg_round:.2f}")

all_agents = sorted(list(overall_agent_performance.keys()))
performance_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
std_dev_matrix = pd.DataFrame(index=all_agents, columns=all_agents)  
variance_matrix = pd.DataFrame(index=all_agents, columns=all_agents)


count_matrix = pd.DataFrame(index=all_agents, columns=all_agents)

for agent, opponents in agent_performance.items():
    for opponent, values in opponents.items():
        if values:
            performance_matrix.loc[agent, opponent] = np.mean(values)
            std_dev_matrix.loc[agent, opponent] = np.std(values)
            variance_matrix.loc[agent, opponent] = np.nanvar(values)
            count_matrix.loc[agent, opponent] = agent_game_counts[agent][opponent]




Skipping crossplay/game_matrix_1/gemini_4o_circle_5_vs_circle_2/all_game_data_2_23_2025_20_gemini_2.0_vs_4o_circle_p1_5_circle_p2_2.json - contains openai_4o_circle_2
Index(['agent1', 'agent2', 'final_action', 'final_round', 'discount_factor',
       'agent1_value', 'agent2_value', 'p1_values', 'p2_values', 'p1_items',
       'p2_items', 'full_items', 'num_items', 'p1_outside_offer'],
      dtype='object')

Total games analyzed: 2181
Unique agent types: 9
Unique matchups: 81

Average Final Rounds by Agent:
anthropic_3.7_sonnet_2025-02-19_circle_5: 2.05
anthropic_3.7_sonnet_2025-02-19_circle_6: 1.99
anthropic_sonnet_3.7_reasoning_2025-02-19_circle_0: 2.06
gemini_2.0_flash_circle_2: 1.79
gemini_2.0_flash_circle_5: 2.11
openai_4o_2024-08-06_circle_4: 1.70
openai_4o_2024-08-06_circle_5: 1.62
openai_4o_2024-08-06_circle_6: 1.73
openai_o3_mini_2025-01-31_circle_0: 1.75


In [12]:

nash_welfare_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
utilitarian_welfare_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
rawls_welfare_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
mad_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
gini_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
variance_welfare_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
cv_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
jain_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
envy_free_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
ef1_matrix = pd.DataFrame(index=all_agents, columns=all_agents)
pareto_matrix = pd.DataFrame(index=all_agents, columns=all_agents)

count_matrix = pd.DataFrame(index=all_agents, columns=all_agents)

for agent, opponents in agent_performance.items():
    for opponent, values in opponents.items():
        if values:
            performance_matrix.loc[agent, opponent] = np.mean(values)
            std_dev_matrix.loc[agent, opponent] = np.std(values)
            variance_matrix.loc[agent, opponent] = np.nanvar(values)
            count_matrix.loc[agent, opponent] = agent_game_counts[agent][opponent]


welfare_matrices = [
    nash_welfare_matrix, utilitarian_welfare_matrix, rawls_welfare_matrix, 
    mad_matrix, gini_matrix, variance_welfare_matrix, cv_matrix, 
    jain_matrix, envy_free_matrix, ef1_matrix, pareto_matrix
]

for matrix in welfare_matrices:
    matrix.values[:] = np.nan 

processed_pairs = set()  

for agent1_idx, agent1 in enumerate(all_agents):
    for agent2_idx, agent2 in enumerate(all_agents):
        # For welfare metrics, only process each unique pair once
        # Skip if we've already processed this pair (in reverse order)
        pair_key = tuple(sorted([agent1, agent2]))
        
        if pair_key in processed_pairs and agent1 != agent2:
            continue
            
        processed_pairs.add(pair_key)
        
        agent1_vs_agent2_games = [result for result in all_results 
                               if ((result['agent1'] == agent1 and result['agent2'] == agent2) or 
                                   (result['agent1'] == agent2 and result['agent2'] == agent1))]
        
        nash_values = []
        utilitarian_values = []
        rawls_values = []
        mad_values = []
        gini_values = []
        variance_values = []
        cv_values = []
        jain_values = []
        
        envy_free_count = 0
        ef1_count = 0
        pareto_count = 0
        valid_allocation_count = 0
        
        for game in agent1_vs_agent2_games:
            if game['agent1_value'] is not None and game['agent2_value'] is not None:
                if game['agent1'] == agent1 and game['agent2'] == agent2:
                    v1 = game['agent1_value']
                    v2 = game['agent2_value']
                    p1_items = game['p1_items']
                    p2_items = game['p2_items']
                    p1_values = game['p1_values']
                    p2_values = game['p2_values']
                    is_on_pareto = game.get('PF', False)
                else:
                    v2 = game['agent1_value']
                    v1 = game['agent2_value']
                    p2_items = game['p1_items']
                    p1_items = game['p2_items']
                    p2_values = game['p1_values']
                    p1_values = game['p2_values']
                    is_on_pareto = game.get('PF', False)
                
                mean_utility = (v1 + v2) / 2
                
                nash_welfare = np.sqrt(v1 * v2) / global_max_nash_welfare
                
                utilitarian_welfare = v1 + v2
                
                rawls_welfare = min(v1, v2)
                
                mad = abs(v1 - v2)
                
                gini = abs(v1 - v2) / (2 * 2 * mean_utility) if mean_utility > 0 else 0
                
                variance = ((v1 - mean_utility)**2 + (v2 - mean_utility)**2) / 2
                
                cv = np.sqrt(variance) / mean_utility if mean_utility > 0 else 0
                
                jain = 1 / (1 + cv**2) if cv > 0 else 1
                
                nash_values.append(nash_welfare)
                utilitarian_values.append(utilitarian_welfare)
                rawls_values.append(rawls_welfare)
                mad_values.append(mad)
                gini_values.append(gini)
                variance_values.append(variance)
                cv_values.append(cv)
                jain_values.append(jain)
                
                valid_allocation_count += 1
                
                if is_on_pareto:
                    pareto_count += 1
                elif game['final_action'] == "ACCEPT" and p1_items is not None and p2_items is not None and game['num_items'] is not None:
                    try:
                        full_items = game.get('full_items', game['num_items'])
                        
                        p1_values_np = np.array(p1_values)
                        p2_values_np = np.array(p2_values)
                        num_items = len(p1_values)
                        items_np = np.array(full_items)
                        outside_offers = [game.get('p1_outside_offer', np.dot(p1_values_np, items_np)), game.get('p2_outside_offer', np.dot(p2_values_np, full_items))]

                        
                        frontier = compute_pareto_frontier(p1_values_np, p2_values_np, num_items, items_np, outside_offers)
                        
                        is_pareto_optimal = False
                        for alloc in frontier:
                            if alloc['type'] == 'allocation':
                                if alloc['agent1'] == p1_items and alloc['agent2'] == p2_items:
                                    is_pareto_optimal = True
                                    break
                            elif alloc['type'] == 'outside_offer':
                                if game['final_action'] == "WALK" and abs(alloc['agent1_value'] - v1) < 0.01 and abs(alloc['agent2_value'] - v2) < 0.01:
                                    is_pareto_optimal = True
                                    break
                                    
                        if is_pareto_optimal:
                            pareto_count += 1
                            
                    except Exception as e:
                        print(f"Error calculating Pareto optimality: {e}")
                
                if game['final_action'] == "ACCEPT" and p1_items is not None and p2_items is not None and p1_values is not None and p2_values is not None:
                    
                    p1_own_bundle_value = sum(p1_values[i] * p1_items[i] for i in range(len(p1_values)))
                    
                    p1_other_bundle_value = sum(p1_values[i] * p2_items[i] for i in range(len(p1_values)))
                    
                    p1_is_envy_free = p1_own_bundle_value >= p1_other_bundle_value
                    
                    p2_own_bundle_value = sum(p2_values[i] * p2_items[i] for i in range(len(p2_values)))
                    
                    p2_other_bundle_value = sum(p2_values[i] * p1_items[i] for i in range(len(p2_values)))
                    
                    p2_is_envy_free = p2_own_bundle_value >= p2_other_bundle_value
                    
                    if p1_is_envy_free and p2_is_envy_free:
                        envy_free_count += 1
                    
                    p1_is_ef1 = p1_is_envy_free 
                    
                    if not p1_is_envy_free:
                        for j in range(len(p1_values)):
                            if p2_items[j] > 0:
                                
                                adjusted_value = p1_other_bundle_value - (p1_values[j] * 1)  # Remove 1 unit
                                if p1_own_bundle_value >= adjusted_value:
                                    p1_is_ef1 = True
                                    break
                    
                    p2_is_ef1 = p2_is_envy_free  
                    
                    if not p2_is_envy_free:
                        for j in range(len(p2_values)):
                            if p1_items[j] > 0:
                                adjusted_value = p2_other_bundle_value - (p2_values[j] * 1)  # Remove 1 unit
                                if p2_own_bundle_value >= adjusted_value:
                                    p2_is_ef1 = True
                                    break
                    
                    if p1_is_ef1 and p2_is_ef1:
                        ef1_count += 1
        
        if nash_values:
            welfare_value = np.mean(nash_values)
            nash_welfare_matrix.loc[agent1, agent2] = welfare_value
            nash_welfare_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(utilitarian_values)
            utilitarian_welfare_matrix.loc[agent1, agent2] = welfare_value
            utilitarian_welfare_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(rawls_values)
            rawls_welfare_matrix.loc[agent1, agent2] = welfare_value
            rawls_welfare_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(mad_values)
            mad_matrix.loc[agent1, agent2] = welfare_value
            mad_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(gini_values)
            gini_matrix.loc[agent1, agent2] = welfare_value
            gini_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(variance_values)
            variance_welfare_matrix.loc[agent1, agent2] = welfare_value
            variance_welfare_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(cv_values)
            cv_matrix.loc[agent1, agent2] = welfare_value
            cv_matrix.loc[agent2, agent1] = welfare_value
            
            welfare_value = np.mean(jain_values)
            jain_matrix.loc[agent1, agent2] = welfare_value
            jain_matrix.loc[agent2, agent1] = welfare_value
        
        # Calculate envy-freeness proportions
        if valid_allocation_count > 0:
            ef_value = envy_free_count / valid_allocation_count
            envy_free_matrix.loc[agent1, agent2] = ef_value
            envy_free_matrix.loc[agent2, agent1] = ef_value
            
            ef1_value = ef1_count / valid_allocation_count
            ef1_matrix.loc[agent1, agent2] = ef1_value
            ef1_matrix.loc[agent2, agent1] = ef1_value
            
            # Add Pareto optimality proportion
            pareto_value = pareto_count / valid_allocation_count
            pareto_matrix.loc[agent1, agent2] = pareto_value
            pareto_matrix.loc[agent2, agent1] = pareto_value

## Clean Naming Conventions

In [None]:
def clean_matrix_names(matrix):
    clean_matrix = matrix.copy()
    
    clean_matrix.index = [get_display_name(agent) for agent in matrix.index]
    clean_matrix.columns = [get_display_name(agent) for agent in matrix.columns]
    
    return clean_matrix

# Clean all matrices before visualization
matrices_to_clean = [
    performance_matrix, 
    nash_welfare_matrix,
    utilitarian_welfare_matrix, 
    rawls_welfare_matrix, 
    mad_matrix,
    gini_matrix, 
    variance_welfare_matrix, 
    cv_matrix, 
    jain_matrix,
    envy_free_matrix, 
    ef1_matrix, 
    pareto_matrix
]

cleaned_matrices = {}
for i, matrix_name in enumerate([
    "performance_matrix", 
    "nash_welfare_matrix",
    "utilitarian_welfare_matrix", 
    "rawls_welfare_matrix", 
    "mad_matrix",
    "gini_matrix", 
    "variance_welfare_matrix", 
    "cv_matrix", 
    "jain_matrix",
    "envy_free_matrix", 
    "ef1_matrix", 
    "pareto_matrix"
]):
    cleaned_matrices[matrix_name] = clean_matrix_names(matrices_to_clean[i])

performance_matrix = cleaned_matrices["performance_matrix"]
nash_welfare_matrix = cleaned_matrices["nash_welfare_matrix"]
utilitarian_welfare_matrix = cleaned_matrices["utilitarian_welfare_matrix"]
rawls_welfare_matrix = cleaned_matrices["rawls_welfare_matrix"]
mad_matrix = cleaned_matrices["mad_matrix"]
gini_matrix = cleaned_matrices["gini_matrix"]
variance_welfare_matrix = cleaned_matrices["variance_welfare_matrix"]
cv_matrix = cleaned_matrices["cv_matrix"]
jain_matrix = cleaned_matrices["jain_matrix"]
envy_free_matrix = cleaned_matrices["envy_free_matrix"]
ef1_matrix = cleaned_matrices["ef1_matrix"]
pareto_matrix = cleaned_matrices["pareto_matrix"]

In [None]:
performance_matrix

## Welfare Metrics

In [None]:
std_dev_matrix

In [None]:
variance_matrix

In [None]:
nash_welfare_matrix

In [None]:
utilitarian_welfare_matrix

In [None]:
rawls_welfare_matrix

In [None]:
mad_matrix


In [None]:
gini_matrix


In [None]:
variance_welfare_matrix


In [None]:
cv_matrix

In [None]:
jain_matrix

In [None]:
envy_free_matrix

In [None]:
ef1_matrix

In [None]:
pareto_matrix

In [None]:
for matrix_name, matrix in [
    ("Performance Matrix", performance_matrix),
    ("Nash Welfare Matrix", nash_welfare_matrix),
    ("Utilitarian Welfare Matrix", utilitarian_welfare_matrix),
    ("Rawls Welfare Matrix", rawls_welfare_matrix),
    ("Gini Coefficient Matrix", gini_matrix),
    ("Jain's Fairness Index Matrix", jain_matrix),
    ("Envy-Free Matrix", envy_free_matrix),
    ("EF1 Matrix", ef1_matrix),
    ("Pareto-Front-Count", pareto_matrix)
]:
    if matrix_name in ["Performance Matrix", "Nash Welfare Matrix", 
                       "Utilitarian Welfare Matrix", 
                       "Rawls Welfare Matrix"]:
        cmap = "viridis" 
        center = None
    elif matrix_name in ["Gini Coefficient Matrix"]:
        cmap = "coolwarm_r"
        center = 0.5
    elif matrix_name in ["Jain's Fairness Index Matrix", "Envy-Free Matrix", "EF1 Matrix"]:
        cmap = "viridis"
        center = None
    else:
        cmap = "coolwarm"
        center = 0
    
    fig = create_matrix_heatmap_with_marginals(matrix, title=matrix_name, cmap=cmap, center=center)
    plt.show()

## Visuals of Cooperation

### Cooperation Graphs

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

#TODO: look at leader follower dynamics 

def create_leader_perspective_graph(welfare_matrix, top_n=2):
    """Shows who each agent cooperates best with when they are the leader"""
    G = nx.DiGraph()
    agents = welfare_matrix.columns.tolist()
    
    for agent in agents:
        G.add_node(agent)
    
    for leader in agents:
        welfare_values = welfare_matrix.loc[leader]
        
        top_followers = welfare_values.sort_values(ascending=False)
        
        count = 0
        for follower, welfare in top_followers.items():
            if count < top_n and not np.isnan(welfare):  # Removed follower != leader check
                G.add_edge(leader, follower, weight=welfare)
                count += 1
    
    plt.figure(figsize=(14, 10))
    pos = nx.spring_layout(G, seed=42, k=0.8)
    nx.draw_networkx_nodes(G, pos, node_size=3000, node_color='#8ED1FC', alpha=0.8)
    nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')
    
    regular_edges = [(u, v) for u, v in G.edges() if u != v]
    nx.draw_networkx_edges(G, pos, edgelist=regular_edges, width=2, alpha=0.7, 
                          edge_color='red', connectionstyle='arc3,rad=0.1', arrowsize=15)
    
    self_loops = [(u, v) for u, v in G.edges() if u == v]
    nx.draw_networkx_edges(G, pos, edgelist=self_loops, width=2, alpha=0.7,
                          edge_color='orange', connectionstyle='arc3,rad=0.3', arrowsize=15, style='dashed')
    
    edge_labels = {(u, v): f"{G[u][v]['weight']:.2f}" for u, v in G.edges()}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=9)
    
    plt.title("Leader Perspective: Who I Cooperate Best With When Leading", fontsize=16)
    plt.axis('off')
    plt.tight_layout()
    return plt

def create_follower_perspective_graph(welfare_matrix, top_n=2):
    """Shows who each agent cooperates best with when they are the follower"""
    G = nx.DiGraph()
    agents = welfare_matrix.columns.tolist()
    
    for agent in agents:
        G.add_node(agent)
    
    for follower in agents:
        welfare_values = welfare_matrix[follower]  # Column values
        
        top_leaders = welfare_values.sort_values(ascending=False)
        
        # Add edges FROM follower TO leader (follower → leader)
        count = 0
        for leader, welfare in top_leaders.items():
            if count < top_n and not np.isnan(welfare):  
                
                G.add_edge(follower, leader, weight=welfare)
                count += 1
    
    # Create plot
    plt.figure(figsize=(14, 10))
    pos = nx.spring_layout(G, seed=42, k=0.8)
    nx.draw_networkx_nodes(G, pos, node_size=3000, node_color='#ABE7DB', alpha=0.8)
    nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')
    
    # Draw regular edges
    regular_edges = [(u, v) for u, v in G.edges() if u != v]
    nx.draw_networkx_edges(G, pos, edgelist=regular_edges, width=2, alpha=0.7, 
                          edge_color='red', connectionstyle='arc3,rad=0.1', arrowsize=15)
    
    # Draw self-loops with a different style
    self_loops = [(u, v) for u, v in G.edges() if u == v]
    nx.draw_networkx_edges(G, pos, edgelist=self_loops, width=2, alpha=0.7,
                          edge_color='#FF6900', connectionstyle='arc3,rad=0.3', arrowsize=15, style='dashed')
    
    edge_labels = {(u, v): f"{G[u][v]['weight']:.2f}" for u, v in G.edges()}
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=9)
    
    plt.title("Follower Perspective: Who I Follow Best", fontsize=16)
    plt.axis('off')
    plt.tight_layout()
    return plt

leader_plot = create_leader_perspective_graph(performance_matrix, top_n=1)
leader_plot.show()


follower_plot = create_follower_perspective_graph(performance_matrix, top_n=1)
follower_plot.show()



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from graphviz import Digraph

# Assuming performance_matrix is your existing pandas DataFrame
# Create a new Graphviz graph
dot = Digraph(comment='Strategy Response Graph', format='png')
dot.attr(rankdir='LR', size='12,8', dpi='300', bgcolor='white')
dot.attr('node', shape='box', style='filled,rounded', fillcolor='lightblue', 
         fontname='Arial', fontsize='14', margin='0.4,0.3')  # Increased margin to prevent text cutoff

# Get strategy names from your DataFrame
strategy_names = performance_matrix.columns.tolist()

# Add nodes with cleaner labels (shorten if needed)
for strategy in strategy_names:
    # Create a shorter label if the strategy name is too long
    short_name = strategy
    # if len(strategy) > 15:  # Reduced character limit to ensure text fits
    #     short_name = strategy[:12] + "..."
    
    # Add more padding by using HTML-like label with line breaks if needed
    dot.node(strategy, short_name, width='2.5', height='0.8')

# For each strategy (column), find the best response (highest value in column)
for strategy in strategy_names:
    # Get the column values
    column_values = performance_matrix[strategy]
    
    # Find the index of the best response (highest value)
    best_response = column_values.idxmax()
    best_value = column_values.max()
    
    # Add an edge from the strategy to its best response
    dot.edge(strategy, best_response, label=f" {best_value:.2f}", color='#3366FF')

# Render the graph
dot.render('strategy_graph_graphviz', view=True, cleanup=True)

# If you want to display it in the notebook
from IPython.display import Image, display
display(Image('strategy_graph_graphviz.png'))

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from graphviz import Digraph

def create_complete_best_response_graph(performance_matrix, filename='complete_best_response'):
    """
    Create a complete best response graph showing both player perspectives
    
    Parameters:
    - performance_matrix: DataFrame containing payoffs (rows are P1 strategies, columns are P2 strategies)
    - filename: base name for output files
    """
    # Create a new Graphviz graph
    dot = Digraph(comment='Complete Best Response Graph', format='png')
    dot.attr(rankdir='LR', size='14,10', dpi='300', bgcolor='white')
    dot.attr('node', shape='box', style='filled,rounded', fontname='Arial', fontsize='14', margin='0.4,0.3')
    
    # Get strategy names
    strategy_names = performance_matrix.columns.tolist()
    
    # Add nodes with distinct colors based on which player has best response
    for strategy in strategy_names:
        # Add the node once - we'll color edges instead of nodes to show perspectives
        dot.node(strategy, strategy, width='2.5', height='0.8', fillcolor='lightblue')
    
    # Add P1 best responses (respond to P2's strategies - column perspective)
    for p2_strategy in strategy_names:
        # Get the column values - what P1 gets against P2's strategy
        p1_payoffs = performance_matrix[p2_strategy]
        
        # Find P1's best response
        p1_best_response = p1_payoffs.idxmax()
        p1_best_value = p1_payoffs.max()
        
        # Add edge from P2 strategy to P1's best response (P2 → P1 best response)
        dot.edge(p2_strategy, p1_best_response, 
                label=f" P1:{p1_best_value:.2f}", 
                color='#3366FF',  # Blue for P1's best responses
                fontcolor='#3366FF')
    
    # Add P2 best responses (respond to P1's strategies - row perspective)
    # We need to transpose performance_matrix to get P2's perspective
    p2_matrix = performance_matrix.transpose()  # Now rows are P2 strategies, columns are P1 strategies
    
    for p1_strategy in strategy_names:
        # Get the column values - what P2 gets against P1's strategy
        p2_payoffs = p2_matrix[p1_strategy]
        
        # Find P2's best response
        p2_best_response = p2_payoffs.idxmax()
        p2_best_value = p2_payoffs.max()
        
        # Add edge from P1 strategy to P2's best response (P1 → P2 best response)
        dot.edge(p1_strategy, p2_best_response, 
                label=f" P2:{p2_best_value:.2f}", 
                color='#FF6347',  # Red for P2's best responses
                fontcolor='#FF6347')
    
    # Render the graph
    dot.render(filename, view=True, cleanup=True)
    
    # Return the Digraph object for potential further customization
    return dot

# Example usage:
best_response_graph = create_complete_best_response_graph(performance_matrix)

## Statistical Analysis

In [None]:
def analyze_game_progression(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    progression_data = []
    
    for game_idx, game in enumerate(data['all_game_data']):
        agent1 = game['agent1']
        agent2 = game['agent2']
        
        # Remove Agent1_ and Agent2_ prefixes if they exist
        if agent1.startswith("Agent1_"):
            agent1 = agent1[7:]
        if agent2.startswith("Agent2_"):
            agent2 = agent2[7:]
        
        # Extract circle values
        try:
            circle_data = game['circle']
            if isinstance(circle_data, int):
                p1_circle = circle_data
                p2_circle = circle_data
            else:
                p1_circle = circle_data[0]
                p2_circle = circle_data[1]
        except (KeyError, TypeError, IndexError):
            p1_circle = None
            p2_circle = None
        
        # Add circle values to agent names
        agent1_with_circle = f"{agent1}_circle_{p1_circle}" if p1_circle is not None else agent1
        agent2_with_circle = f"{agent2}_circle_{p2_circle}" if p2_circle is not None else agent2
        
        # Extract private values and outside offers
        p1_values = None
        p2_values = None
        p1_outside_offer = None
        p2_outside_offer = None
        num_items = None
        
        game_rounds = []
        current_offer = None
        
        for round_idx, round_data in enumerate(game['round_data']):
            round_number = (round_idx // 2) + 1
            turn_in_round = (round_idx % 2) + 1
            action = round_data['action']
            prompt = round_data['prompt']
            
            # Determine active player
            active_player = agent1_with_circle if "You are Player 1" in prompt else agent2_with_circle
            
            # Extract values and outside offers first time we see them
            if p1_values is None and "You are Player 1" in prompt:
                for line in prompt.split('\n'):
                    if "Your private values are" in line:
                        p1_values = parse_values(line)
                    elif "Your outside offer value is" in line:
                        p1_outside_offer = parse_outside_offer(line)
                    elif "There are" in line and "units of item" in line:
                        if num_items is None:
                            nums = []
                            parts = line.split("There are ")[1].split(", ")
                            for part in parts:
                                num = int(part.split(" units")[0])
                                nums.append(num)
                            num_items = nums
            
            if p2_values is None and "You are Player 2" in prompt:
                for line in prompt.split('\n'):
                    if "Your private values are" in line:
                        p2_values = parse_values(line)
                    elif "Your outside offer value is" in line:
                        p2_outside_offer = parse_outside_offer(line)
            
            # Extract current offer on the table
            offer_before_action = None
            if "Current offer on the table" in prompt:
                offer_line = prompt.split("Current offer on the table")[1].split("\n")[0]
                if "None" not in offer_line and "[" in offer_line:
                    offer_before_action = eval(offer_line[offer_line.find("["):offer_line.find("]")+1])
            
            # Process the action
            if action.startswith("OFFER"):
                try:
                    new_offer = eval(action.replace("OFFER ", ""))
                    current_offer = new_offer
                except:
                    new_offer = "INVALID OFFER"
            elif action == "ACCEPT":
                # Calculate utilities based on current offer
                p1_items = None
                p2_items = None
                
                if turn_in_round == 1:  # Player 1 accepting
                    p1_items = offer_before_action
                    p2_items = [num_items[i] - offer_before_action[i] for i in range(len(offer_before_action))]
                else:  # Player 2 accepting
                    p2_items = offer_before_action
                    p1_items = [num_items[i] - offer_before_action[i] for i in range(len(offer_before_action))]
                
                # Calculate utilities
                discount_factor = 0.9 ** (round_number - 1)
                p1_utility = calculate_value(p1_items, p1_values) * discount_factor if p1_items else None
                p2_utility = calculate_value(p2_items, p2_values) * discount_factor if p2_items else None
            elif action == "WALK" or action == "INVALID WALK":
                # Calculate outside offer values
                discount_factor = 0.9 ** (round_number - 1)
                p1_utility = p1_outside_offer * discount_factor if p1_outside_offer else None
                p2_utility = p2_outside_offer * discount_factor if p2_outside_offer else None
            else:
                p1_utility = None
                p2_utility = None
            
            # Calculate utility if the offer was accepted at this point
            potential_p1_utility = None
            potential_p2_utility = None
            
            if current_offer is not None and p1_values and p2_values:
                discount_factor = 0.9 ** (round_number - 1)
                if turn_in_round == 1:  # From Player 1's perspective
                    p1_potential_items = current_offer
                    p2_potential_items = [num_items[i] - current_offer[i] for i in range(len(current_offer))]
                else:  # From Player 2's perspective
                    p2_potential_items = current_offer
                    p1_potential_items = [num_items[i] - current_offer[i] for i in range(len(current_offer))]
                
                potential_p1_utility = calculate_value(p1_potential_items, p1_values) * discount_factor
                potential_p2_utility = calculate_value(p2_potential_items, p2_values) * discount_factor
            
            # Track results for this round
            round_info = {
                'game_id': game_idx,
                'round': round_number,
                'turn': turn_in_round,
                'active_player': active_player,
                'action': action,
                'offer_before_action': offer_before_action,
                'current_offer': current_offer,
                'p1_utility': p1_utility,
                'p2_utility': p2_utility,
                'potential_p1_utility': potential_p1_utility,
                'potential_p2_utility': potential_p2_utility,
                'discount_factor': 0.9 ** (round_number - 1),
            }
            
            game_rounds.append(round_info)
        
        # Add game-level data
        game_data = {
            'game_id': game_idx,
            'agent1': agent1_with_circle,
            'agent2': agent2_with_circle,
            'p1_values': p1_values,
            'p2_values': p2_values,
            'p1_outside_offer': p1_outside_offer,
            'p2_outside_offer': p2_outside_offer,
            'num_items': num_items,
            'rounds': game_rounds,
            'total_rounds': len(game_rounds) // 2,
            'final_action': game_rounds[-1]['action'] if game_rounds else None,
        }
        
        progression_data.append(game_data)
    

# Example usage and analysis
file_path = "crossplay/game_matrix_1/4o_sonnet_3.7_circle_4_vs_circle_6/all_game_data_3_1_2025_20_openai_4o_vs_anthropic_3.7_sonnet_circle_p1_4_circle_p2_6.json"
progression_results = analyze_game_progression(file_path)

print(progression_results)
# Now we can analyze patterns in the negotiation progression
def analyze_negotiation_dynamics(progression_results):
    # Convert to DataFrame for easier analysis
    rounds_data = []
    for game in progression_results:
        for round_info in game['rounds']:
            round_data = {**round_info}
            round_data['agent1'] = game['agent1']
            round_data['agent2'] = game['agent2']
            rounds_data.append(round_data)
    
    rounds_df = pd.DataFrame(rounds_data)
    
    # 1. Analyze offer evolution over time
    offer_evolution = rounds_df[rounds_df['action'].str.startswith('OFFER')]
    
    # 2. Calculate how offers change (concessions)
    # This requires some additional processing of the offers
    
    # 3. When do agents walk away?
    walks_by_round = rounds_df[rounds_df['action'].isin(['WALK', 'INVALID WALK'])].groupby('round').size()
    
    # 4. Acceptance rates by round
    accepts_by_round = rounds_df[rounds_df['action'] == 'ACCEPT'].groupby('round').size()
    
    # 5. Analysis of first offers
    first_offers = rounds_df[rounds_df['turn'] == 1].groupby('game_id').first()
    
    return {
        'rounds_df': rounds_df,
        'offer_evolution': offer_evolution,
        'walks_by_round': walks_by_round,
        'accepts_by_round': accepts_by_round,
        'first_offers': first_offers
    }

# Run the analysis
dynamics = analyze_negotiation_dynamics(progression_results)

# Visualize some of the results
plt.figure(figsize=(12, 6))

# Walk and accept rates by round
plt.subplot(1, 2, 1)
dynamics['walks_by_round'].plot(kind='bar', color='red', alpha=0.7)
plt.title('Walk-away Frequency by Round')
plt.xlabel('Round Number')
plt.ylabel('Count')

plt.subplot(1, 2, 2)
dynamics['accepts_by_round'].plot(kind='bar', color='green', alpha=0.7)
plt.title('Acceptance Frequency by Round')
plt.xlabel('Round Number')
plt.ylabel('Count')

plt.tight_layout()
plt.show()

# Utility progression over rounds
plt.figure(figsize=(12, 6))
for game_id in dynamics['rounds_df']['game_id'].unique():
    game_data = dynamics['rounds_df'][dynamics['rounds_df']['game_id'] == game_id]
    plt.plot(game_data['round'], game_data['potential_p1_utility'], 'b-', alpha=0.3)
    plt.plot(game_data['round'], game_data['potential_p2_utility'], 'r-', alpha=0.3)

plt.title('Potential Utility Evolution Over Rounds')
plt.xlabel('Round')
plt.ylabel('Utility')
plt.legend(['Player 1', 'Player 2'])
plt.grid(True, alpha=0.3)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
import seaborn as sns
from matplotlib.patches import Patch

def visualize_negotiation_dynamics(progression_results, metric='utility', agent_pair=None, 
                                  show_individual_games=True, plot_confidence=True):
    """
    Visualize negotiation dynamics over rounds
    
    Parameters:
    - progression_results: list of game data from analyze_game_progression
    - metric: 'utility', 'nash', 'fairness_index', 'pareto', etc.
    - agent_pair: tuple of (agent1, agent2) to filter for specific agent pairs
    - show_individual_games: whether to show individual game lines
    - plot_confidence: whether to show confidence intervals
    """
    # Filter for specific agent pair if requested
    if agent_pair:
        filtered_games = [g for g in progression_results 
                         if g['agent1'] == agent_pair[0] and g['agent2'] == agent_pair[1]]
    else:
        filtered_games = progression_results
    
    if not filtered_games:
        print(f"No games found for the specified agent pair: {agent_pair}")
        return
    
    # Prepare data for plotting
    plot_data = []
    
    for game in filtered_games:
        for round_info in game['rounds']:
            # Skip if not an offer action
            if not round_info['action'].startswith('COUNTEROFFER'):
                continue
                
            round_num = round_info['round']
            turn = round_info['turn']
            
            # Adjust x-position based on turn (Player 1 at whole numbers, Player 2 at .5)
            x_pos = round_num if turn == 1 else round_num - 0.5
            
            # Calculate values based on the selected metric
            if metric == 'utility':
                p1_value = round_info['potential_p1_utility']
                p2_value = round_info['potential_p2_utility']
            elif metric == 'nash':
                p1_value = round_info['potential_p1_utility']
                p2_value = round_info['potential_p2_utility']
                if p1_value is not None and p2_value is not None and p1_value > 0 and p2_value > 0:
                    nash_value = np.sqrt(p1_value * p2_value)  # Geometric mean for Nash welfare
                    p1_value = nash_value
                    p2_value = nash_value
            elif metric == 'fairness_index':
                p1_value = round_info['potential_p1_utility']
                p2_value = round_info['potential_p2_utility']
                if p1_value is not None and p2_value is not None and (p1_value + p2_value) > 0:
                    # Jain's fairness index
                    fairness = (p1_value + p2_value)**2 / (2 * (p1_value**2 + p2_value**2))
                    p1_value = fairness
                    p2_value = fairness
            elif metric == 'relative_share':
                p1_value = round_info['potential_p1_utility']
                p2_value = round_info['potential_p2_utility']
                if p1_value is not None and p2_value is not None and (p1_value + p2_value) > 0:
                    p1_value = p1_value / (p1_value + p2_value)
                    p2_value = p2_value / (p1_value + p2_value)
            
            # Add data point
            if p1_value is not None and p2_value is not None:
                plot_data.append({
                    'game_id': game['game_id'],
                    'round': x_pos,
                    'player': 1 if turn == 1 else 2,  # Which player is making the offer
                    'p1_value': p1_value,
                    'p2_value': p2_value,
                    'active_player': round_info['active_player'],
                    'offer': round_info['current_offer']
                })
    
    # Convert to DataFrame
    df = pd.DataFrame(plot_data)
    
    if df.empty:
        print("No valid data to plot after filtering")
        return
    
    # Calculate summary statistics for each round position
    round_positions = df['round'].unique()
    round_positions.sort()
    
    p1_means = []
    p1_lower = []
    p1_upper = []
    p2_means = []
    p2_lower = []
    p2_upper = []
    
    for pos in round_positions:
        pos_data = df[df['round'] == pos]
        
        # P1 stats
        p1_mean = pos_data['p1_value'].mean()
        p1_means.append(p1_mean)
        
        if len(pos_data) >= 2:
            p1_std = pos_data['p1_value'].std()
            p1_sem = p1_std / np.sqrt(len(pos_data))
            p1_lower.append(p1_mean - 1.96 * p1_sem)  # 95% confidence interval
            p1_upper.append(p1_mean + 1.96 * p1_sem)
        else:
            p1_lower.append(p1_mean)
            p1_upper.append(p1_mean)
        
        # P2 stats
        p2_mean = pos_data['p2_value'].mean()
        p2_means.append(p2_mean)
        
        if len(pos_data) >= 2:
            p2_std = pos_data['p2_value'].std()
            p2_sem = p2_std / np.sqrt(len(pos_data))
            p2_lower.append(p2_mean - 1.96 * p2_sem)
            p2_upper.append(p2_mean + 1.96 * p2_sem)
        else:
            p2_lower.append(p2_mean)
            p2_upper.append(p2_mean)
    
    # Create plot
    plt.figure(figsize=(14, 8))
    
    # Individual game lines (transparent)
    if show_individual_games:
        for game_id in df['game_id'].unique():
            game_df = df[df['game_id'] == game_id]
            
            # Player 1's value when player 1 is active (red circle)
            p1_active = game_df[game_df['player'] == 1]
            plt.plot(p1_active['round'], p1_active['p1_value'], 'ro-', alpha=0.2, markersize=8)
            
            # Player 2's value when player 1 is active (blue square)
            plt.plot(p1_active['round'], p1_active['p2_value'], 'bs-', alpha=0.2, markersize=8)
            
            # Player 1's value when player 2 is active (red square)
            p2_active = game_df[game_df['player'] == 2]
            plt.plot(p2_active['round'], p2_active['p1_value'], 'rs-', alpha=0.2, markersize=8)
            
            # Player 2's value when player 2 is active (blue circle)
            plt.plot(p2_active['round'], p2_active['p2_value'], 'bo-', alpha=0.2, markersize=8)
    
    # Mean lines
    # Player 1's mean values (red)
    plt.plot(round_positions, p1_means, 'r-', linewidth=3, label="Player 1's value")
    
    # Player 2's mean values (blue)
    plt.plot(round_positions, p2_means, 'b-', linewidth=3, label="Player 2's value")
    
    # Confidence intervals with shading
    if plot_confidence:
        plt.fill_between(round_positions, p1_lower, p1_upper, color='r', alpha=0.2)
        plt.fill_between(round_positions, p2_lower, p2_upper, color='b', alpha=0.2)
    
    # Mark rounds by player
    p1_rounds = [r for r in round_positions if r.is_integer()]
    p2_rounds = [r for r in round_positions if not r.is_integer()]
    
    for r in p1_rounds:
        plt.axvline(x=r, color='lightgray', linestyle='--', alpha=0.5)
    
    for r in p2_rounds:
        plt.axvline(x=r, color='lightgray', linestyle=':', alpha=0.5)
    
    # Titles and labels
    metric_name = metric.replace('_', ' ').title()
    plt.title(f'Evolution of {metric_name} During Negotiation', fontsize=16)
    plt.xlabel('Round Number', fontsize=14)
    plt.ylabel(metric_name, fontsize=14)
    
    # Create custom legend
    legend_elements = [
        Patch(facecolor='red', edgecolor='red', label="Player 1's value"),
        Patch(facecolor='blue', edgecolor='blue', label="Player 2's value"),
        Patch(facecolor='none', edgecolor='red', label="95% confidence P1"),
        Patch(facecolor='none', edgecolor='blue', label="95% confidence P2"),
        Patch(facecolor='lightgray', edgecolor='lightgray', label="Player 1's turn (integer rounds)"),
        Patch(facecolor='lightgray', edgecolor='lightgray', hatch='/', label="Player 2's turn (half rounds)")
    ]
    
    plt.legend(handles=legend_elements, loc='best', fontsize=12)
    
    # Set x-ticks to show rounds clearly
    max_round = int(np.ceil(max(round_positions)))
    plt.xticks(np.arange(1, max_round + 1, 0.5))
    
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    
    # Return the plot object for potential further customization
    return plt

# Example usage:
# Basic utility plot
utility_plot = visualize_negotiation_dynamics(progression_results, metric='utility')
utility_plot.show()

# Fairness evolution
fairness_plot = visualize_negotiation_dynamics(progression_results, metric='fairness_index')
fairness_plot.show()

# Nash welfare
nash_plot = visualize_negotiation_dynamics(progression_results, metric='nash')
nash_plot.show()

# Filter for specific agent pair
specific_pair_plot = visualize_negotiation_dynamics(
    progression_results, 
    metric='utility',
    agent_pair=('openai_4o_circle_5', 'anthropic_3.7_sonnet_circle_6')
)
specific_pair_plot.show()