In [1]:
import os
import pandas as pd
# Directory holding the analysis results. The default is the original author's
# local checkout; set the LLM_TA_RESULTS_DIR environment variable to run this
# notebook on another machine without editing the cell.
res_csv_path = os.environ.get(
    "LLM_TA_RESULTS_DIR",
    r"C:\Users\bahar\Repositories\llm-ta-aied26\results",
)
flat_csv_name = "flat_reasoning_alignment_analysis.csv"
flat_csv_path = os.path.join(res_csv_path, flat_csv_name)

# Sanity check displayed as the cell output: True when the CSV is present.
os.path.isfile(flat_csv_path)

True

## util code

In [4]:
"""
Agent Coding Pattern Analysis
This script analyzes coding patterns for three agents (Ava, Ben, Cam) 
across multiple rounds of coding decisions.
"""

import pandas as pd
import json
from collections import Counter, defaultdict

def load_and_parse_data(filepath):
    """Load the analysis CSV and parse its ``codes`` column into Python objects.

    Args:
        filepath: Path (or any source accepted by ``pandas.read_csv``) of the CSV.
            It must contain a ``codes`` column whose cells are string
            representations of Python literals (the analyses in this notebook
            index the parsed value by agent name, i.e. they expect dicts).

    Returns:
        The loaded DataFrame with an added ``codes_dict`` column holding the
        parsed value of each ``codes`` cell.

    Raises:
        ValueError, SyntaxError: If a ``codes`` cell is not a valid Python literal.
    """
    # Function-scope import keeps this definition usable without touching the
    # notebook's import cell.
    import ast

    df = pd.read_csv(filepath)

    # ast.literal_eval only accepts literals (dicts, strings, numbers, ...), so a
    # crafted CSV cell cannot execute code the way the previous eval() call could.
    df['codes_dict'] = df['codes'].apply(ast.literal_eval)

    return df

def analyze_overall_patterns(df):
    """Print and return how often each agent assigned each code.

    Args:
        df: DataFrame with a ``codes_dict`` column; each cell maps an agent
            name ('Ava', 'Ben', 'Cam') to the code that agent assigned.

    Returns:
        A ``(results, all_codes)`` tuple. ``results[agent][code]`` is a dict with
        ``'count'`` and ``'percentage'`` entries, ordered from most to least
        frequent code. ``all_codes[agent]`` is the list of every code the agent
        assigned, in row order.
    """
    banner = "=" * 80
    print(banner)
    print("CODING PATTERN ANALYSIS BY AGENT")
    print(banner)

    agent_names = ('Ava', 'Ben', 'Cam')

    # Collect each agent's codes in row order.
    all_codes = {name: [] for name in agent_names}
    for row_codes in df['codes_dict']:
        for agent, code in row_codes.items():
            all_codes[agent].append(code)

    # Tally and report each agent's codes, most frequent first.
    results = {}
    for agent in agent_names:
        print(f"\n{agent}'s Coding Pattern:")
        print("-" * 40)

        assigned = all_codes[agent]
        n_assigned = len(assigned)

        summary = {}
        results[agent] = summary
        for code, count in Counter(assigned).most_common():
            share = (count / n_assigned) * 100
            summary[code] = {'count': count, 'percentage': share}
            print(f"  {code:8s}: {count:3d} times ({share:5.1f}%)")

    return results, all_codes

def analyze_by_round(df):
    """Print and return each agent's code counts, split by round.

    Args:
        df: DataFrame with a ``round`` column and a ``codes_dict`` column whose
            cells map agent names to codes.

    Returns:
        Dict keyed by round value; each entry maps an agent name to a plain
        dict of ``{code: count}`` for that round.
    """
    print("\n" + "=" * 80)
    print("CODING PATTERN BY ROUND")
    print("=" * 80)

    agent_names = ('Ava', 'Ben', 'Cam')
    round_patterns = {}

    for round_num in sorted(df['round'].unique()):
        print(f"\nRound {round_num}:")
        print("-" * 40)

        # All code dicts recorded in this round, in row order.
        round_entries = df.loc[df['round'] == round_num, 'codes_dict']

        per_agent = {}
        round_patterns[round_num] = per_agent
        for agent in agent_names:
            tally = dict(Counter(entry[agent] for entry in round_entries))
            per_agent[agent] = tally
            print(f"  {agent}: {tally}")

    return round_patterns

def analyze_agreements(df):
    """Print and return how often the three agents agree on a code.

    Every row falls into exactly one bucket: all three codes equal, exactly one
    pair equal (the all-equal case is checked first, so a pair bucket implies
    the third agent differs), or no two codes equal.

    Args:
        df: DataFrame with a ``codes_dict`` column; each cell must provide the
            keys 'Ava', 'Ben' and 'Cam'.

    Returns:
        Dict with the counts for ``'all_agree'``, ``'ava_ben_agree'``,
        ``'ava_cam_agree'``, ``'ben_cam_agree'`` and ``'all_disagree'``.
    """
    print("\n" + "=" * 80)
    print("AGREEMENT PATTERNS")
    print("=" * 80)

    agreement_counts = {
        'all_agree': 0,
        'ava_ben_agree': 0,
        'ava_cam_agree': 0,
        'ben_cam_agree': 0,
        'all_disagree': 0
    }

    for codes in df['codes_dict']:
        ava, ben, cam = codes['Ava'], codes['Ben'], codes['Cam']

        if ava == ben == cam:
            agreement_counts['all_agree'] += 1
        elif ava == ben:
            agreement_counts['ava_ben_agree'] += 1
        elif ava == cam:
            agreement_counts['ava_cam_agree'] += 1
        elif ben == cam:
            agreement_counts['ben_cam_agree'] += 1
        else:
            agreement_counts['all_disagree'] += 1

    total_cases = len(df)
    for pattern, count in agreement_counts.items():
        # An empty frame previously raised ZeroDivisionError here; report 0%
        # instead, matching the guard used by calculate_stability_metrics.
        percentage = (count / total_cases) * 100 if total_cases > 0 else 0.0
        print(f"{pattern:20s}: {count:3d} times ({percentage:5.1f}%)")

    return agreement_counts

def analyze_transitions(df):
    """Print and return each agent's code-to-code transitions between rounds.

    For every distinct ``row_index`` the rows are ordered by ``round`` and each
    pair of consecutive codes from the same agent is counted as one transition.

    Args:
        df: DataFrame with ``row_index``, ``round`` and ``codes_dict`` columns.

    Returns:
        Dict mapping agent name to a nested ``defaultdict`` where
        ``result[agent][from_code][to_code]`` is the transition count.
    """
    print("\n" + "=" * 80)
    print("CODE TRANSITIONS BY AGENT (Round to Round)")
    print("=" * 80)

    all_transitions = {}

    for agent in ('Ava', 'Ben', 'Cam'):
        print(f"\n{agent}'s Transitions:")
        transitions = defaultdict(lambda: defaultdict(int))

        # One code history per coded item, ordered by round.
        for item_id in df['row_index'].unique():
            history = df[df['row_index'] == item_id].sort_values('round')
            sequence = [entry[agent] for entry in history['codes_dict']]

            # Pair every code with the one from the following round.
            for current_code, next_code in zip(sequence, sequence[1:]):
                transitions[current_code][next_code] += 1

        all_transitions[agent] = transitions

        # Report: source codes alphabetically, targets by descending count.
        for from_code in sorted(transitions.keys()):
            print(f"  From {from_code}:")
            ranked_targets = sorted(transitions[from_code].items(), key=lambda pair: -pair[1])
            for to_code, count in ranked_targets:
                print(f"    → {to_code}: {count} times")

    return all_transitions

def calculate_stability_metrics(transitions):
    """Print, per agent and code, how often the code was kept in the next round.

    Stability of a code is the share of transitions leaving that code which
    lead back to the same code; it is reported as 0 when the code has no
    outgoing transitions.

    Args:
        transitions: Mapping ``agent -> from_code -> to_code -> count``, as
            returned by ``analyze_transitions``.

    Returns:
        None. The metrics are only printed.
    """
    print("\n" + "=" * 80)
    print("STABILITY METRICS")
    print("=" * 80)

    for agent, agent_transitions in transitions.items():
        print(f"\n{agent}'s Stability:")
        print("-" * 40)

        for code in sorted(agent_transitions.keys()):
            outgoing = agent_transitions[code]
            total_from_code = sum(outgoing.values())
            stayed_same = outgoing.get(code, 0)

            if total_from_code > 0:
                stability = stayed_same / total_from_code * 100
            else:
                stability = 0

            print(f"  {code:8s}: {stayed_same}/{total_from_code} stayed same ({stability:5.1f}%)")

def analyze_similarity_scores(df):
    """Print mean pairwise similarity scores, overall and per round.

    Args:
        df: DataFrame with a ``round`` column and the score columns
            ``sim_Ava_Ben``, ``sim_Ava_Cam`` and ``sim_Ben_Cam``.

    Returns:
        Dict with the overall mean of each score column, keyed by
        ``'Ava_Ben'``, ``'Ava_Cam'`` and ``'Ben_Cam'``.
    """
    print("\n" + "=" * 80)
    print("SIMILARITY SCORE ANALYSIS")
    print("=" * 80)

    # Agent pair -> column holding that pair's similarity score.
    pair_columns = {
        'Ava_Ben': 'sim_Ava_Ben',
        'Ava_Cam': 'sim_Ava_Cam',
        'Ben_Cam': 'sim_Ben_Cam',
    }

    avg_similarities = {
        pair: df[column].mean() for pair, column in pair_columns.items()
    }

    print("\nAverage Similarity Scores:")
    print("-" * 40)
    for pair, score in avg_similarities.items():
        print(f"  {pair:10s}: {score:.4f}")

    print("\nSimilarity by Round:")
    print("-" * 40)
    for round_num in sorted(df['round'].unique()):
        round_df = df[df['round'] == round_num]
        print(f"\n  Round {round_num}:")
        for pair, column in pair_columns.items():
            # Per-round lines use a hyphenated pair label (e.g. "Ava-Ben").
            label = pair.replace('_', '-')
            print(f"    {label}: {round_df[column].mean():.4f}")

    return avg_similarities



## run 

In [5]:
# Load the flat CSV once; every analysis below reads the same frame.
flat_df = load_and_parse_data(flat_csv_path)
# Run all analyses. Each call prints its own report section, so the call order
# here is the order of the sections in the cell output.
overall_patterns, all_codes = analyze_overall_patterns(flat_df)
round_patterns = analyze_by_round(flat_df)
agreement_counts = analyze_agreements(flat_df)
transitions = analyze_transitions(flat_df)
# Prints only (no return value); consumes the transitions computed just above.
calculate_stability_metrics(transitions)
similarity_scores = analyze_similarity_scores(flat_df)



# Bundle the returned values under one name for later inspection.
results = {
    'overall_patterns': overall_patterns,
    'round_patterns': round_patterns,
    'agreement_counts': agreement_counts,
    'transitions': transitions,
    'similarity_scores': similarity_scores
}


CODING PATTERN ANALYSIS BY AGENT

Ava's Coding Pattern:
----------------------------------------
  WCT     : 115 times ( 51.3%)
  NONE    :  51 times ( 22.8%)
  GT      :  42 times ( 18.8%)
  Other   :  16 times (  7.1%)

Ben's Coding Pattern:
----------------------------------------
  WCT     : 152 times ( 67.9%)
  NONE    :  41 times ( 18.3%)
  GT      :  16 times (  7.1%)
  Other   :  15 times (  6.7%)

Cam's Coding Pattern:
----------------------------------------
  WCT     : 152 times ( 67.9%)
  NONE    :  36 times ( 16.1%)
  GT      :  22 times (  9.8%)
  Other   :  14 times (  6.2%)

CODING PATTERN BY ROUND

Round 1:
----------------------------------------
  Ava: {'NONE': 20, 'Other': 5, 'WCT': 53, 'GT': 22}
  Ben: {'GT': 8, 'WCT': 66, 'Other': 9, 'NONE': 17}
  Cam: {'WCT': 71, 'NONE': 10, 'GT': 10, 'Other': 9}

Round 2:
----------------------------------------
  Ava: {'NONE': 15, 'Other': 7, 'WCT': 40, 'GT': 13}
  Ben: {'NONE': 13, 'Other': 4, 'WCT': 54, 'GT': 4}
  Cam: {'WCT'

## Visualize

### utils

In [7]:
# Reset any partial matplotlib imports from previous failures
import os
import sys

# Drop every cached matplotlib module so the import below starts from scratch.
for _mod in list(sys.modules):
    if _mod.startswith("matplotlib"):
        sys.modules.pop(_mod, None)

import matplotlib
# Select the Agg backend before pyplot is imported in the next cell; the plot
# functions in this notebook save figures to disk rather than showing them.
matplotlib.use("Agg", force=True)

# Figures go to "<results dir>/figures". The results directory defaults to the
# original author's local checkout and can be overridden with the
# LLM_TA_RESULTS_DIR environment variable to run on another machine.
fig_path = os.path.join(
    os.environ.get(
        "LLM_TA_RESULTS_DIR",
        r"C:\Users\bahar\Repositories\llm-ta-aied26\results",
    ),
    "figures",
)

In [8]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
import numpy as np

# Global plot styling: seaborn's "whitegrid" theme plus a default figure size.
# Every plot function in this cell passes its own figsize to plt.subplots, so
# the rcParams default only affects figures created without an explicit size.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 10)

def load_data(filepath):
    """Load the analysis CSV and parse its ``codes`` column into Python objects.

    Args:
        filepath: Path (or any source accepted by ``pandas.read_csv``) of the CSV.
            It must contain a ``codes`` column whose cells are string
            representations of Python literals (the plot functions index the
            parsed value by agent name, i.e. they expect dicts).

    Returns:
        The loaded DataFrame with an added ``codes_dict`` column holding the
        parsed value of each ``codes`` cell.

    Raises:
        ValueError, SyntaxError: If a ``codes`` cell is not a valid Python literal.
    """
    # Function-scope import keeps this definition usable without touching the
    # notebook's import cell.
    import ast

    df = pd.read_csv(filepath)
    # ast.literal_eval only accepts literals, so a crafted CSV cell cannot
    # execute code the way the previous eval() call could.
    df['codes_dict'] = df['codes'].apply(ast.literal_eval)
    return df

def plot_overall_distribution(df, save_path='agent_code_distribution.png'):
    """Plot each agent's overall code distribution as grouped and stacked bars.

    The left panel shows one bar per (code, agent) pair; the right panel shows
    one stacked bar per agent. Values are percentages of that agent's codes.
    The figure is written to ``fig_path`` (a notebook-level variable) under the
    name ``save_path`` and then closed.

    Args:
        df: DataFrame with a ``codes_dict`` column; each cell maps an agent
            name ('Ava', 'Ben', 'Cam') to the code that agent assigned.
        save_path: File name of the saved figure inside ``fig_path``.

    Returns:
        None.
    """
    # Extract codes
    all_codes = {'Ava': [], 'Ben': [], 'Cam': []}
    for codes in df['codes_dict']:
        for agent, code in codes.items():
            all_codes[agent].append(code)

    code_types = ['WCT', 'NONE', 'GT', 'Other']
    agents = ['Ava', 'Ben', 'Cam']

    # percentages[agent] lists the share of each code, aligned with code_types.
    # Codes an agent never used count as 0 (also covers an agent with no rows).
    percentages = {}
    for agent in agents:
        counts = Counter(all_codes[agent])
        total = len(all_codes[agent])
        percentages[agent] = [
            (counts[code] / total * 100) if code in counts else 0
            for code in code_types
        ]

    # Create plot
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    # Grouped bar chart: agents side by side within each code group.
    x = np.arange(len(code_types))
    width = 0.25

    for i, agent in enumerate(agents):
        ax1.bar(x + i*width, percentages[agent], width, label=agent)

    ax1.set_xlabel('Code Type', fontsize=12)
    ax1.set_ylabel('Percentage (%)', fontsize=12)
    ax1.set_title('Code Distribution by Agent', fontsize=14, fontweight='bold')
    ax1.set_xticks(x + width)  # centre the tick under the middle of the three bars
    ax1.set_xticklabels(code_types)
    ax1.legend()
    ax1.grid(axis='y', alpha=0.3)

    # Stacked bar chart. Each segment must start where the previous one ended;
    # without `bottom` all segments start at 0 and simply overdraw each other.
    bottoms = np.zeros(len(agents))
    for code_idx, code in enumerate(code_types):
        code_data = np.array(
            [percentages[agent][code_idx] for agent in agents], dtype=float
        )
        ax2.bar(agents, code_data, bottom=bottoms, label=code)
        bottoms = bottoms + code_data

    ax2.set_xlabel('Agent', fontsize=12)
    ax2.set_ylabel('Percentage (%)', fontsize=12)
    ax2.set_title('Code Distribution (Stacked)', fontsize=14, fontweight='bold')
    ax2.legend(title='Code Type')
    ax2.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'{fig_path}/{save_path}', dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path}")
    plt.close()

def plot_agreement_patterns(df, save_path='agreement_patterns.png'):
    """Plot how often the agents agree, as a pie chart and a horizontal bar chart.

    Each row is assigned to exactly one bucket: all three codes equal, exactly
    one pair equal, or no two codes equal. The figure is written to ``fig_path``
    (a notebook-level variable) under the name ``save_path`` and then closed.

    Args:
        df: DataFrame with a ``codes_dict`` column; each cell must provide the
            keys 'Ava', 'Ben' and 'Cam'.
        save_path: File name of the saved figure inside ``fig_path``.

    Returns:
        None.
    """
    bucket_labels = ['All 3 Agree', 'Ava & Ben', 'Ava & Cam', 'Ben & Cam', 'All Disagree']
    agreement_counts = dict.fromkeys(bucket_labels, 0)

    for codes in df['codes_dict']:
        ava, ben, cam = codes['Ava'], codes['Ben'], codes['Cam']

        # The all-equal case is tested first, so a pair bucket means the third
        # agent chose a different code.
        if ava == ben and ben == cam:
            bucket = 'All 3 Agree'
        elif ava == ben:
            bucket = 'Ava & Ben'
        elif ava == cam:
            bucket = 'Ava & Cam'
        elif ben == cam:
            bucket = 'Ben & Cam'
        else:
            bucket = 'All Disagree'
        agreement_counts[bucket] += 1

    fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(16, 6))
    colors = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c', '#95a5a6']

    # Left panel: share of each agreement pattern.
    _wedges, _texts, autotexts = pie_ax.pie(
        agreement_counts.values(),
        labels=agreement_counts.keys(),
        autopct='%1.1f%%',
        colors=colors,
        startangle=90
    )
    pie_ax.set_title('Agreement Pattern Distribution', fontsize=14, fontweight='bold')

    # White bold percentages are easier to read on the coloured wedges.
    for label_text in autotexts:
        label_text.set_color('white')
        label_text.set_fontweight('bold')

    # Right panel: absolute counts, same colours as the pie.
    names = list(agreement_counts.keys())
    values = list(agreement_counts.values())
    bar_ax.barh(names, values, color=colors)
    bar_ax.set_xlabel('Count', fontsize=12)
    bar_ax.set_title('Agreement Pattern Counts', fontsize=14, fontweight='bold')
    bar_ax.grid(axis='x', alpha=0.3)

    # Write each count just past the end of its bar.
    for position, value in enumerate(values):
        bar_ax.text(value + 1, position, str(value), va='center')

    plt.tight_layout()
    plt.savefig(f'{fig_path}/{save_path}', dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path}")
    plt.close()

def plot_round_evolution(df, save_path='round_evolution.png'):
    """Plot, per agent, the percentage of each code across rounds.

    One panel per agent, one line per code. The figure is written to
    ``fig_path`` (a notebook-level variable) under the name ``save_path`` and
    then closed.

    Args:
        df: DataFrame with a ``round`` column and a ``codes_dict`` column whose
            cells map agent names to codes.
        save_path: File name of the saved figure inside ``fig_path``.

    Returns:
        None.
    """
    fig, axes = plt.subplots(3, 1, figsize=(14, 12))
    agents = ['Ava', 'Ben', 'Cam']
    # Code -> line colour; insertion order is the plotting/legend order.
    code_colors = {
        'WCT': '#3498db',
        'NONE': '#e74c3c',
        'GT': '#2ecc71',
        'Other': '#f39c12',
    }
    rounds = sorted(df['round'].unique())

    for ax, agent in zip(axes, agents):
        # This agent's codes for every round, gathered once and reused per code.
        codes_by_round = [
            [entry[agent] for entry in df.loc[df['round'] == round_num, 'codes_dict']]
            for round_num in rounds
        ]

        for code, color in code_colors.items():
            round_percentages = []
            for round_codes in codes_by_round:
                if round_codes:
                    share = round_codes.count(code) / len(round_codes) * 100
                else:
                    share = 0
                round_percentages.append(share)

            ax.plot(rounds, round_percentages,
                    marker='o', linewidth=2, label=code, color=color)

        ax.set_xlabel('Round', fontsize=11)
        ax.set_ylabel('Percentage (%)', fontsize=11)
        ax.set_title(f'{agent}\'s Coding Evolution Across Rounds',
                     fontsize=13, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.set_xticks(rounds)

    plt.tight_layout()
    target = os.path.join(fig_path, save_path)
    plt.savefig(target, dpi=300, bbox_inches='tight')
    print(f"Saved: {target}")
    plt.close()

def plot_similarity_heatmap(df, save_path='similarity_heatmap.png'):
    """Plot mean pairwise similarity per round as a heatmap (plain matplotlib).

    Rows are agent pairs, columns are rounds. Prints a message and returns
    without plotting when the frame contains no rounds. Otherwise the figure is
    written to ``fig_path`` (a notebook-level variable) under the name
    ``save_path`` and then closed.

    Args:
        df: DataFrame with a ``round`` column and the score columns
            ``sim_Ava_Ben``, ``sim_Ava_Cam`` and ``sim_Ben_Cam``.
        save_path: File name of the saved figure inside ``fig_path``.

    Returns:
        None.
    """
    rounds = sorted(df['round'].unique())
    if len(rounds) == 0:
        print("No rounds found for similarity heatmap.")
        return

    # Displayed pair label -> column holding that pair's score.
    pair_columns = {
        'Ava-Ben': 'sim_Ava_Ben',
        'Ava-Cam': 'sim_Ava_Cam',
        'Ben-Cam': 'sim_Ben_Cam',
    }
    frames_by_round = [df[df['round'] == round_num] for round_num in rounds]
    similarity_data = {
        pair: [round_df[column].mean() for round_df in frames_by_round]
        for pair, column in pair_columns.items()
    }

    round_labels = [f'Round {r}' for r in rounds]
    heatmap_data = pd.DataFrame(similarity_data, index=round_labels)

    fig, ax = plt.subplots(figsize=(10, 6))
    # Transposed so that pairs run down the y-axis and rounds along the x-axis.
    im = ax.imshow(heatmap_data.T.values, aspect='auto', cmap='YlGnBu')

    pair_labels = heatmap_data.columns
    ax.set_yticks(range(len(pair_labels)))
    ax.set_yticklabels(pair_labels)
    ax.set_xticks(range(len(heatmap_data.index)))
    ax.set_xticklabels(heatmap_data.index, rotation=45, ha='right')

    ax.set_title('Agent Similarity Scores Across Rounds', fontsize=14, fontweight='bold')
    ax.set_xlabel('Round', fontsize=12)
    ax.set_ylabel('Agent Pair', fontsize=12)
    fig.colorbar(im, ax=ax, label='Similarity Score')

    plt.tight_layout()
    target = os.path.join(fig_path, save_path)
    plt.savefig(target, dpi=300, bbox_inches='tight')
    print(f"Saved: {target}")
    plt.close()

def plot_transition_sankey(df, save_path='transition_flows.png'):
    """Plot each agent's round-to-round code transitions as count heatmaps.

    Despite the name, this draws one annotated heatmap per agent (rows: code in
    a round, columns: code in the following round), not a Sankey diagram. All
    panels share one colour scale so the single colorbar is valid for each of
    them. The figure is written to ``fig_path`` (a notebook-level variable)
    under the name ``save_path`` and then closed.

    Only transitions between the four codes in ``code_types`` are shown; any
    other code value does not appear in the matrices.

    Args:
        df: DataFrame with ``row_index``, ``round`` and ``codes_dict`` columns.
        save_path: File name of the saved figure inside ``fig_path``.

    Returns:
        None.
    """
    agents = ['Ava', 'Ben', 'Cam']
    code_types = ['WCT', 'NONE', 'GT', 'Other']
    n_codes = len(code_types)

    # Build every agent's matrix before drawing, so the colour scale can be
    # chosen from the global maximum.
    matrices = {}
    for agent in agents:
        transitions = defaultdict(lambda: defaultdict(int))

        # Calculate transitions between consecutive rounds of the same item.
        for row_idx in df['row_index'].unique():
            row_data = df[df['row_index'] == row_idx].sort_values('round')
            codes = [entry[agent] for entry in row_data['codes_dict']]

            for from_code, to_code in zip(codes, codes[1:]):
                transitions[from_code][to_code] += 1

        matrix = np.zeros((n_codes, n_codes))
        for i, from_code in enumerate(code_types):
            for j, to_code in enumerate(code_types):
                matrix[i][j] = transitions[from_code].get(to_code, 0)
        matrices[agent] = matrix

    # Shared upper bound; at least 1 so an all-zero input still yields a valid
    # colour range.
    shared_vmax = max(max(m.max() for m in matrices.values()), 1)

    # Constrained layout handles a colorbar spanning several axes; tight_layout
    # does not and emitted a compatibility warning for this figure.
    fig, axes = plt.subplots(3, 1, figsize=(14, 12), constrained_layout=True)

    for ax, agent in zip(axes, agents):
        matrix = matrices[agent]

        # Fixed vmin/vmax: previously each panel autoscaled on its own while
        # the colorbar reflected only the last panel.
        im = ax.imshow(matrix, cmap='Blues', vmin=0, vmax=shared_vmax)
        ax.set_xticks(range(n_codes))
        ax.set_yticks(range(n_codes))
        ax.set_xticklabels(code_types)
        ax.set_yticklabels(code_types)
        ax.set_title(f"{agent}'s Code Transitions", fontsize=13, fontweight='bold')
        ax.set_xlabel('To Code', fontsize=11)
        ax.set_ylabel('From Code', fontsize=11)

        # Annotate every cell with its count.
        for i in range(n_codes):
            for j in range(n_codes):
                ax.text(j, i, int(matrix[i, j]), ha='center', va='center', color='black')

    fig.colorbar(im, ax=axes, label='Transition Count')
    save_path_full = os.path.join(fig_path, save_path)
    plt.savefig(save_path_full, dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path_full}")
    plt.close()



### run

In [9]:

# Render and save every figure from the frame loaded in the analysis section.
# Each call prints a "Saved: ..." line once its file has been written.
plot_overall_distribution(flat_df)
plot_agreement_patterns(flat_df)
plot_round_evolution(flat_df)
# NOTE(review): the similarity heatmap is disabled here; the reason is not
# recorded in the notebook — confirm before re-enabling.
# plot_similarity_heatmap(flat_df)
plot_transition_sankey(flat_df)

Saved: agent_code_distribution.png
Saved: agreement_patterns.png
Saved: C:\Users\bahar\Repositories\llm-ta-aied26\results\figures\round_evolution.png


  plt.tight_layout()


Saved: C:\Users\bahar\Repositories\llm-ta-aied26\results\figures\transition_flows.png
