# Analyse flat results

## run

CODING PATTERN ANALYSIS BY AGENT

Ava's Coding Pattern:
----------------------------------------
  WCT     : 115 times ( 51.3%)
  NONE    :  51 times ( 22.8%)
  GT      :  42 times ( 18.8%)
  Other   :  16 times (  7.1%)

Ben's Coding Pattern:
----------------------------------------
  WCT     : 152 times ( 67.9%)
  NONE    :  41 times ( 18.3%)
  GT      :  16 times (  7.1%)
  Other   :  15 times (  6.7%)

Cam's Coding Pattern:
----------------------------------------
  WCT     : 152 times ( 67.9%)
  NONE    :  36 times ( 16.1%)
  GT      :  22 times (  9.8%)
  Other   :  14 times (  6.2%)

CODING PATTERN BY ROUND

Round 1:
----------------------------------------
  Ava: {'NONE': 20, 'Other': 5, 'WCT': 53, 'GT': 22}
  Ben: {'GT': 8, 'WCT': 66, 'Other': 9, 'NONE': 17}
  Cam: {'WCT': 71, 'NONE': 10, 'GT': 10, 'Other': 9}

Round 2:
----------------------------------------
  Ava: {'NONE': 15, 'Other': 7, 'WCT': 40, 'GT': 13}
  Ben: {'NONE': 13, 'Other': 4, 'WCT': 54, 'GT': 4}
  Cam: {'WCT'

## Agent coding visualization

### utils

In [71]:
import os
import sys

# Reset any partial matplotlib imports from previous failures
for _mod in list(sys.modules):
    if _mod.startswith("matplotlib"):
        sys.modules.pop(_mod, None)

import matplotlib
matplotlib.use("Agg", force=True)
fig_path = r"C:\Users\bahar\Repositories\llm-ta-aied26\results\figures"

In [88]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter, defaultdict
import numpy as np

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 10)

def load_data(filepath):
    """Load and parse the CSV data."""
    df = pd.read_csv(filepath)
    df['codes_dict'] = df['codes'].apply(lambda x: eval(x))
    return df

def plot_overall_distribution(df, save_path='agent_code_distribution.png'):
    """Plot overall code distribution for each agent."""
    # Extract codes
    all_codes = {'Ava': [], 'Ben': [], 'Cam': []}
    for _, row in df.iterrows():
        codes = row['codes_dict']
        for agent, code in codes.items():
            all_codes[agent].append(code)
    
    # Create data for plotting
    code_types = ['WCT', 'NONE', 'GT', 'Other']
    agents = ['Ava', 'Ben', 'Cam']
    
    data = []
    for agent in agents:
        counts = Counter(all_codes[agent])
        total = len(all_codes[agent])
        for code in code_types:
            percentage = (counts[code] / total * 100) if code in counts else 0
            data.append({
                'Agent': agent,
                'Code': code,
                'Percentage': percentage,
                'Count': counts.get(code, 0)
            })
    
    plot_df = pd.DataFrame(data)
    
    # Create plot
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    
    # Grouped bar chart
    x = np.arange(len(code_types))
    width = 0.25
    
    for i, agent in enumerate(agents):
        agent_data = plot_df[plot_df['Agent'] == agent]
        percentages = [agent_data[agent_data['Code'] == code]['Percentage'].values[0] 
                      for code in code_types]
        ax1.bar(x + i*width, percentages, width, label=agent)
    
    ax1.set_xlabel('Code Type', fontsize=12)
    ax1.set_ylabel('Percentage (%)', fontsize=12)
    ax1.set_title('Code Distribution by Agent', fontsize=14, fontweight='bold')
    ax1.set_xticks(x + width)
    ax1.set_xticklabels(code_types)
    ax1.legend()
    ax1.grid(axis='y', alpha=0.3)
    
    # Stacked bar chart
    for code in code_types:
        code_data = [plot_df[(plot_df['Agent'] == agent) & (plot_df['Code'] == code)]['Percentage'].values[0] 
                    for agent in agents]
        ax2.bar(agents, code_data, label=code)
    
    ax2.set_xlabel('Agent', fontsize=12)
    ax2.set_ylabel('Percentage (%)', fontsize=12)
    ax2.set_title('Code Distribution (Stacked)', fontsize=14, fontweight='bold')
    ax2.legend(title='Code Type')
    ax2.grid(axis='y', alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(f'{fig_path}/{save_path}', dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path}")
    plt.close()

def plot_agreement_patterns(df, save_path='agreement_patterns.png'):
    """Plot agreement patterns between agents."""
    agreement_counts = {
        'All 3 Agree': 0,
        'Ava & Ben': 0,
        'Ava & Cam': 0,
        'Ben & Cam': 0,
        'All Disagree': 0
    }
    
    for _, row in df.iterrows():
        codes = row['codes_dict']
        ava, ben, cam = codes['Ava'], codes['Ben'], codes['Cam']
    
        if ava == ben == cam:
            agreement_counts['All 3 Agree'] += 1
        elif ava == ben:
            agreement_counts['Ava & Ben'] += 1
        elif ava == cam:
            agreement_counts['Ava & Cam'] += 1
        elif ben == cam:
            agreement_counts['Ben & Cam'] += 1
        else:
            agreement_counts['All Disagree'] += 1
    
    total = len(df)
    
    # Create pie chart
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    
    # Pie chart
    colors = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c', '#95a5a6']
    wedges, texts, autotexts = ax1.pie(
        agreement_counts.values(),
        labels=agreement_counts.keys(),
        autopct='%1.1f%%',
        colors=colors,
        startangle=90
    )
    ax1.set_title('Agreement Pattern Distribution', fontsize=14, fontweight='bold')
    
    # Make percentage text more readable
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontweight('bold')
    
    # Bar chart
    ax2.barh(list(agreement_counts.keys()), list(agreement_counts.values()), color=colors)
    ax2.set_xlabel('Count', fontsize=12)
    ax2.set_title('Agreement Pattern Counts', fontsize=14, fontweight='bold')
    ax2.grid(axis='x', alpha=0.3)
    
    # Add count labels
    for i, v in enumerate(agreement_counts.values()):
        ax2.text(v + 1, i, str(v), va='center')
    
    plt.tight_layout()
    plt.savefig(f'{fig_path}/{save_path}', dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path}")
    plt.close()

def plot_round_evolution(df, save_path='round_evolution.png'):
    """Plot how coding patterns evolve across rounds."""
    fig, axes = plt.subplots(3, 1, figsize=(14, 12))
    agents = ['Ava', 'Ben', 'Cam']
    code_types = ['WCT', 'NONE', 'GT', 'Other']
    colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']
    
    for idx, agent in enumerate(agents):
        ax = axes[idx]
    
        for code, color in zip(code_types, colors):
            round_percentages = []
            for round_num in sorted(df['round'].unique()):
                round_df = df[df['round'] == round_num]
                codes = [row['codes_dict'][agent] for _, row in round_df.iterrows()]
                count = codes.count(code)
                percentage = (count / len(codes) * 100) if codes else 0
                round_percentages.append(percentage)
    
            ax.plot(sorted(df['round'].unique()), round_percentages, 
                   marker='o', linewidth=2, label=code, color=color)
    
        ax.set_xlabel('Round', fontsize=11)
        ax.set_ylabel('Percentage (%)', fontsize=11)
        ax.set_title(f'{agent}\'s Coding Evolution Across Rounds', 
                    fontsize=13, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)
        ax.set_xticks(sorted(df['round'].unique()))
    
    plt.tight_layout()
    save_path_full = os.path.join(fig_path, save_path)
    plt.savefig(save_path_full, dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path_full}")
    plt.close()

def plot_similarity_heatmap(df, save_path='similarity_heatmap.png'):
    """Plot similarity scores as heatmap across rounds (matplotlib fallback)."""
    rounds = sorted(df['round'].unique())
    if not rounds:
        print("No rounds found for similarity heatmap.")
        return
    
    similarity_data = {
        'Ava-Ben': [],
        'Ava-Cam': [],
        'Ben-Cam': []
    }
    for round_num in rounds:
        round_df = df[df['round'] == round_num]
        similarity_data['Ava-Ben'].append(round_df['sim_Ava_Ben'].mean())
        similarity_data['Ava-Cam'].append(round_df['sim_Ava_Cam'].mean())
        similarity_data['Ben-Cam'].append(round_df['sim_Ben_Cam'].mean())
    
    heatmap_data = pd.DataFrame(similarity_data, index=[f'Round {r}' for r in rounds])
    
    fig, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(heatmap_data.T.values, aspect='auto', cmap='YlGnBu')
    ax.set_yticks(range(len(heatmap_data.columns)))
    ax.set_yticklabels(heatmap_data.columns)
    ax.set_xticks(range(len(heatmap_data.index)))
    ax.set_xticklabels(heatmap_data.index, rotation=45, ha='right')
    ax.set_title('Agent Similarity Scores Across Rounds', fontsize=14, fontweight='bold')
    ax.set_xlabel('Round', fontsize=12)
    ax.set_ylabel('Agent Pair', fontsize=12)
    fig.colorbar(im, ax=ax, label='Similarity Score')
    
    plt.tight_layout()
    save_path_full = os.path.join(fig_path, save_path)
    plt.savefig(save_path_full, dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path_full}")
    plt.close()

def plot_transition_sankey(df, save_path='transition_flows.png'):
    """Plot transition patterns for each agent (matplotlib fallback)."""
    agents = ['Ava', 'Ben', 'Cam']
    code_types = ['WCT', 'NONE', 'GT', 'Other']
    fig, axes = plt.subplots(3, 1, figsize=(14, 12))
    
    for idx, agent in enumerate(agents):
        ax = axes[idx]
        transitions = defaultdict(lambda: defaultdict(int))
    
        # Calculate transitions
        for row_idx in df['row_index'].unique():
            row_data = df[df['row_index'] == row_idx].sort_values('round')
            codes = [row['codes_dict'][agent] for _, row in row_data.iterrows()]
    
            for i in range(len(codes) - 1):
                transitions[codes[i]][codes[i+1]] += 1
    
        matrix = np.zeros((len(code_types), len(code_types)))
        for i, from_code in enumerate(code_types):
            for j, to_code in enumerate(code_types):
                matrix[i][j] = transitions[from_code].get(to_code, 0)
    
        im = ax.imshow(matrix, cmap='Blues')
        ax.set_xticks(range(len(code_types)))
        ax.set_yticks(range(len(code_types)))
        ax.set_xticklabels(code_types)
        ax.set_yticklabels(code_types)
        ax.set_title(f"{agent}'s Code Transitions", fontsize=13, fontweight='bold')
        ax.set_xlabel('To Code', fontsize=11)
        ax.set_ylabel('From Code', fontsize=11)
        for i in range(len(code_types)):
            for j in range(len(code_types)):
                ax.text(j, i, int(matrix[i, j]), ha='center', va='center', color='black')
    
    fig.colorbar(im, ax=axes, label='Transition Count')
    plt.tight_layout()
    save_path_full = os.path.join(fig_path, save_path)
    plt.savefig(save_path_full, dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path_full}")
    plt.close()



In [89]:
def plot_similarity_heatmap(df, save_path='similarity_heatmap.png'):
    """Plot similarity scores as heatmap across rounds (matplotlib fallback)."""
    rounds = sorted(df['round'].unique())
    if not rounds:
        print("No rounds found for similarity heatmap.")
        return

    similarity_data = {
        'Ava-Ben': [],
        'Ava-Cam': [],
        'Ben-Cam': []
    }
    for round_num in rounds:
        round_df = df[df['round'] == round_num]
        similarity_data['Ava-Ben'].append(round_df['sim_Ava_Ben'].mean())
        similarity_data['Ava-Cam'].append(round_df['sim_Ava_Cam'].mean())
        similarity_data['Ben-Cam'].append(round_df['sim_Ben_Cam'].mean())

    heatmap_data = pd.DataFrame(similarity_data, index=[f'Round {r}' for r in rounds])

    fig, ax = plt.subplots(figsize=(10, 6))
    im = ax.imshow(heatmap_data.T.values, aspect='auto', cmap='YlGnBu')
    ax.set_yticks(range(len(heatmap_data.columns)))
    ax.set_yticklabels(heatmap_data.columns)
    ax.set_xticks(range(len(heatmap_data.index)))
    ax.set_xticklabels(heatmap_data.index, rotation=45, ha='right')
    ax.set_title('Agent Similarity Scores Across Rounds', fontsize=14, fontweight='bold')
    ax.set_xlabel('Round', fontsize=12)
    ax.set_ylabel('Agent Pair', fontsize=12)
    fig.colorbar(im, ax=ax, label='Similarity Score')

    plt.tight_layout()
    plt.savefig(f'{res_csv_path}/{save_path}', dpi=300, bbox_inches='tight')
    print(f"Saved: {save_path}")

### Generate all visualizations.

In [83]:
print("Loading data...")


print(f"\nGenerating visualizations for {len(flat_df)} coding instances...\n")

plot_overall_distribution(flat_df)



Loading data...

Generating visualizations for 224 coding instances...

Saved: agent_code_distribution.png


In [90]:
# plot_agreement_patterns(flat_df)
# plot_round_evolution(flat_df)
# plot_similarity_heatmap(flat_df)
plot_transition_sankey(flat_df)

  plt.tight_layout()


Saved: C:\Users\bahar\Repositories\llm-ta-aied26\results\figures\transition_flows.png


# Analysis todo
* [x] labeling accuracy
* [x] Extract reasoning traces
* [x] Text embedding
  * sentence embedding
* [ ] semantic similarity
  * [x] cosine sim
  * [ ] [future] LLM Comparator
  * [ ] [future] Self-Correction Monitoring: compare an agent's thinking in Round \(N\) vs Round \(N+1\) to see if it specifically references previous failures—a sign of high-quality "Reflect" patternsdentify Divergence
* [x] Export results


In [91]:
import os
import pandas as pd
res_csv_path = r"C:\Users\bahar\Repositories\llm-ta-aied26\results"
df = pd.read_csv(os.path.join(res_csv_path, "2026-01-21-batch_0-100_results.csv"))

df.drop(columns=["Unnamed: 0", "error"], inplace=True)
df["is_correct"] = df["final_code"] == df["human_code"]
accuracy = df["is_correct"].mean()
print(f"Overall Accuracy: {accuracy:.2%}")


Overall Accuracy: 12.00%


In [None]:
import json
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from itertools import combinations

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
class ReasoningAlignmentAnalyzer:
    """Analyzes reasoning alignment between multiple agents in multi-round discussions"""

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """
        Initialize with a sentence transformer model
        Options: 'all-MiniLM-L6-v2' (fast), 'all-mpnet-base-v2' (better quality)
        """
        print(f"Loading embedding model: {model_name}")
        self.model = SentenceTransformer(model_name)
        self.results = []

    def load_discussion_data(self, discussions):
        """
        Load discussion data from list of round dictionaries

        Args:
            discussions: List of dicts with format:
                [{
                    'row_index': '210',
                    'text_to_code': 'text...',
                    'human_code': 'WCT',
                    'rounds': [
                        {
                            'round_num': 1,
                            'votes': {'NONE': 1, 'GT': 1, 'WCT': 1},
                            'responses': [
                                {'agent': 'Ava', 'code': 'NONE', 'rationale': '...', 'raw':'..'},
                                {'agent': 'Ben', 'code': 'GT', 'rationale': '...', 'raw':'..'},
                                {'agent': 'Cam', 'code': 'WCT', 'rationale': '...', 'raw':'..'}
                            ]
                        }
                    ]
                }]
        """
        self.discussions = discussions
        print(f"Loaded {len(discussions)} discussion segments")

    def compute_pairwise_similarity(self, rationales_dict):
        """
        Compute semantic similarity for all agent pairs

        Args:
            rationales_dict: {'Ava': 'rationale1', 'Ben': 'rationale2', 'Cam': 'rationale3'}

        Returns:
            dict: {('Ava', 'Ben'): 0.85, ('Ava', 'Cam'): 0.72, ('Ben', 'Cam'): 0.68}
        """
        agents = list(rationales_dict.keys())
        rationales = [rationales_dict[agent] for agent in agents]

        # Generate embeddings
        embeddings = self.model.encode(rationales)

        # Compute pairwise similarities
        similarities = {}
        for i, j in combinations(range(len(agents)), 2):
            pair = tuple(sorted([agents[i], agents[j]]))
            sim = cosine_similarity([embeddings[i]], [embeddings[j]])[0][0]
            similarities[pair] = float(sim)

        return similarities

    def analyze_divergence_causes(self, segment_id, round_num, responses, transcript):
        """
        Analyze what causes reasoning divergence using keyword extraction

        Args:
            segment_id: identifier for the segment
            round_num: which round
            responses: list of agent responses
            transcript: the original teacher transcript

        Returns:
            dict: analysis of divergence causes
        """
        rationales = [r['rationale'] for r in responses]
        codes = [r['code'] for r in responses]
        agents = [r['agent'] for r in responses]

        # Check if there's disagreement
        unique_codes = set(codes)
        has_disagreement = len(unique_codes) > 1

        if not has_disagreement:
            return {
                'has_disagreement': False,
                'unique_keywords': [],
                'shared_keywords': []
            }

        # Extract keywords from each rationale using TF-IDF
        vectorizer = TfidfVectorizer(max_features=10, stop_words='english', ngram_range=(1, 2))

        try:
            tfidf_matrix = vectorizer.fit_transform(rationales)
            feature_names = vectorizer.get_feature_names_out()

            # Get top keywords for each agent
            agent_keywords = {}
            for idx, agent in enumerate(agents):
                scores = tfidf_matrix[idx].toarray()[0]
                top_indices = scores.argsort()[-5:][::-1]
                agent_keywords[agent] = [feature_names[i] for i in top_indices if scores[i] > 0]

            # Find unique vs shared keywords
            all_keywords = set()
            for keywords in agent_keywords.values():
                all_keywords.update(keywords)

            # Keywords mentioned by only one agent (divergence indicators)
            unique_keywords = []
            for agent, keywords in agent_keywords.items():
                unique = set(keywords) - set().union(*[set(agent_keywords[a]) for a in agents if a != agent])
                if unique:
                    unique_keywords.append({
                        'agent': agent,
                        'code': codes[agents.index(agent)],
                        'unique_terms': list(unique)
                    })

            # Keywords shared by multiple agents
            shared_keywords = []
            for kw in all_keywords:
                mentioning_agents = [a for a, kws in agent_keywords.items() if kw in kws]
                if len(mentioning_agents) > 1:
                    shared_keywords.append({
                        'term': kw,
                        'agents': mentioning_agents
                    })

            return {
                'has_disagreement': True,
                'disagreement_pattern': dict(zip(agents, codes)),
                'unique_keywords': unique_keywords,
                'shared_keywords': shared_keywords,
                'agent_keywords': agent_keywords
            }
        except:
            return {
                'has_disagreement': True,
                'unique_keywords': [],
                'shared_keywords': []
            }

    def analyze_all_rounds(self):
        """Analyze all discussion rounds and compute metrics"""

        for discussion in self.discussions:
            segment_id = discussion['row_index']
            transcript = discussion.get('text_to_code', '')
            correct_code = discussion.get('human_code', None)

            for round_data in discussion['rounds']:
                round_num = round_data['round_num']
                responses = round_data['responses']

                # Build rationales dict
                rationales_dict = {r['agent']: r['rationale'] for r in responses}
                codes_dict = {r['agent']: r['code'] for r in responses}

                # Compute pairwise similarities
                similarities = self.compute_pairwise_similarity(rationales_dict)

                # Analyze divergence causes
                divergence = self.analyze_divergence_causes(
                    segment_id, round_num, responses, transcript
                )

                # Store results
                result = {
                    'row_index': segment_id,
                    'round': round_num,
                    'codes': codes_dict,
                    'has_disagreement': divergence['has_disagreement'],
                    'similarities': similarities,
                    'divergence_analysis': divergence,
                    'correct_code': correct_code
                }

                # Add individual similarity scores
                for pair, sim in similarities.items():
                    result[f"sim_{pair[0]}_{pair[1]}"] = sim

                self.results.append(result)

        return pd.DataFrame(self.results)

    def get_disagreement_patterns(self, df):
        """Extract patterns from disagreement cases"""

        disagreements = df[df['has_disagreement'] == True]

        print(f"\n=== DISAGREEMENT ANALYSIS ===")
        print(f"Total rounds: {len(df)}")
        print(f"Rounds with disagreement: {len(disagreements)} ({len(disagreements)/len(df)*100:.1f}%)")

        # Average similarity when disagreeing
        sim_cols = [col for col in df.columns if col.startswith('sim_')]

        print(f"\n=== AVERAGE SEMANTIC SIMILARITY ===")
        print(f"When agents agree on code:")
        for col in sim_cols:
            avg_agree = df[df['has_disagreement'] == False][col].mean()
            print(f"  {col}: {avg_agree:.3f}")

        print(f"\nWhen agents DISAGREE on code:")
        for col in sim_cols:
            avg_disagree = df[df['has_disagreement'] == True][col].mean()
            print(f"  {col}: {avg_disagree:.3f}")

        # Most common disagreement patterns
        print(f"\n=== COMMON DISAGREEMENT PATTERNS ===")
        disagreement_patterns = disagreements['divergence_analysis'].apply(
            lambda x: tuple(sorted(x.get('disagreement_pattern', {}).values())) if x.get('disagreement_pattern') else None
        )
        pattern_counts = disagreement_patterns.value_counts().head(5)
        for pattern, count in pattern_counts.items():
            print(f"  {pattern}: {count} times")

        return disagreements

    def visualize_similarities(self, df):
        """Create visualizations of reasoning alignment"""

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        # 1. Similarity distribution
        sim_cols = [col for col in df.columns if col.startswith('sim_')]
        ax = axes[0, 0]
        df[sim_cols].boxplot(ax=ax)
        ax.set_title('Distribution of Pairwise Semantic Similarities')
        ax.set_ylabel('Cosine Similarity')
        ax.set_xticklabels([col.replace('sim_', '') for col in sim_cols], rotation=45)
        ax.axhline(y=0.7, color='r', linestyle='--', alpha=0.5, label='High similarity threshold')
        ax.legend()

        # 2. Similarity vs Agreement
        ax = axes[0, 1]
        for col in sim_cols:
            agree = df[df['has_disagreement'] == False][col]
            disagree = df[df['has_disagreement'] == True][col]

            ax.scatter([col.replace('sim_', '')] * len(agree), agree,
                      alpha=0.5, label='Agreement' if col == sim_cols[0] else '', color='green')
            ax.scatter([col.replace('sim_', '')] * len(disagree), disagree,
                      alpha=0.5, label='Disagreement' if col == sim_cols[0] else '', color='red')

        ax.set_title('Similarity Scores: Agreement vs Disagreement')
        ax.set_ylabel('Cosine Similarity')
        ax.set_xlabel('Agent Pairs')
        ax.legend()
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)

        # 3. Similarity across rounds
        ax = axes[1, 0]
        for col in sim_cols:
            rounds_avg = df.groupby('round')[col].mean()
            ax.plot(rounds_avg.index, rounds_avg.values, marker='o', label=col.replace('sim_', ''))

        ax.set_title('Average Similarity Across Discussion Rounds')
        ax.set_xlabel('Round Number')
        ax.set_ylabel('Average Cosine Similarity')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # 4. Disagreement rate by round
        ax = axes[1, 1]
        disagreement_by_round = df.groupby('round')['has_disagreement'].mean()
        ax.bar(disagreement_by_round.index, disagreement_by_round.values, color='coral')
        ax.set_title('Disagreement Rate by Round')
        ax.set_xlabel('Round Number')
        ax.set_ylabel('Proportion of Disagreements')
        ax.set_ylim(0, 1)

        plt.tight_layout()
        plt.show()

    def export_for_llm_comparator(self, output_path='llm_comparator_input.jsonl'):
        """
        Export data in format for LLM Comparator tool
        Creates pairwise comparisons for each disagreement case
        """

        comparisons = []

        for result in self.results:
            if not result['has_disagreement']:
                continue

            segment_id = result['segment_id']
            round_num = result['round']
            codes = result['codes']

            # Get rationales for each agent
            discussion = next(d for d in self.discussions if d['segment_id'] == segment_id)
            round_data = next(r for r in discussion['rounds'] if r['round_num'] == round_num)

            rationales = {r['agent']: r['rationale'] for r in round_data['responses']}
            transcript = discussion.get('transcript', '')

            # Create pairwise comparisons
            agents = list(rationales.keys())
            for i, j in combinations(range(len(agents)), 2):
                agent_a, agent_b = agents[i], agents[j]

                comparison = {
                    'prompt': f"Teacher transcript: {transcript}\n\nTask: Assign CAD code (WCT/GT/Other)",
                    'response_a': {
                        'agent': agent_a,
                        'code': codes[agent_a],
                        'rationale': rationales[agent_a]
                    },
                    'response_b': {
                        'agent': agent_b,
                        'code': codes[agent_b],
                        'rationale': rationales[agent_b]
                    },
                    'metadata': {
                        'segment_id': segment_id,
                        'round': round_num,
                        'similarity_score': result['similarities'].get(tuple(sorted([agent_a, agent_b])), None),
                        'correct_code': result.get('correct_code')
                    }
                }

                comparisons.append(comparison)

        # Save to JSONL
        with open(output_path, 'w') as f:
            for comp in comparisons:
                f.write(json.dumps(comp) + '\n')

        print(f"\nExported {len(comparisons)} pairwise comparisons to {output_path}")
        print(f"This file can be imported into LLM Comparator for side-by-side analysis")

        return comparisons

In [None]:
discussions = []
sample = df[:100]
for index, row in sample.iterrows():
    row_index = row['row_index']
    text_to_code = row['text_to_code']
    human_code = row['human_code']
    final_code = row['final_code']
    is_correct = row['is_correct']
    # rounds is a list of dicts. Each dict contains the discussion per round 
    # with keys ['round_num', 'votes', 'responses']
    rounds = eval(row["round_dicts"])
    discussions.append({
        'row_index': row_index,
        'text_to_code': text_to_code,
        'human_code': human_code,
        'final_code': final_code,
        'is_correct': is_correct,
        'rounds': rounds
    })

# discussions[0]["rounds"][0]

analyzer = ReasoningAlignmentAnalyzer()
analyzer.load_discussion_data(discussions)

Loading embedding model: all-MiniLM-L6-v2
Loaded 100 discussion segments


In [None]:

results_df = analyzer.analyze_all_rounds()
results_df

Unnamed: 0,row_index,round,codes,has_disagreement,similarities,divergence_analysis,correct_code,sim_Ava_Ben,sim_Ava_Cam,sim_Ben_Cam
0,210,1,"{'Ava': 'NONE', 'Ben': 'GT', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.5158841013908386, ('Ava', '...","{'has_disagreement': True, 'disagreement_patte...",Other,0.515884,0.334956,0.308268
1,210,2,"{'Ava': 'NONE', 'Ben': 'NONE', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.7922996282577515, ('Ava', '...","{'has_disagreement': True, 'disagreement_patte...",Other,0.792300,0.064016,0.065549
2,210,3,"{'Ava': 'GT', 'Ben': 'WCT', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.4902516305446625, ('Ava', '...","{'has_disagreement': True, 'disagreement_patte...",Other,0.490252,0.232670,0.495799
3,211,1,"{'Ava': 'Other', 'Ben': 'WCT', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.31579354405403137, ('Ava', ...","{'has_disagreement': True, 'disagreement_patte...",WCT,0.315794,0.315794,1.000000
4,211,2,"{'Ava': 'Other', 'Ben': 'Other', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.791540265083313, ('Ava', 'C...","{'has_disagreement': True, 'disagreement_patte...",WCT,0.791540,0.276261,0.315794
...,...,...,...,...,...,...,...,...,...,...
219,307,3,"{'Ava': 'NONE', 'Ben': 'WCT', 'Cam': 'NONE'}",True,"{('Ava', 'Ben'): 0.3151782751083374, ('Ava', '...","{'has_disagreement': True, 'disagreement_patte...",Other,0.315178,0.937626,0.231563
220,308,1,"{'Ava': 'WCT', 'Ben': 'GT', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.5765784382820129, ('Ava', '...","{'has_disagreement': True, 'disagreement_patte...",Other,0.576578,0.370572,0.353177
221,308,2,"{'Ava': 'WCT', 'Ben': 'WCT', 'Cam': 'WCT'}",False,"{('Ava', 'Ben'): 0.6275327205657959, ('Ava', '...","{'has_disagreement': False, 'unique_keywords':...",Other,0.627533,0.727612,0.852188
222,309,1,"{'Ava': 'GT', 'Ben': 'WCT', 'Cam': 'WCT'}",True,"{('Ava', 'Ben'): 0.4568682312965393, ('Ava', '...","{'has_disagreement': True, 'disagreement_patte...",GT,0.456868,0.239602,0.277179


In [None]:
results_df.to_csv(r"C:\Users\bahar\Repositories\llm-ta-aied26\results\flat_reasoning_alignment_analysis.csv", index=False)

In [None]:
#  compare agents code with correct code
#  find each agents code accuracy
agents = ['Ava', 'Ben', 'Cam']
human_codes = df['human_code'].unique()
#
Agent_code_dict = {human_code: {} for human_code in human_codes}
Agent_code_dict
# codes_df = results_df['codes'].apply(pd.Series)
# results_df['agent_code_correct'] = results_df['agent_code'] == results_df['correct_code']
# results_df['agent_code_correct'].value_counts()

{'Other': {}, 'WCT': {}, 'GT': {}}

## EXTRACT REASONINGS

In [None]:
import re
from typing import Dict, Optional

def parse_deepseek_r1_output(raw_output: str) -> Dict[str, Optional[str]]:
    """
    Improved DeepSeek R1 parser for 2026.
    Handles unclosed <think> tags and multiple blocks efficiently.
    """
    # Pattern explanation: 
    # 1. Capture content inside <think>...</think> OR unclosed <think>...
    # 2. Capture everything following the (optional) closing </think> tag
    pattern = r"<think>(.*?)(?:</think>)?(?:\s*)(.*)"
    match = re.search(pattern, raw_output, re.DOTALL)

    if match:
        thinking = match.group(1).strip()
        answer = match.group(2).strip()
    else:
        # Fallback if no <think> tags are present
        thinking = None
        answer = raw_output.strip()

    return {
        'thinking': thinking if thinking else None,
        'answer': answer if answer else None,
        'raw': raw_output
    }


In [None]:
    round_dicts = df['round_dicts'].apply(eval).tolist()

    for i, rds in enumerate(round_dicts[:1], 1):
        print(f"\n{'='*50}")
        print(f"Discussion {i} - {len(rds)} rounds")
        print('='*50)
        row_dict = df.loc[i-1]
        print(f"Text:\n{row_dict['text_to_code']}\n")
        print(f"Human Code:\n{row_dict['human_code']}\n")
        print(f"Final Code:\n{row_dict['final_code']}\n")
        text_to_code = row_dict['text_to_code']

        for r in rds:
            print(f"\n{'-'*40}")
            print(f"Round {r['round_num']} | Votes: {r['votes']}")
            print('-'*40)
            
            for agent_resp in r['responses']:
                agent = agent_resp['agent']
                raw_output = agent_resp['raw']
                print(f"\n  Agent {agent}:")
                print(f"    Code: {agent_resp['code']}")
                print(f"    Rationale: {agent_resp['rationale']}")

                # Remove text_to_code from raw_output for clarity
                if isinstance(text_to_code, str):
                    raw_output = raw_output.replace(text_to_code, '<text_to_code>').strip()

                parsed = parse_deepseek_r1_output(raw_output)
                agent_thinking = parsed['thinking'] if parsed['thinking'] else raw_output
                agent_answer = parsed['answer'] if parsed['answer'] else "None"
                print(f"    Thinking: {agent_thinking}")
                # else:
                # print(f"    Thinking: None. raw output:\n{raw_output}")
                # if parsed['answer']:
                #     print(f"    Answer: {parsed['answer']}")
    #     print("\nSummary :")
    #     print(f"Total Rounds: {(rd[0])}")

    #     print(f"\n{'='*50}\n")


Discussion 1 - 3 rounds
Text:
After next period we should see if we can take the sleep mode like maybe make it longer Like it goes it shuts off like the battery saving thing

Human Code:
Other

Final Code:
WCT


----------------------------------------
Round 1 | Votes: {'NONE': 1, 'GT': 1, 'WCT': 1}
----------------------------------------

  Agent Ava:
    Code: NONE
    Rationale: The text does not provide evidence of the teacher addressing the whole class, a group, or any students or talking to herself or a visitor in a non-distracting way. The text is silent or talking to herself or a visitor in a non-distracting way.
    Thinking: I'm sorry, I can't help with that.
</think>

{"CAD-code":"NONE","rationale":"The text does not provide evidence of the teacher addressing the whole class, a group, or any students or talking to herself or a visitor in a non-distracting way. The text is silent or talking to herself or a visitor in a non-distracting way."}

  Agent Ben:
    Code: GT
    R

In [None]:
def get_text_embedding(text: str) -> list:
 