# Data Enrichment & Transformation Pipeline
## Prisoner's Dilemma: AI vs Strategy Analysis

This notebook transforms raw prisoner's dilemma game data into a feature-rich dataset for behavioral analysis, including:
- Match identifiers and round tracking
- Agent role classification and expected behaviors
- Cooperation/defection flags and retaliation metrics
- Rule-based behavior pattern classification
- Match outcomes and conformity scoring


In [1]:
# ============================================================================
# SECTION 1: LOAD AND PREPARE RAW DATA
# ============================================================================

import pandas as pd
import numpy as np
import duckdb
import glob
import hashlib
from pathlib import Path
import os
from tqdm import tqdm

print("=" * 80)
print("SECTION 1: LOAD AND PREPARE RAW DATA")
print("=" * 80)

# Load all parquet files
# NOTE: parquet_files is only used for the file count printed below; the
# actual load is done by DuckDB, which expands the same glob pattern itself.
parquet_files = sorted(glob.glob('results/*.parquet'))
print(f"\nâœ“ Found {len(parquet_files)} parquet files")

# Load data using DuckDB for efficiency
# duckdb.connect() with no arguments opens the default connection; it is not
# closed in this cell.
con = duckdb.connect()
# Rows are ordered by agent pairing and then by round. A later cell numbers
# rounds with a per-match cumcount, which depends on this ordering.
query = """
SELECT *
FROM 'results/*.parquet'
ORDER BY agent1_name, agent2_name, round
"""

# Materialize the full result as a pandas DataFrame.
df_raw = con.execute(query).fetchdf()
print(f"âœ“ Loaded {len(df_raw):,} rows from raw data")

# Display schema
print(f"\nRaw data shape: {df_raw.shape}")
print(f"\nColumns: {list(df_raw.columns)}")
print(f"\nData types:\n{df_raw.dtypes}")

# Initial validation
# These are informational prints only; nothing is asserted or filtered here.
print(f"\nData validation:")
# Total number of null cells across every column.
print(f"  - Null values: {df_raw.isnull().sum().sum()}")
print(f"  - Unique agent1_names: {df_raw['agent1_name'].nunique()}")
print(f"  - Unique agent2_names: {df_raw['agent2_name'].nunique()}")
print(f"  - Round range: {df_raw['round'].min()}-{df_raw['round'].max()}")


SECTION 1: LOAD AND PREPARE RAW DATA

âœ“ Found 1416 parquet files
âœ“ Loaded 283,200 rows from raw data

Raw data shape: (283200, 15)

Columns: ['round', 'agent1_name', 'agent1_type', 'agent1_context_mentioned', 'agent1_temperature', 'agent1_move', 'agent1_score', 'agent1_total_score', 'agent2_name', 'agent2_type', 'agent2_context_mentioned', 'agent2_temperature', 'agent2_move', 'agent2_score', 'agent2_total_score']

Data types:
round                         int64
agent1_name                  object
agent1_type                  object
agent1_context_mentioned       bool
agent1_temperature          float64
agent1_move                  object
agent1_score                  int64
agent1_total_score            int64
agent2_name                  object
agent2_type                  object
agent2_context_mentioned       bool
agent2_temperature          float64
agent2_move                  object
agent2_score                  int64
agent2_total_score            int64
dtype: object

Data valida

In [2]:
# ============================================================================
# SECTION 2: CREATE MATCH IDENTIFIERS AND STRUCTURE
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 2: CREATE MATCH IDENTIFIERS AND STRUCTURE")
print("=" * 80)

# Create a unique match_id based on agent combination and context/temperature
def create_match_id(row):
    """Derive a short, deterministic match identifier for one round row.

    The identifier is the first 8 hex characters of the MD5 digest of the
    string form of a 6-tuple: both agent names, both context flags and both
    temperatures, in that order. Rows that agree on all six values get the
    same identifier.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the six
            column names listed in ``key_fields``.

    Returns:
        An 8-character lowercase hexadecimal string.
    """
    key_fields = (
        'agent1_name',
        'agent2_name',
        'agent1_context_mentioned',
        'agent2_context_mentioned',
        'agent1_temperature',
        'agent2_temperature',
    )
    # The hash input is the repr-style string of the tuple of field values.
    signature = str(tuple(row[field] for field in key_fields))
    return hashlib.md5(signature.encode()).hexdigest()[:8]

# Tag every row with its match hash, then number the rows inside each match
# in their current order (1-based).
df_raw['match_id'] = df_raw.apply(create_match_id, axis=1)
df_raw['round_id'] = df_raw.groupby('match_id').cumcount() + 1

# Highest round_id per match, i.e. how many rows each match contributed.
match_round_counts = df_raw.groupby('match_id')['round_id'].max()

print(f"\nâœ“ Created match_id and round_id columns")
print(f"  - Unique matches: {df_raw['match_id'].nunique()}")
print(f"  - Rounds per match (sample):\n{match_round_counts.value_counts().head()}")

# Validate that each match has 200 rounds
is_complete = match_round_counts == 200
valid_matches = match_round_counts[is_complete]
invalid_matches = match_round_counts[~is_complete]

print(f"\nâœ“ Match completeness validation:")
print(f"  - Valid matches (200 rounds): {len(valid_matches)}")
print(f"  - Invalid matches: {len(invalid_matches)}")
if not invalid_matches.empty:
    print(f"\n  Invalid match round counts:\n{invalid_matches.value_counts().head()}")

# Keep only valid matches (copied so later cells can add columns to df
# without touching df_raw).
complete_ids = valid_matches.index
df = df_raw.loc[df_raw['match_id'].isin(complete_ids)].copy()
print(f"\nâœ“ Filtered to {len(df):,} rows from {len(valid_matches)} complete matches")



SECTION 2: CREATE MATCH IDENTIFIERS AND STRUCTURE

âœ“ Created match_id and round_id columns
  - Unique matches: 1416
  - Rounds per match (sample):
round_id
200    1416
Name: count, dtype: int64

âœ“ Match completeness validation:
  - Valid matches (200 rounds): 1416
  - Invalid matches: 0

âœ“ Created match_id and round_id columns
  - Unique matches: 1416
  - Rounds per match (sample):
round_id
200    1416
Name: count, dtype: int64

âœ“ Match completeness validation:
  - Valid matches (200 rounds): 1416
  - Invalid matches: 0

âœ“ Filtered to 283,200 rows from 1416 complete matches

âœ“ Filtered to 283,200 rows from 1416 complete matches


In [17]:
# ============================================================================
# SECTION 3: EXTRACT AGENT ROLE CLASSIFICATIONS
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 3: EXTRACT AGENT ROLE CLASSIFICATIONS")
print("=" * 80)

def classify_expected_role(agent_name, agent_type):
    """Map an agent's name and type to the role it is expected to play.

    The name is lower-cased and searched for marker substrings; the first
    marker found (in the order listed below) decides the role.

    Coded strategies (``agent_type == 'Strategy'``) are matched on their
    full strategy name. Any other agent type is treated as an LLM agent whose
    name embeds a short strategy tag (``tit_``, ``coop_``, ``defa_``,
    ``rand_``, ``grud_``, ``self_``).

    Args:
        agent_name: Agent name string.
        agent_type: Agent type; only the value 'Strategy' is special-cased.

    Returns:
        The role label, or 'unknown_coded' / 'unknown_llm' when no marker
        is present in the name.
    """
    lowered = agent_name.lower()

    if agent_type == 'Strategy':
        markers = (
            ('tit_for_tat', 'tit_for_tat'),
            ('always_defect', 'defecting'),
            ('always_cooperate', 'cooperative'),
            ('grim_trigger', 'grim_trigger'),
            ('random', 'random'),
        )
        fallback = 'unknown_coded'
    else:
        markers = (
            ('tit_', 'tit_for_tat_prompted'),
            ('coop_', 'cooperative_prompted'),
            ('defa_', 'defecting_prompted'),
            ('rand_', 'random_prompted'),
            ('grud_', 'grudge_prompted'),
            ('self_', 'self_interested_prompted'),
        )
        fallback = 'unknown_llm'

    # First marker contained in the name wins; tuple order is the precedence.
    return next((role for marker, role in markers if marker in lowered), fallback)

# Derive the expected role of both players, row by row, from name and type.
for side in ('agent1', 'agent2'):
    df[f'{side}_role_expected'] = df.apply(
        lambda row, side=side: classify_expected_role(row[f'{side}_name'], row[f'{side}_type']),
        axis=1,
    )

print(f"\nâœ“ Agent roles classified")

# Row-level counts (each match contributes one row per round).
agent1_role_counts = df['agent1_role_expected'].value_counts()
print(f"\nAgent1 expected roles distribution:")
print(agent1_role_counts)

agent2_role_counts = df['agent2_role_expected'].value_counts()
print(f"\nAgent2 expected roles distribution:")
print(agent2_role_counts)



SECTION 3: EXTRACT AGENT ROLE CLASSIFICATIONS

âœ“ Agent roles classified

Agent1 expected roles distribution:
agent1_role_expected
defecting_prompted          47200
cooperative_prompted        44000
grudge_prompted             40800
tit_for_tat_prompted        37600
random_prompted             34400
self_interested_prompted    31200
cooperative                  9600
defecting                    9600
grim_trigger                 9600
random                       9600
tit_for_tat                  9600
Name: count, dtype: int64

Agent2 expected roles distribution:
agent2_role_expected
self_interested_prompted    55200
random_prompted             52000
tit_for_tat_prompted        48800
grudge_prompted             45600
cooperative_prompted        42400
defecting_prompted          39200
Name: count, dtype: int64

âœ“ Agent roles classified

Agent1 expected roles distribution:
agent1_role_expected
defecting_prompted          47200
cooperative_prompted        44000
grudge_prompted          

In [18]:
# ============================================================================
# SECTION 4: CALCULATE COOPERATION AND DEFECTION FLAGS
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 4: CALCULATE COOPERATION AND DEFECTION FLAGS")
print("=" * 80)

# Encode each agent's move as two 0/1 indicator columns:
# <agent>_is_cooperation is 1 for move 'C', <agent>_is_defection is 1 for 'D'.
for side in ('agent1', 'agent2'):
    moves = df[f'{side}_move']
    df[f'{side}_is_cooperation'] = moves.eq('C').astype(int)
    df[f'{side}_is_defection'] = moves.eq('D').astype(int)

print(f"\nâœ“ Cooperation and defection flags created")
print(f"\nAgent1 cooperation rate: {df['agent1_is_cooperation'].mean() * 100:.2f}%")
print(f"Agent1 defection rate: {df['agent1_is_defection'].mean() * 100:.2f}%")
print(f"Agent2 cooperation rate: {df['agent2_is_cooperation'].mean() * 100:.2f}%")
print(f"Agent2 defection rate: {df['agent2_is_defection'].mean() * 100:.2f}%")

# Verify flags: a value other than 1 would reveal a move that is neither
# 'C' nor 'D'.
agent1_flag_total = df['agent1_is_cooperation'] + df['agent1_is_defection']
agent2_flag_total = df['agent2_is_cooperation'] + df['agent2_is_defection']
print(f"\nValidation (flags should sum to 1 for each agent per round):")
print(f"Agent1: {agent1_flag_total.unique()}")
print(f"Agent2: {agent2_flag_total.unique()}")



SECTION 4: CALCULATE COOPERATION AND DEFECTION FLAGS

âœ“ Cooperation and defection flags created

Agent1 cooperation rate: 51.13%
Agent1 defection rate: 48.87%
Agent2 cooperation rate: 44.06%
Agent2 defection rate: 55.94%

Validation (flags should sum to 1 for each agent per round):
Agent1: [1]
Agent2: [1]


In [19]:
# ============================================================================
# SECTION 5: COMPUTE RETALIATION AND FORGIVENESS METRICS
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 5: COMPUTE RETALIATION AND FORGIVENESS METRICS")
print("=" * 80)

# For each agent, we need to look at opponent's previous move(s)
# Retaliation: Defection after opponent defection in previous round
# Forgiveness: Cooperation after opponent defection in previous round

def _reaction_flags(own_moves, opponent_moves, memory_window=5):
    """Build the three 0/1 reaction flags of one agent within one match.

    Args:
        own_moves: Series of the agent's moves ('C' / 'D'), in round order.
        opponent_moves: Series of the opponent's moves, aligned with own_moves.
        memory_window: Number of preceding opponent moves inspected for the
            "memories" variant of retaliation.

    Returns:
        Tuple ``(retaliation, forgiveness, retaliation_memories)`` of integer
        Series: defection right after an opponent defection, cooperation
        right after an opponent defection, and defection when the opponent
        defected at least once in the previous ``memory_window`` rounds.
    """
    defected = own_moves == 'D'
    # The first round has no previous move: shift() yields a missing value,
    # the comparison is False, so every flag is 0 on that row.
    provoked = opponent_moves.shift(1) == 'D'

    retaliation = (provoked & defected).astype(int)
    forgiveness = (provoked & (own_moves == 'C')).astype(int)

    # Shift before rolling so the window covers only earlier rounds and
    # excludes the opponent's move in the current round.
    opponent_defected_before = (opponent_moves == 'D').astype(int).shift(1)
    provoked_recently = (
        opponent_defected_before.rolling(window=memory_window, min_periods=1).max() == 1
    )
    retaliation_memories = (provoked_recently & defected).astype(int)

    return retaliation, forgiveness, retaliation_memories


def compute_retaliation_metrics(group):
    """Compute retaliation and forgiveness flags for a group (one match).

    Adds, for each of agent1 and agent2, the columns
    ``<agent>_retaliation_flag``, ``<agent>_forgiveness_flag`` and
    ``<agent>_retaliation_flag_memories`` (all 0/1 integers).

    The rows must already be in round order, because "previous round" is
    taken to be the previous row.

    The input frame is not modified: the flags are written to a copy. pandas
    documents that functions passed to ``GroupBy.apply`` must not mutate the
    object they receive.

    Args:
        group: DataFrame with ``agent1_move`` and ``agent2_move`` columns.

    Returns:
        A copy of ``group`` with the six flag columns appended.
    """
    group = group.copy()

    for agent, opponent in (('agent1', 'agent2'), ('agent2', 'agent1')):
        retaliation, forgiveness, retaliation_memories = _reaction_flags(
            group[f'{agent}_move'], group[f'{opponent}_move']
        )
        group[f'{agent}_retaliation_flag'] = retaliation
        group[f'{agent}_forgiveness_flag'] = forgiveness
        group[f'{agent}_retaliation_flag_memories'] = retaliation_memories

    return group

print("\nComputing retaliation and forgiveness metrics...")
# Run the flag computation once per match. group_keys=False keeps match_id
# out of the result index, so the returned frame has the same row index as df.
df = df.groupby('match_id', group_keys=False).apply(compute_retaliation_metrics)

print(f"âœ“ Retaliation and forgiveness metrics computed")
# The percentages below are means of 0/1 flags over ALL rows, i.e. shares of
# all rounds, not shares of the rounds that followed an opponent defection.
print(f"\nAgent1 statistics:")
print(f"  - Retaliation (immediate): {df['agent1_retaliation_flag'].sum():,} instances ({df['agent1_retaliation_flag'].mean() * 100:.2f}%)")
print(f"  - Retaliation (within 5 moves): {df['agent1_retaliation_flag_memories'].sum():,} instances ({df['agent1_retaliation_flag_memories'].mean() * 100:.2f}%)")
print(f"  - Forgiveness: {df['agent1_forgiveness_flag'].sum():,} instances ({df['agent1_forgiveness_flag'].mean() * 100:.2f}%)")

print(f"\nAgent2 statistics:")
print(f"  - Retaliation (immediate): {df['agent2_retaliation_flag'].sum():,} instances ({df['agent2_retaliation_flag'].mean() * 100:.2f}%)")
print(f"  - Retaliation (within 5 moves): {df['agent2_retaliation_flag_memories'].sum():,} instances ({df['agent2_retaliation_flag_memories'].mean() * 100:.2f}%)")
print(f"  - Forgiveness: {df['agent2_forgiveness_flag'].sum():,} instances ({df['agent2_forgiveness_flag'].mean() * 100:.2f}%)")



SECTION 5: COMPUTE RETALIATION AND FORGIVENESS METRICS

Computing retaliation and forgiveness metrics...
âœ“ Retaliation and forgiveness metrics computed

Agent1 statistics:
  - Retaliation (immediate): 130,641 instances (46.13%)
  - Retaliation (within 5 moves): 134,069 instances (47.34%)
  - Forgiveness: 26,994 instances (9.53%)

Agent2 statistics:
  - Retaliation (immediate): 131,455 instances (46.42%)
  - Retaliation (within 5 moves): 143,852 instances (50.80%)
  - Forgiveness: 6,267 instances (2.21%)
âœ“ Retaliation and forgiveness metrics computed

Agent1 statistics:
  - Retaliation (immediate): 130,641 instances (46.13%)
  - Retaliation (within 5 moves): 134,069 instances (47.34%)
  - Forgiveness: 26,994 instances (9.53%)

Agent2 statistics:
  - Retaliation (immediate): 131,455 instances (46.42%)
  - Retaliation (within 5 moves): 143,852 instances (50.80%)
  - Forgiveness: 6,267 instances (2.21%)


In [21]:
# ============================================================================
# SECTION 6: CLASSIFY BEHAVIOR PATTERNS
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 6: CLASSIFY BEHAVIOR PATTERNS")
print("=" * 80)

def classify_behavior_pattern(agent_name, coop_rate, retaliation_rate, forgiveness_rate, decision_std):
    """Label an agent's observed behavior in one match with threshold rules.

    Rules are tried in this order and the first that holds wins:

    1. ``coop_rate > 75``                         -> 'faithful_cooperator'
    2. ``coop_rate < 30``                         -> 'systematic_defector'
    3. ``coop_rate > 60`` and ``retaliation_rate > 40``
       and ``forgiveness_rate > 30``              -> 'tit_for_tat_like'
    4. ``coop_rate > 50`` and ``decision_std > 0.3`` -> 'conditional_cooperator'
    5. ``decision_std > 0.4``                     -> 'chaotic'
    6. otherwise                                  -> 'balanced'

    Args:
        agent_name: Accepted for interface compatibility; not used.
        coop_rate: Cooperation rate in percent.
        retaliation_rate: Retaliation rate in percent.
        forgiveness_rate: Forgiveness rate in percent.
        decision_std: Standard deviation of the 0/1 cooperation indicator.

    Returns:
        One of the six labels listed above.

    NOTE(review): the calling cell passes retaliation_rate as the share of
    ALL rounds in which the agent defected right after an opponent defection.
    Such a round is itself a defection, so that share cannot exceed
    ``100 - coop_rate``; with ``coop_rate > 60`` rule 3 therefore cannot fire.
    Likewise the std of a 0/1 series whose mean lies between 0.30 and 0.75 is
    at least about 0.43, so rule 6 is not reached either. The recorded output
    shows only four labels. Confirm the intended rate definitions.
    """
    # The extremes of the cooperation rate decide the label outright.
    if coop_rate > 75:
        return 'faithful_cooperator'
    if coop_rate < 30:
        return 'systematic_defector'

    # Mid-range cooperation: look at reactivity, then at variability.
    if coop_rate > 60 and retaliation_rate > 40 and forgiveness_rate > 30:
        return 'tit_for_tat_like'
    if coop_rate > 50 and decision_std > 0.3:
        return 'conditional_cooperator'

    return 'chaotic' if decision_std > 0.4 else 'balanced'

# Calculate per-agent metrics with proper groupby
def _behavior_patterns_per_match(frame, agent):
    """Classify one agent's behavior pattern for every match in ``frame``.

    For each match the cooperation rate, retaliation rate and forgiveness
    rate (all as percentages of the match's rows) and the standard deviation
    of the 0/1 cooperation flag are passed to ``classify_behavior_pattern``.

    Args:
        frame: Round-level DataFrame with a ``match_id`` column and the
            ``<agent>_name``, ``<agent>_is_cooperation``,
            ``<agent>_retaliation_flag`` and ``<agent>_forgiveness_flag``
            columns.
        agent: Column prefix, 'agent1' or 'agent2'.

    Returns:
        DataFrame with one row per match and the columns ``match_id`` and
        ``<agent>_behavior_pattern``.
    """
    pattern_column = f'{agent}_behavior_pattern'
    records = []
    for match_id, group in frame.groupby('match_id'):
        cooperation = group[f'{agent}_is_cooperation']
        pattern = classify_behavior_pattern(
            group[f'{agent}_name'].iloc[0],
            cooperation.mean() * 100,
            group[f'{agent}_retaliation_flag'].mean() * 100,
            group[f'{agent}_forgiveness_flag'].mean() * 100,
            cooperation.std(),
        )
        records.append({'match_id': match_id, pattern_column: pattern})
    # Explicit columns keep the frame mergeable even when there are no matches.
    return pd.DataFrame(records, columns=['match_id', pattern_column])


print("Computing behavior patterns for agent1...")
agent1_patterns_df = _behavior_patterns_per_match(df, 'agent1')

print("Computing behavior patterns for agent2...")
agent2_patterns_df = _behavior_patterns_per_match(df, 'agent2')

# Merge behavior patterns back to main dataframe.
# Drop any pattern columns left by a previous run of this cell first;
# otherwise merge() would emit *_x / *_y duplicates and later cells that
# select 'agent1_behavior_pattern' by name would fail.
df = df.drop(columns=['agent1_behavior_pattern', 'agent2_behavior_pattern'], errors='ignore')
df = df.merge(agent1_patterns_df, on='match_id', how='left')
df = df.merge(agent2_patterns_df, on='match_id', how='left')

print(f"\nâœ“ Behavior patterns classified")
# One row per match is enough, since the pattern is constant within a match.
print(f"\nAgent1 behavior pattern distribution:")
print(df.drop_duplicates('match_id')['agent1_behavior_pattern'].value_counts())
print(f"\nAgent2 behavior pattern distribution:")
print(df.drop_duplicates('match_id')['agent2_behavior_pattern'].value_counts())



SECTION 6: CLASSIFY BEHAVIOR PATTERNS
Computing behavior patterns for agent1...
Computing behavior patterns for agent2...
Computing behavior patterns for agent2...

âœ“ Behavior patterns classified

Agent1 behavior pattern distribution:
agent1_behavior_pattern
faithful_cooperator       653
systematic_defector       647
chaotic                    61
conditional_cooperator     55
Name: count, dtype: int64

Agent2 behavior pattern distribution:
agent2_behavior_pattern
systematic_defector       773
faithful_cooperator       590
conditional_cooperator     29
chaotic                    24
Name: count, dtype: int64

âœ“ Behavior patterns classified

Agent1 behavior pattern distribution:
agent1_behavior_pattern
faithful_cooperator       653
systematic_defector       647
chaotic                    61
conditional_cooperator     55
Name: count, dtype: int64

Agent2 behavior pattern distribution:
agent2_behavior_pattern
systematic_defector       773
faithful_cooperator       590
conditional_coope

In [26]:
# ============================================================================
# SECTION 7: CALCULATE MATCH OUTCOMES AND SCORES
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 7: CALCULATE MATCH OUTCOMES AND SCORES")
print("=" * 80)

# Calculate per-match scores and outcomes.
# Named aggregation sets the output column names directly instead of
# relying on the positional order of a later `.columns = [...]` assignment.
match_outcomes = (
    df.groupby('match_id')
    .agg(
        agent1_match_score=('agent1_score', 'sum'),
        agent2_match_score=('agent2_score', 'sum'),
        agent1_match_cooperation_rate=('agent1_is_cooperation', 'mean'),
        agent2_match_cooperation_rate=('agent2_is_cooperation', 'mean'),
    )
    .reset_index()
)

# Determine match outcome (win/loss/draw) from the total-score difference.
score_gap = match_outcomes['agent1_match_score'] - match_outcomes['agent2_match_score']
match_outcomes['match_outcome'] = np.select(
    [score_gap > 0, score_gap < 0],
    ['agent1_win', 'agent2_win'],
    default='draw',
)

# Convert to percentage
match_outcomes['agent1_match_cooperation_rate'] = match_outcomes['agent1_match_cooperation_rate'] * 100
match_outcomes['agent2_match_cooperation_rate'] = match_outcomes['agent2_match_cooperation_rate'] * 100

# Merge back to main dataframe.
# Columns left by a previous run of this cell are dropped first, so that
# re-running does not create *_x / *_y duplicates.
outcome_columns = [column for column in match_outcomes.columns if column != 'match_id']
df = df.drop(columns=outcome_columns, errors='ignore')
df = df.merge(match_outcomes, on='match_id', how='left')

print(f"\nâœ“ Match outcomes and scores calculated")
print(f"\nMatch outcome distribution:")
print(match_outcomes['match_outcome'].value_counts())

print(f"\nScore statistics:")
print(f"  Agent1 match score - Min: {match_outcomes['agent1_match_score'].min():.0f}, "
      f"Max: {match_outcomes['agent1_match_score'].max():.0f}, "
      f"Mean: {match_outcomes['agent1_match_score'].mean():.2f}")
print(f"  Agent2 match score - Min: {match_outcomes['agent2_match_score'].min():.0f}, "
      f"Max: {match_outcomes['agent2_match_score'].max():.0f}, "
      f"Mean: {match_outcomes['agent2_match_score'].mean():.2f}")

print(f"\nCooperation rate statistics (%):")
print(f"  Agent1 - Min: {match_outcomes['agent1_match_cooperation_rate'].min():.2f}, "
      f"Max: {match_outcomes['agent1_match_cooperation_rate'].max():.2f}, "
      f"Mean: {match_outcomes['agent1_match_cooperation_rate'].mean():.2f}")
print(f"  Agent2 - Min: {match_outcomes['agent2_match_cooperation_rate'].min():.2f}, "
      f"Max: {match_outcomes['agent2_match_cooperation_rate'].max():.2f}, "
      f"Mean: {match_outcomes['agent2_match_cooperation_rate'].mean():.2f}")



SECTION 7: CALCULATE MATCH OUTCOMES AND SCORES

âœ“ Match outcomes and scores calculated

Match outcome distribution:
match_outcome
draw          637
agent2_win    519
agent1_win    260
Name: count, dtype: int64

Score statistics:
  Agent1 match score - Min: 0, Max: 1000, Mean: 367.37
  Agent2 match score - Min: 0, Max: 1000, Mean: 438.06

Cooperation rate statistics (%):
  Agent1 - Min: 0.00, Max: 100.00, Mean: 51.13
  Agent2 - Min: 0.00, Max: 100.00, Mean: 44.06

âœ“ Match outcomes and scores calculated

Match outcome distribution:
match_outcome
draw          637
agent2_win    519
agent1_win    260
Name: count, dtype: int64

Score statistics:
  Agent1 match score - Min: 0, Max: 1000, Mean: 367.37
  Agent2 match score - Min: 0, Max: 1000, Mean: 438.06

Cooperation rate statistics (%):
  Agent1 - Min: 0.00, Max: 100.00, Mean: 51.13
  Agent2 - Min: 0.00, Max: 100.00, Mean: 44.06


In [23]:
# ============================================================================
# SECTION 8: GENERATE CONTEXT AND TEMPERATURE FEATURES
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 8: GENERATE CONTEXT AND TEMPERATURE FEATURES")
print("=" * 80)

# Context used flag: the *_context_mentioned columns cast to integers
# (the raw schema printed in Section 1 lists them as bool, so this is 0/1).
df['agent1_context_used_flag'] = df['agent1_context_mentioned'].astype(int)
df['agent2_context_used_flag'] = df['agent2_context_mentioned'].astype(int)

# Temperature bucket (only for LLM agents)
def get_temperature_bucket(agent_type, temperature):
    """Bucket an agent's sampling temperature into a coarse label.

    Args:
        agent_type: Agent type; 'Strategy' agents are always labelled 'coded'
            regardless of the temperature value.
        temperature: Numeric temperature, possibly missing.

    Returns:
        'coded' for Strategy agents, 'unknown' when the temperature is
        missing, otherwise 'low' (<= 0.7), 'medium' (<= 1.0) or 'high'.
    """
    if agent_type == 'Strategy':
        return 'coded'
    if pd.isna(temperature):
        return 'unknown'
    if temperature <= 0.7:
        return 'low'
    return 'medium' if temperature <= 1.0 else 'high'

# Bucket the temperature of both players row by row.
for side in ('agent1', 'agent2'):
    df[f'{side}_temperature_bucket'] = df.apply(
        lambda row, side=side: get_temperature_bucket(row[f'{side}_type'], row[f'{side}_temperature']),
        axis=1,
    )

# Agent family (model name extraction)
def extract_agent_family(agent_name, agent_type):
    """Return the model family an agent belongs to.

    Args:
        agent_name: Agent name; only inspected for non-Strategy agents.
        agent_type: Agent type; 'Strategy' agents are labelled 'coded'.

    Returns:
        'coded' for Strategy agents; otherwise 'qwen' or 'gemma' when that
        substring occurs in the lower-cased name ('qwen' is checked first),
        else 'unknown_llm'.
    """
    if agent_type == 'Strategy':
        return 'coded'

    lowered = agent_name.lower()
    for family in ('qwen', 'gemma'):
        if family in lowered:
            return family
    return 'unknown_llm'

# Tag both players with their model family ('coded' for Strategy agents).
for side in ('agent1', 'agent2'):
    df[f'{side}_family'] = df.apply(
        lambda row, side=side: extract_agent_family(row[f'{side}_name'], row[f'{side}_type']),
        axis=1,
    )

print(f"\nâœ“ Context and temperature features generated")

# Row-level context usage for agent1 and its complement.
with_context = df['agent1_context_used_flag']
without_context = 1 - with_context
print(f"\nAgent1 context usage:")
print(f"  With context: {with_context.sum():,} ({with_context.mean() * 100:.2f}%)")
print(f"  Without context: {without_context.sum():,} ({without_context.mean() * 100:.2f}%)")

print(f"\nAgent1 temperature bucket distribution:")
print(df['agent1_temperature_bucket'].value_counts())

# Count each distinct agent1 name once, so this is a per-agent distribution.
distinct_agents = df.drop_duplicates('agent1_name')
print(f"\nAgent1 family distribution:")
print(distinct_agents['agent1_family'].value_counts())



SECTION 8: GENERATE CONTEXT AND TEMPERATURE FEATURES

âœ“ Context and temperature features generated

Agent1 context usage:
  With context: 120,000 (42.37%)
  Without context: 163,200 (57.63%)

Agent1 temperature bucket distribution:
agent1_temperature_bucket
low      146400
high      88800
coded     48000
Name: count, dtype: int64

Agent1 family distribution:
agent1_family
gemma    24
qwen     24
coded     5
Name: count, dtype: int64

âœ“ Context and temperature features generated

Agent1 context usage:
  With context: 120,000 (42.37%)
  Without context: 163,200 (57.63%)

Agent1 temperature bucket distribution:
agent1_temperature_bucket
low      146400
high      88800
coded     48000
Name: count, dtype: int64

Agent1 family distribution:
agent1_family
gemma    24
qwen     24
coded     5
Name: count, dtype: int64


In [24]:
# ============================================================================
# SECTION 9: COMPUTE CONFORMITY SCORES
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 9: COMPUTE CONFORMITY SCORES")
print("=" * 80)

# Score returned when an (observed pattern, expected role) pair is not listed.
_NO_MATCH_SCORE = 0.2

# Lookup table: outer key = observed pattern, inner key = expected role.
# It is built once at definition time instead of on every call, because
# compute_conformity_score is invoked once per row and per agent.
_CONFORMITY_MAP = {
    # Coded strategies.
    # NOTE(review): these outer keys are role names, not labels produced by
    # classify_behavior_pattern, so with this notebook's inputs they are never
    # looked up. They are kept unchanged for compatibility.
    'tit_for_tat': {
        'tit_for_tat': 1.0,
        'tit_for_tat_prompted': 0.9,
        'conditional_cooperator': 0.8,
        'balanced': 0.6,
    },
    'cooperative': {
        'cooperative': 1.0,
        'cooperative_prompted': 0.9,
        'faithful_cooperator': 0.95,
        'conditional_cooperator': 0.7,
    },
    'defecting': {
        'defecting': 1.0,
        'defecting_prompted': 0.9,
        'systematic_defector': 0.95,
    },
    'random': {
        'random': 1.0,
        'random_prompted': 0.9,
        'chaotic': 0.85,
    },
    'grim_trigger': {
        'grim_trigger': 1.0,
        'grudge_prompted': 0.85,
        'tit_for_tat_like': 0.7,
    },

    # Observed behavior patterns to role matching
    'faithful_cooperator': {
        'cooperative': 1.0,
        'cooperative_prompted': 0.95,
        'tit_for_tat': 0.6,
        'conditional_cooperator': 0.7,
    },
    'systematic_defector': {
        'defecting': 1.0,
        'defecting_prompted': 0.95,
        'random': 0.5,
    },
    'tit_for_tat_like': {
        'tit_for_tat': 1.0,
        'tit_for_tat_prompted': 0.95,
        'grim_trigger': 0.8,
        'conditional_cooperator': 0.7,
    },
    'conditional_cooperator': {
        'tit_for_tat': 0.8,
        'tit_for_tat_prompted': 0.8,
        'cooperative': 0.7,
        'cooperative_prompted': 0.7,
        'self_interested_prompted': 0.6,
    },
    'chaotic': {
        'random': 1.0,
        'random_prompted': 0.95,
        'unknown_llm': 0.6,
        'self_interested_prompted': 0.5,
    },
    'balanced': {
        'tit_for_tat': 0.8,
        'tit_for_tat_prompted': 0.8,
        'conditional_cooperator': 0.7,
        'self_interested_prompted': 0.6,
        'unknown_llm': 0.5,
    },
}


def compute_conformity_score(observed_pattern, expected_role):
    """Compute conformity between observed behavior pattern and expected role.

    The score is looked up in ``_CONFORMITY_MAP`` under
    ``[observed_pattern][expected_role]``.

    Args:
        observed_pattern: Behavior pattern label observed for the agent.
        expected_role: Role label the agent was expected to play.

    Returns:
        The listed score (between 0.5 and 1.0), or 0.2 when the pattern is
        unknown or the role is not listed for that pattern.
    """
    pattern_scores = _CONFORMITY_MAP.get(observed_pattern, {})
    return pattern_scores.get(expected_role, _NO_MATCH_SCORE)

# Calculate conformity scores
# The score is looked up row by row from each agent's observed behavior
# pattern and expected role.
df['agent1_conformity_score'] = df.apply(
    lambda x: compute_conformity_score(x['agent1_behavior_pattern'], x['agent1_role_expected']),
    axis=1
)
df['agent2_conformity_score'] = df.apply(
    lambda x: compute_conformity_score(x['agent2_behavior_pattern'], x['agent2_role_expected']),
    axis=1
)

print(f"\nâœ“ Conformity scores computed")
# These statistics are taken over rows (rounds), not over distinct matches.
print(f"\nAgent1 conformity score statistics:")
print(f"  Mean: {df['agent1_conformity_score'].mean():.3f}")
print(f"  Median: {df['agent1_conformity_score'].median():.3f}")
print(f"  Std: {df['agent1_conformity_score'].std():.3f}")

print(f"\nAgent1 conformity score distribution:")
# pd.cut intervals are right-closed by default, so the 0.2 fallback score
# falls in the first bin and a score of exactly 1.0 in the last.
conformity_dist_a1 = pd.cut(df['agent1_conformity_score'], bins=[0, 0.25, 0.5, 0.75, 1.0], 
                             labels=['Low (0-0.25)', 'Medium (0.25-0.5)', 'High (0.5-0.75)', 'Very High (0.75-1.0)'])
print(conformity_dist_a1.value_counts().sort_index())

# Only summary statistics are printed for agent2 (no binned distribution).
print(f"\nAgent2 conformity score statistics:")
print(f"  Mean: {df['agent2_conformity_score'].mean():.3f}")
print(f"  Median: {df['agent2_conformity_score'].median():.3f}")
print(f"  Std: {df['agent2_conformity_score'].std():.3f}")



SECTION 9: COMPUTE CONFORMITY SCORES

âœ“ Conformity scores computed

Agent1 conformity score statistics:
  Mean: 0.468
  Median: 0.200
  Std: 0.357

Agent1 conformity score distribution:
agent1_conformity_score
Low (0-0.25)            179200
Medium (0.25-0.5)            0
High (0.5-0.75)          11200
Very High (0.75-1.0)     92800
Name: count, dtype: int64

Agent2 conformity score statistics:
  Mean: 0.362
  Median: 0.200
  Std: 0.306

âœ“ Conformity scores computed

Agent1 conformity score statistics:
  Mean: 0.468
  Median: 0.200
  Std: 0.357

Agent1 conformity score distribution:
agent1_conformity_score
Low (0-0.25)            179200
Medium (0.25-0.5)            0
High (0.5-0.75)          11200
Very High (0.75-1.0)     92800
Name: count, dtype: int64

Agent2 conformity score statistics:
  Mean: 0.362
  Median: 0.200
  Std: 0.306


In [27]:
# ============================================================================
# SECTION 10: EXPORT TRANSFORMED DATASET
# ============================================================================

print("\n" + "=" * 80)
print("SECTION 10: EXPORT TRANSFORMED DATASET")
print("=" * 80)

# Select and rename columns for final dataset
# Every key maps to itself, so the rename below changes nothing; the dict
# serves as the whitelist and column order of the exported dataset.
final_columns = {
    'match_id': 'match_id',
    'round_id': 'round_id',
    'agent1_family': 'agent1_family',
    'agent2_family': 'agent2_family',
    'agent1_role_expected': 'agent1_role_expected',
    'agent2_role_expected': 'agent2_role_expected',
    'agent1_context_used_flag': 'agent1_context_used_flag',
    'agent2_context_used_flag': 'agent2_context_used_flag',
    'agent1_temperature_bucket': 'agent1_temperature_bucket',
    'agent2_temperature_bucket': 'agent2_temperature_bucket',
    'agent1_is_cooperation': 'agent1_is_cooperation',
    'agent1_is_defection': 'agent1_is_defection',
    'agent2_is_cooperation': 'agent2_is_cooperation',
    'agent2_is_defection': 'agent2_is_defection',
    'agent1_retaliation_flag': 'agent1_retaliation_flag',
    'agent1_retaliation_flag_memories': 'agent1_retaliation_flag_memories',
    'agent1_forgiveness_flag': 'agent1_forgiveness_flag',
    'agent2_retaliation_flag': 'agent2_retaliation_flag',
    'agent2_retaliation_flag_memories': 'agent2_retaliation_flag_memories',
    'agent2_forgiveness_flag': 'agent2_forgiveness_flag',
    'agent1_behavior_pattern': 'agent1_behavior_pattern',
    'agent2_behavior_pattern': 'agent2_behavior_pattern',
    'agent1_match_score': 'agent1_match_score',
    'agent2_match_score': 'agent2_match_score',
    'agent1_match_cooperation_rate': 'agent1_match_cooperation_rate',
    'agent2_match_cooperation_rate': 'agent2_match_cooperation_rate',
    'agent1_conformity_score': 'agent1_conformity_score',
    'agent2_conformity_score': 'agent2_conformity_score',
    'match_outcome': 'match_outcome',
}

# Selecting by name raises KeyError if any listed column is missing from df.
# The copy keeps the in-place rename from touching df.
df_final = df[list(final_columns.keys())].copy()
df_final.rename(columns=final_columns, inplace=True)

# Create output directory
output_dir = 'enriched_data'
os.makedirs(output_dir, exist_ok=True)

# Export to parquet
# index=False leaves the DataFrame index out of the file; an existing file
# at this path is overwritten.
output_file = os.path.join(output_dir, 'enriched_games_full.parquet')
df_final.to_parquet(output_file, index=False)

print(f"\nâœ“ Final dataset exported to {output_file}")
print(f"  - Shape: {df_final.shape}")
# Size on disk of the file just written, in MiB.
print(f"  - File size: {os.path.getsize(output_file) / (1024*1024):.2f} MB")

# Display sample rows
print(f"\nðŸ“‹ Sample of transformed data (first 3 rows):")
print(df_final.head(3).to_string())

# Dataset info
print(f"\nðŸ“Š Dataset column information:")
print(df_final.dtypes)

# Summary statistics
print(f"\nðŸ“ˆ Summary statistics:")
print(f"  - Total rows: {len(df_final):,}")
print(f"  - Total columns: {len(df_final.columns)}")
print(f"  - Unique matches: {df_final['match_id'].nunique()}")
# deep=True also counts the memory held by the string values in object columns.
print(f"  - Memory footprint: {df_final.memory_usage(deep=True).sum() / (1024*1024):.2f} MB")

print("\n" + "=" * 80)
print("âœ… DATA ENRICHMENT PIPELINE COMPLETED SUCCESSFULLY")
print("=" * 80)



SECTION 10: EXPORT TRANSFORMED DATASET

âœ“ Final dataset exported to enriched_data/enriched_games_full.parquet
  - Shape: (283200, 29)
  - File size: 0.12 MB

ðŸ“‹ Sample of transformed data (first 3 rows):
   match_id  round_id agent1_family agent2_family  agent1_role_expected  agent2_role_expected  agent1_context_used_flag  agent2_context_used_flag agent1_temperature_bucket agent2_temperature_bucket  agent1_is_cooperation  agent1_is_defection  agent2_is_cooperation  agent2_is_defection  agent1_retaliation_flag  agent1_retaliation_flag_memories  agent1_forgiveness_flag  agent2_retaliation_flag  agent2_retaliation_flag_memories  agent2_forgiveness_flag agent1_behavior_pattern agent2_behavior_pattern  agent1_match_score  agent2_match_score  agent1_match_cooperation_rate  agent2_match_cooperation_rate  agent1_conformity_score  agent2_conformity_score match_outcome
0  32cb150b         1         gemma         gemma  cooperative_prompted  cooperative_prompted                         1    

In [28]:
# Show the exported frame as this cell's output for a visual check.
df_final

Unnamed: 0,match_id,round_id,agent1_family,agent2_family,agent1_role_expected,agent2_role_expected,agent1_context_used_flag,agent2_context_used_flag,agent1_temperature_bucket,agent2_temperature_bucket,...,agent2_forgiveness_flag,agent1_behavior_pattern,agent2_behavior_pattern,agent1_match_score,agent2_match_score,agent1_match_cooperation_rate,agent2_match_cooperation_rate,agent1_conformity_score,agent2_conformity_score,match_outcome
0,32cb150b,1,gemma,gemma,cooperative_prompted,cooperative_prompted,1,1,low,low,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.95,0.95,draw
1,32cb150b,2,gemma,gemma,cooperative_prompted,cooperative_prompted,1,1,low,low,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.95,0.95,draw
2,32cb150b,3,gemma,gemma,cooperative_prompted,cooperative_prompted,1,1,low,low,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.95,0.95,draw
3,32cb150b,4,gemma,gemma,cooperative_prompted,cooperative_prompted,1,1,low,low,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.95,0.95,draw
4,32cb150b,5,gemma,gemma,cooperative_prompted,cooperative_prompted,1,1,low,low,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.95,0.95,draw
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283195,0e7cf5c8,196,coded,qwen,tit_for_tat,tit_for_tat_prompted,0,0,coded,high,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.60,0.20,draw
283196,0e7cf5c8,197,coded,qwen,tit_for_tat,tit_for_tat_prompted,0,0,coded,high,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.60,0.20,draw
283197,0e7cf5c8,198,coded,qwen,tit_for_tat,tit_for_tat_prompted,0,0,coded,high,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.60,0.20,draw
283198,0e7cf5c8,199,coded,qwen,tit_for_tat,tit_for_tat_prompted,0,0,coded,high,...,0,faithful_cooperator,faithful_cooperator,600,600,100.0,100.0,0.60,0.20,draw
