In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Widen pandas' console rendering so wide frames stay readable
pd.options.display.max_columns = 20
pd.options.display.width = 1000

# Put a directory on the import path: when the working directory name ends
# with "notebooks" use its parent, otherwise use the working directory itself
current_dir = os.getcwd()
if current_dir.endswith('notebooks'):
    parent_dir = os.path.dirname(current_dir)
else:
    parent_dir = current_dir

if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

# Define function to load data
def load_data(data_path='data/cleaned_election_data.csv'):
    """
    Read the CSV file at ``data_path`` into a DataFrame.

    On success the row and column counts are printed and the frame is
    returned. Any exception raised while loading is caught, reported with
    print, and signalled to the caller by returning None.
    """
    try:
        frame = pd.read_csv(data_path, low_memory=False)
        n_rows, n_cols = frame.shape
        print(f"Loaded dataset with {n_rows} rows and {n_cols} columns")
    except Exception as err:
        print(f"Error loading data: {err}")
        frame = None
    return frame
        
def calculate_gini(values):
    """
    Compute the Gini coefficient of a collection of values.
    
    Parameters:
    -----------
    values : array-like
        Numeric observations (list, tuple, NumPy array or pandas Series).
        NaN entries are ignored. The formula is meant for non-negative data;
        negative inputs are not validated.
        
    Returns:
    --------
    float
        0 when all values are equal, growing towards (n - 1) / n as the total
        concentrates in a single observation. Also 0 when fewer than two
        observations remain or when they sum to zero.
    """
    # Coerce to a flat float array so plain lists/tuples work (they have no .sum())
    data = np.asarray(values, dtype=float).ravel()
    data = data[~np.isnan(data)]
    n = data.size
    if n <= 1:
        return 0.0

    cum_values = np.cumsum(np.sort(data))
    total = cum_values[-1]
    if total == 0:
        return 0.0

    # G = (n + 1 - 2 * sum of cumulative shares) / n
    return float(1 - 2 * np.sum(cum_values / total) / n + 1 / n)

def analyze_traders(n_markets=None, data_path='data/cleaned_election_data.csv', save_path='results/trader_analysis'):
    """
    Compute market-level concentration metrics for the election dataset.
    
    Parameters:
    -----------
    n_markets : int, optional
        Number of markets to keep (None = all). The top markets by
        'volumeNum' are used when that column exists, otherwise the first
        rows of the dataset.
    data_path : str
        Path to the main dataset (CSV)
    save_path : str
        Output directory. It is created if missing; this function itself
        does not write any file into it.
        
    Returns:
    --------
    dict or None
        None when the dataset cannot be loaded. Otherwise a dictionary with
        'market_data' (the possibly filtered frame) and 'concentration',
        whose 'overall_metrics' maps metric labels to Gini coefficients.
    """
    os.makedirs(save_path, exist_ok=True)
    print(f"Saving results to {save_path}")

    market_data = load_data(data_path)
    if market_data is None:
        print("Failed to load data")
        return None

    # Sorting and head() never change the column set, so check it once
    has_volume = 'volumeNum' in market_data.columns

    if n_markets is not None and has_volume:
        ranked = market_data.sort_values('volumeNum', ascending=False)
        market_data = ranked.head(n_markets)
        print(f"Analyzing top {n_markets} markets by volume")
    elif n_markets is not None:
        market_data = market_data.head(n_markets)
        print(f"Analyzing first {n_markets} markets (volume data not available)")

    # One Gini coefficient per available column, in a fixed label order
    gini_by_label = {}
    for column, label in (('unique_traders_count', 'Trader Count'), ('volumeNum', 'Trading Volume')):
        if column in market_data.columns:
            gini_by_label[label] = calculate_gini(market_data[column].dropna())

    return {
        'market_data': market_data,
        'concentration': {'overall_metrics': gini_by_label},
    }

## Load and Explore Data

In [None]:
data_path = 'data/cleaned_election_data.csv'  # According to datastructure.md
save_path = 'results/trader_analysis'

# Read the CSV directly so its structure can be inspected in this cell
try:
    market_data = pd.read_csv(data_path, low_memory=False)
    print(f"Successfully loaded data with {market_data.shape[0]} rows and {market_data.shape[1]} columns")

    # Keep every column whose lowercase name contains one of the keywords
    trader_columns = list(filter(
        lambda name: any(term in name.lower() for term in ['trader', 'concentration', 'volume', 'trade']),
        market_data.columns,
    ))
    print("\nTrader-related columns:")
    for col in trader_columns:
        print(f"- {col}")
except Exception as e:
    print(f"Error loading data: {e}")
    market_data = None

# Concentration analysis restricted to the top 100 markets by volume
try:
    results = analyze_traders(n_markets=100, data_path=data_path, save_path=save_path)

    if results is None:
        print("Analysis results not available")
    else:
        print("\nAnalysis completed successfully")

        # Gini coefficients are only printed when the results carry them
        has_metrics = 'concentration' in results and 'overall_metrics' in results['concentration']
        if has_metrics:
            print("\nConcentration Metrics (Gini Coefficients):")
            for metric, value in results['concentration']['overall_metrics'].items():
                print(f"{metric}: {value:.4f}")
except Exception as e:
    print(f"Error running analysis: {e}")

    # The analysis raised: fall back to Gini coefficients computed on the
    # full frame loaded above, if it is usable
    if market_data is not None:
        if 'unique_traders_count' in market_data.columns:
            print("\nBasic Trader Concentration Analysis:")

            trader_gini = calculate_gini(market_data['unique_traders_count'].dropna())
            print(f"Trader Count Gini Coefficient: {trader_gini:.4f}")

            if 'volumeNum' in market_data.columns:
                volume_gini = calculate_gini(market_data['volumeNum'].dropna())
                print(f"Trading Volume Gini Coefficient: {volume_gini:.4f}")

# Basic Market Statistics

In [None]:

if market_data is None:
    print("Data not available")
else:
    # Trader metrics of interest, narrowed to those present in the dataset
    trader_metrics = ['unique_traders_count', 'trader_to_trade_ratio', 'trader_concentration']
    available_metrics = [name for name in trader_metrics if name in market_data.columns]

    if not available_metrics:
        print("No trader metrics found in dataset")
    else:
        print("Summary statistics for trader metrics:")
        print(market_data[available_metrics].describe())

        # One histogram panel per available metric
        fig, axes = plt.subplots(1, len(available_metrics), figsize=(15, 5))

        # subplots() returns a bare Axes for a single panel; wrap it so the
        # loop below can treat both cases alike
        if len(available_metrics) == 1:
            axes = [axes]

        for ax, metric in zip(axes, available_metrics):
            data = market_data[metric].dropna()

            # Hide upper outliers (above Q3 + 1.5 * IQR) from the histogram only;
            # the median marker still uses the unfiltered values
            q1 = data.quantile(0.25)
            q3 = data.quantile(0.75)
            upper_bound = q3 + 1.5 * (q3 - q1)
            filtered_data = data[data <= upper_bound]

            sns.histplot(filtered_data, kde=True, ax=ax)
            ax.set_title(f'Distribution of {metric}')
            ax.axvline(data.median(), color='red', linestyle='--', label='Median')
            ax.legend()

        plt.tight_layout()
        plt.show()



# Run Comprehensive Trader Analysis

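Now let's run the trader analysis on the top 100 markets by volume. Concentration metrics (Gini coefficients) are reported whenever they are computed; the market summary and trader-type tables are shown only if the analysis results include them.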


In [None]:

# Run the analysis on the top 100 markets by volume, using the paths configured
# in the data-loading cell so that this run reads the same dataset and output
# directory as the rest of the notebook (instead of the function defaults)
results = None
try:
    if 'analyze_traders' in globals():
        results = analyze_traders(n_markets=100, data_path=data_path, save_path=save_path)
    else:
        print("analyze_traders function not available")
except Exception as e:
    print(f"Error running analysis: {e}")

if results is not None:
    print("\nAnalysis completed successfully")
    
    # Each section below is optional: it is shown only when the results
    # dictionary actually carries the corresponding key
    if 'market_summary' in results:
        print("\nMarket Summary:")
        print(results['market_summary'].head())
    
    # Display trader type summary if available
    if 'trader_types' in results and results['trader_types'] is not None:
        print("\nTrader Type Distribution:")
        print(results['trader_types']['type_summary'])
        
        # Display trader profiles
        print("\nTrader Type Profiles:")
        print(results['trader_types']['cluster_profiles'])
        
    # Display concentration metrics
    if 'concentration' in results and 'overall_metrics' in results['concentration']:
        print("\nConcentration Metrics (Gini Coefficients):")
        for metric, value in results['concentration']['overall_metrics'].items():
            print(f"{metric}: {value:.4f}")
else:
    print("Analysis results not available")
    
    # No results: fall back to Gini coefficients on the full market frame
    if market_data is not None and 'unique_traders_count' in market_data.columns:
        print("\nBasic Trader Concentration Analysis:")
        
        # Calculate Gini for trader count
        trader_gini = calculate_gini(market_data['unique_traders_count'].dropna())
        print(f"Trader Count Gini Coefficient: {trader_gini:.4f}")
        
        # Calculate Gini for volume if available
        if 'volumeNum' in market_data.columns:
            volume_gini = calculate_gini(market_data['volumeNum'].dropna())
            print(f"Trading Volume Gini Coefficient: {volume_gini:.4f}")


# Visualize Trader Concentration


In [None]:

if market_data is not None and 'unique_traders_count' in market_data.columns:
    # --- Average trader count per election type ---------------------------
    if 'event_electionType' in market_data.columns:
        # Only aggregate columns that exist: naming a missing column in the
        # agg() mapping raises KeyError
        agg_spec = {'unique_traders_count': 'mean'}
        if 'trader_concentration' in market_data.columns:
            agg_spec['trader_concentration'] = 'mean'

        type_metrics = (
            market_data.groupby('event_electionType')
            .agg(agg_spec)
            .sort_values('unique_traders_count', ascending=False)
        )

        # Plot top 10 election types by trader count. The figure is created
        # here, inside the branch, so no empty canvas is left behind when the
        # election type column is missing.
        top_types = type_metrics.head(10)
        plt.figure(figsize=(10, 6))
        plt.barh(top_types.index, top_types['unique_traders_count'])
        plt.title('Average Number of Traders by Election Type')
        plt.xlabel('Average Number of Traders')
        plt.tight_layout()
        plt.show()
    
    # --- Lorenz curve for trader counts ------------------------------------
    traders = market_data['unique_traders_count'].dropna().sort_values()
    total_traders = traders.sum()
    
    # An empty or all-zero column has no meaningful cumulative share
    if len(traders) > 0 and total_traders > 0:
        n_obs = len(traders)
        # Prepend the origin so the curve starts at (0, 0)
        lorenz_y = np.insert(np.cumsum(traders.to_numpy(dtype=float)) / total_traders, 0, 0.0)
        lorenz_x = np.arange(0, n_obs + 1) / n_obs
        
        plt.figure(figsize=(10, 6))
        plt.plot(lorenz_x, lorenz_y, label='Lorenz curve')
        plt.plot([0, 1], [0, 1], 'k--', label='Perfect equality')
        plt.fill_between(lorenz_x, lorenz_x, lorenz_y, alpha=0.2)
        
        # Calculate and show Gini coefficient
        gini = calculate_gini(traders)
        plt.title(f'Trader Distribution (Gini Coefficient: {gini:.4f})')
        plt.xlabel('Cumulative % of Markets')
        plt.ylabel('Cumulative % of Traders')
        plt.legend()
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.show()
    else:
        print("No positive trader counts available for the Lorenz curve")
    
    # --- Trader metrics versus prediction accuracy -------------------------
    if 'brier_score' in market_data.columns:
        # Accuracy is expressed as 1 - Brier score in every panel
        accuracy = 1 - market_data['brier_score']
        plt.figure(figsize=(15, 5))
        
        # 1. Traders vs Accuracy
        plt.subplot(1, 3, 1)
        plt.scatter(market_data['unique_traders_count'], accuracy, alpha=0.5)
        plt.title('Traders vs Prediction Accuracy')
        plt.xlabel('Number of Traders')
        plt.ylabel('Accuracy (1 - Brier Score)')
        
        # 2. Concentration vs Accuracy
        if 'trader_concentration' in market_data.columns:
            plt.subplot(1, 3, 2)
            plt.scatter(market_data['trader_concentration'], accuracy, alpha=0.5)
            plt.title('Trader Concentration vs Accuracy')
            plt.xlabel('Trader Concentration')
            plt.ylabel('Accuracy (1 - Brier Score)')
        
        # 3. Volume vs Accuracy
        if 'volumeNum' in market_data.columns:
            # log10 is undefined for zero or negative volume, so plot only
            # markets with strictly positive volume
            volume = market_data['volumeNum']
            positive = volume > 0
            plt.subplot(1, 3, 3)
            plt.scatter(np.log10(volume[positive]), accuracy[positive], alpha=0.5)
            plt.title('Trading Volume vs Accuracy')
            plt.xlabel('Log10(Volume)')
            plt.ylabel('Accuracy (1 - Brier Score)')
        
        plt.tight_layout()
        plt.show()

# Whale Concentration
This section analyzes the impact of "whale" traders (large traders) on market dynamics.
## 1. Define Whale Criteria

In [None]:
print("\n## Defining Whale Traders ##\n")

# Ids of the markets whose trades will be loaded for the whale analysis
sample_markets = []

# Selection needs the market frame plus its 'id' and 'volumeNum' columns
if market_data is not None and {'id', 'volumeNum'}.issubset(market_data.columns):
    # Prefer the three highest-volume presidential election markets
    if 'event_electionType' in market_data.columns:
        presidential_markets = market_data[market_data['event_electionType'].str.contains('Presidential', na=False)]
        if not presidential_markets.empty:
            sample_markets = presidential_markets.sort_values('volumeNum', ascending=False).head(3)['id'].tolist()

    # Fallback to top markets by volume, both when the election type column
    # is missing and when it exists but contains no presidential markets
    if not sample_markets:
        sample_markets = market_data.sort_values('volumeNum', ascending=False).head(3)['id'].tolist()
else:
    print("Cannot select markets: market data with 'id' and 'volumeNum' columns is required")

print(f"Selected {len(sample_markets)} markets for whale analysis")

# Function to load and process trade data
def load_trades_for_analysis(market_ids):
    """
    Load and combine trade data for the given markets.

    Each market is loaded with the project's ``load_trade_data`` helper and
    tagged with a 'market_id' column before the per-market frames are
    concatenated. A market whose load raises is reported and skipped.

    Parameters:
    -----------
    market_ids : iterable
        Identifiers passed one by one to ``load_trade_data``.

    Returns:
    --------
    pandas.DataFrame or None
        The concatenated trades, or None when the loader cannot be imported
        or no market yielded any data.
    """
    # A missing project module should degrade to "no trade data" rather than
    # crash the cell and leave the caller's variable undefined
    try:
        from src.utils.data_loader import load_trade_data
    except ImportError as e:
        print(f"Error importing trade data loader: {e}")
        return None
    
    all_trades = []
    for market_id in market_ids:
        try:
            trades = load_trade_data(market_id)
            if trades is not None:
                trades['market_id'] = market_id
                all_trades.append(trades)
        except Exception as e:
            print(f"Error loading trades for market {market_id}: {e}")
    
    if not all_trades:
        print("No trade data loaded")
        return None

    combined_trades = pd.concat(all_trades, ignore_index=True)
    # Count the markets that actually produced data, not the ones requested
    print(f"Loaded {len(combined_trades)} trades across {len(all_trades)} markets")
    return combined_trades

# Load trade data
trade_data = load_trades_for_analysis(sample_markets)

# Defined on every path: the following cells test len(whale_ids)
whale_ids = []

if trade_data is not None:
    # Identify trader columns
    trader_columns = [col for col in trade_data.columns if 'maker' in col.lower() or 'taker' in col.lower()]
    print(f"Trader-related columns: {trader_columns}")
    
    # Create trader_id column from the first identifier column that exists
    for id_column in ('maker', 'taker', 'makerAddress'):
        if id_column in trade_data.columns:
            trade_data['trader_id'] = trade_data[id_column]
            break
    else:
        print("Warning: No trader identifier column found")
        
    # Define whales as top 5% traders by volume
    whale_threshold = 0.05
    
    try:
        from src.trader_analysis import identify_whales
        whale_results = identify_whales(market_data, trade_data, threshold=whale_threshold, method='volume')
        
        if whale_results:
            print(f"\nIdentified {whale_results['whale_count']} whales out of {whale_results['total_trader_count']} traders ({whale_threshold*100:.1f}%)")
            print(f"Whales control {whale_results['whale_concentration']*100:.2f}% of total trading volume")
            print(f"Average whale volume is {whale_results['whale_to_non_whale_ratio']:.1f}x higher than non-whale volume")
            
            # Display top 5 whales
            print("\nTop 5 whale traders:")
            whales = whale_results['whales']
            display(whales.head(5))

            # Record the whale ids for the impact and Granger cells.
            # NOTE(review): the layout of identify_whales' result is not visible
            # here; this assumes the ids are under a 'whale_ids' key, in a
            # 'trader_id' column, or in the index of 'whales' - confirm.
            if 'whale_ids' in whale_results:
                whale_ids = list(whale_results['whale_ids'])
            elif 'trader_id' in getattr(whales, 'columns', []):
                whale_ids = whales['trader_id'].tolist()
            else:
                whale_ids = whales.index.tolist()
        else:
            print("Whale identification failed")
    except Exception as e:
        print(f"Error in whale identification: {e}")
        
        # Fallback implementation: rank traders by summed trade amount.
        # NOTE(review): 'trade_amount' is only derived in the later
        # classification cell, so on a fresh run this branch needs the raw
        # trade data to already carry that column.
        if 'trader_id' in trade_data.columns and 'trade_amount' in trade_data.columns:
            trader_volumes = trade_data.groupby('trader_id')['trade_amount'].sum().sort_values(ascending=False)
            whale_count = max(1, int(len(trader_volumes) * whale_threshold))
            whales = trader_volumes.head(whale_count)
            
            print(f"\nIdentified {whale_count} whales out of {len(trader_volumes)} traders ({whale_threshold*100:.1f}%)")
            print(f"Whales control {whales.sum() / trader_volumes.sum() * 100:.2f}% of total trading volume")
            whale_ids = whales.index.tolist()
        else:
            print("Cannot identify whales: required columns missing")
            whale_ids = []
else:
    print("No trade data available for whale analysis")


## 2. Movement Analysis

In [None]:
print("\n## Whale Price Impact Analysis ##\n")

if trade_data is not None and 'trader_id' in trade_data.columns and len(whale_ids) > 0:
    try:
        from src.trader_analysis import analyze_whale_impact
        
        impact_results = analyze_whale_impact(market_data, trade_data, whale_ids)
        
        if impact_results:
            print(f"Analyzed whale impact across {impact_results['markets_analyzed']} markets")
            
            # Each summary figure is optional in the results
            if 'avg_whale_price_impact' in impact_results:
                print(f"Average price impact after whale trades: {impact_results['avg_whale_price_impact']:.6f}")
            
            if 'avg_non_whale_price_impact' in impact_results:
                print(f"Average price impact after non-whale trades: {impact_results['avg_non_whale_price_impact']:.6f}")
            
            if 'avg_whale_followed_ratio' in impact_results:
                print(f"Whale trades followed by same-direction non-whale trades: {impact_results['avg_whale_followed_ratio']*100:.1f}%")
            
            # Visualization of price impacts
            if 'whale_price_impact' in impact_results and 'non_whale_price_impact' in impact_results:
                # Convert to arrays for plotting
                whale_impacts = np.array(impact_results['whale_price_impact'])
                non_whale_impacts = np.array(impact_results['non_whale_price_impact'])
                
                # Create labels and data for boxplot
                labels = ['Whale Trades', 'Non-Whale Trades']
                data = [whale_impacts, non_whale_impacts]
                
                fig, ax = plt.subplots(figsize=(10, 6))
                ax.boxplot(data)
                # Boxes sit at positions 1..N by default. Labelling the ticks
                # directly avoids boxplot's `labels` keyword, which Matplotlib
                # 3.9 deprecated in favour of `tick_labels`.
                ax.set_xticks(range(1, len(labels) + 1))
                ax.set_xticklabels(labels)
                ax.set_ylabel('Price Impact')
                ax.set_title('Whale vs. Non-Whale Trade Price Impact')
                ax.grid(alpha=0.3)
                plt.show()
        else:
            print("Whale impact analysis returned no results")
    except Exception as e:
        print(f"Error in whale impact analysis: {e}")
else:
    print("Cannot analyze whale impact: insufficient data")

## 3. Granger Causality Test

In [None]:
print("\n## Granger Causality Analysis ##\n")

if trade_data is not None and 'trader_id' in trade_data.columns and len(whale_ids) > 0 and len(sample_markets) > 0:
    try:
        from src.trader_analysis import run_granger_causality_test
    except Exception as e:
        print(f"Error in Granger causality analysis: {e}")
        run_granger_causality_test = None
    
    if run_granger_causality_test is not None:
        print("Testing if whale trades Granger-cause price movements:")
        for market_id in sample_markets:
            # Errors are handled per market so that one failing market does
            # not suppress the results of the remaining ones
            try:
                gc_result = run_granger_causality_test(market_id, trade_data, whale_ids)
                
                if not gc_result:
                    print(f"Granger causality test failed for market {market_id}")
                    continue
                
                # Use the market's question as its display name when a
                # matching row exists; otherwise fall back to the id
                market_name = f"Market {market_id}"
                if 'question' in market_data.columns:
                    questions = market_data.loc[market_data['id'] == market_id, 'question']
                    if not questions.empty:
                        market_name = questions.iloc[0]
                
                print(f"\nMarket: {market_name}")
                print(f"Granger causality: {'Significant' if gc_result['is_significant'] else 'Not significant'}")
                
                if gc_result['is_significant']:
                    print(f"Best lag: {gc_result['best_lag']} (p-value: {gc_result['min_p_value']:.4f})")
                    print(f"Significant lags: {gc_result['significant_lags']}")
                else:
                    print(f"Minimum p-value: {gc_result['min_p_value']:.4f} at lag {gc_result['best_lag']}")
            except Exception as e:
                print(f"Error in Granger causality analysis for market {market_id}: {e}")
else:
    print("Cannot perform Granger causality test: insufficient data")

# Trader Classification Analysis
This section classifies traders into different types based on their behavior patterns.

In [None]:
print("\n## Trader Type Classification ##\n")

# Ensure we have trade data
if trade_data is not None and 'trader_id' in trade_data.columns:
    # Add a 'trade_amount' column if missing, taken from the first available
    # filled-amount column (or a constant placeholder when there is none)
    if 'trade_amount' not in trade_data.columns:
        if 'makerAmountFilled' in trade_data.columns:
            trade_data['trade_amount'] = trade_data['makerAmountFilled']
        elif 'takerAmountFilled' in trade_data.columns:
            trade_data['trade_amount'] = trade_data['takerAmountFilled']
        else:
            print("Warning: No trade amount column found, using placeholder values")
            trade_data['trade_amount'] = 1.0
    
    try:
        from src.trader_analysis import classify_traders_by_behavior
        
        # Run classification
        classification_results = classify_traders_by_behavior(trade_data, n_clusters=5)
        
        if classification_results:
            # Display cluster profiles
            print("Trader Type Profiles:")
            display(classification_results['cluster_profiles'])
            
            # Display cluster names with their size and share
            print("\nIdentified trader types:")
            for cluster_id, name in classification_results['cluster_names'].items():
                count = classification_results['cluster_profiles'].loc[cluster_id, 'count']
                percent = classification_results['cluster_profiles'].loc[cluster_id, 'percentage']
                print(f"- {name}: {count:.0f} traders ({percent:.1f}%)")
            
            # Prepare data for radar chart (work on a copy so the displayed
            # profiles are left untouched)
            profiles = classification_results['cluster_profiles'].copy()
            feature_cols = [col for col in profiles.columns if col not in ['count', 'percentage']]
            
            # Scale each feature by its maximum so features are comparable.
            # A zero or missing maximum would turn the column into NaN/inf,
            # so such columns are left unscaled.
            for col in feature_cols:
                col_max = profiles[col].max()
                if pd.notna(col_max) and col_max != 0:
                    profiles[col] = profiles[col] / col_max
            
            num_features = len(feature_cols)
            
            # A radar chart needs at least one feature axis
            if num_features > 0:
                plt.figure(figsize=(12, 8))
                
                # Create angles for the radar chart
                angles = np.linspace(0, 2*np.pi, num_features, endpoint=False).tolist()
                angles += angles[:1]  # Close the loop
                
                # Create subplot with polar projection
                ax = plt.subplot(111, polar=True)
                
                # Plot each cluster
                for cluster_id, name in classification_results['cluster_names'].items():
                    values = profiles.loc[cluster_id, feature_cols].tolist()
                    values += values[:1]  # Close the loop
                    
                    ax.plot(angles, values, linewidth=2, label=name)
                    ax.fill(angles, values, alpha=0.1)
                
                # Set feature labels
                ax.set_xticks(angles[:-1])
                ax.set_xticklabels(feature_cols)
                
                # Add legend and title
                plt.legend(loc='upper right')
                plt.title('Trader Type Characteristics', size=15)
                plt.tight_layout()
                
                plt.show()
            else:
                print("No feature columns available for the radar chart")
            
            # Visualize cluster sizes
            plt.figure(figsize=(10, 6))
            
            # Create bar chart of cluster sizes
            sizes = classification_results['cluster_profiles']['count']
            names = [classification_results['cluster_names'][i] for i in sizes.index]
            
            plt.bar(names, sizes)
            plt.ylabel('Number of Traders')
            plt.title('Trader Distribution by Type')
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
            
            plt.show()
            
            # Analyze feature importance (optional in the results)
            if 'feature_importance' in classification_results:
                plt.figure(figsize=(10, 6))
                
                feature_imp = classification_results['feature_importance']
                features = list(feature_imp.keys())
                importances = list(feature_imp.values())
                
                # Sort by importance (ascending, so the largest bar ends up on top)
                sorted_idx = np.argsort(importances)
                plt.barh([features[i] for i in sorted_idx], [importances[i] for i in sorted_idx])
                
                plt.xlabel('Feature Importance')
                plt.title('Features for Trader Classification')
                plt.tight_layout()
                
                plt.show()
        else:
            print("Trader classification failed")
    except Exception as e:
        print(f"Error in trader classification: {e}")
        import traceback
        traceback.print_exc()
else:
    print("Cannot classify traders: insufficient data")

# Structured Output

In [None]:
# Generate structured summary for thesis
if market_data is not None and 'unique_traders_count' in market_data.columns:
    import json

    def _json_safe(value):
        """Recursively convert NumPy scalars to Python types and NaN/inf to None."""
        if isinstance(value, dict):
            return {str(key): _json_safe(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [_json_safe(item) for item in value]
        if isinstance(value, np.generic):
            value = value.item()
        if isinstance(value, float) and not np.isfinite(value):
            return None
        return value

    def _accuracy_correlation(column):
        """Correlation between `column` and (1 - brier_score), or None if undefined."""
        pair = market_data[[column, 'brier_score']].dropna()
        # A correlation needs at least two complete observations
        if len(pair) < 2:
            return None
        corr = np.corrcoef(pair[column], 1 - pair['brier_score'])[0, 1]
        return float(corr) if np.isfinite(corr) else None

    has_volume = 'volumeNum' in market_data.columns
    quantile_levels = [0.1, 0.25, 0.5, 0.75, 0.9]

    # Create summary dictionary
    thesis_summary = {
        "trader_concentration": {
            "gini_coefficients": {
                "trader_count": calculate_gini(market_data['unique_traders_count'].dropna()),
                "trading_volume": calculate_gini(market_data['volumeNum'].dropna()) if has_volume else None
            },
            "percentiles": {
                "trader_count": market_data['unique_traders_count'].quantile(quantile_levels).to_dict(),
                "volume": market_data['volumeNum'].quantile(quantile_levels).to_dict() if has_volume else None
            }
        },
        "election_type_analysis": market_data.groupby('event_electionType')['unique_traders_count'].agg(['mean', 'median', 'count']).to_dict() if 'event_electionType' in market_data.columns else None,
        "country_analysis": market_data.groupby('event_country')['unique_traders_count'].agg(['mean', 'median', 'count']).to_dict() if 'event_country' in market_data.columns else None,
        "accuracy_correlation": {
            "trader_count_vs_accuracy": None,
            "volume_vs_accuracy": None
        }
    }
    
    # Correlations are computed on rows where both columns are present
    if 'brier_score' in market_data.columns:
        thesis_summary["accuracy_correlation"]["trader_count_vs_accuracy"] = _accuracy_correlation('unique_traders_count')
        if has_volume:
            thesis_summary["accuracy_correlation"]["volume_vs_accuracy"] = _accuracy_correlation('volumeNum')
    
    # Save as JSON. The directory is ensured here because it may not exist yet,
    # and values are sanitised so the file stays strict JSON (no NaN tokens).
    os.makedirs(save_path, exist_ok=True)
    summary_file = os.path.join(save_path, 'thesis_summary.json')
    with open(summary_file, 'w') as f:
        json.dump(_json_safe(thesis_summary), f, indent=2)
    print(f"Saved thesis summary to {summary_file}")
else:
    print("Cannot build thesis summary: market data with 'unique_traders_count' is required")

In [None]:
# Generate key findings text
if market_data is not None and 'unique_traders_count' in market_data.columns:
    # Create findings list
    findings = []
    
    # Overall statistics
    findings.append(f"Analysis of {len(market_data)} Polymarket election markets")
    findings.append(f"Average number of unique traders: {market_data['unique_traders_count'].mean():.1f}")
    
    # Concentration metrics
    trader_gini = calculate_gini(market_data['unique_traders_count'].dropna())
    findings.append(f"Trader concentration Gini coefficient: {trader_gini:.4f}")
    
    if 'volumeNum' in market_data.columns:
        volume_gini = calculate_gini(market_data['volumeNum'].dropna())
        findings.append(f"Trading volume concentration Gini coefficient: {volume_gini:.4f}")
    
    # Market type findings: the three election types with the highest average trader count
    if 'event_electionType' in market_data.columns:
        type_metrics = market_data.groupby('event_electionType').agg({
            'unique_traders_count': ['mean', 'count']
        })
        type_metrics.columns = ['avg_traders', 'count']
        type_metrics = type_metrics.sort_values('avg_traders', ascending=False)
        
        top_types = type_metrics.head(3)
        findings.append("Election types with highest trader participation:")
        for election_type, row in top_types.iterrows():
            # iterrows() upcasts the mixed float/int row to float, so the
            # count is cast back to int to print "n=12" instead of "n=12.0"
            findings.append(f"- {election_type}: {row['avg_traders']:.1f} traders (n={int(row['count'])})")
    
    # Accuracy relationship, computed on rows where both values are present
    if 'brier_score' in market_data.columns:
        corr_df = market_data[['unique_traders_count', 'brier_score']].dropna()
        
        # A correlation needs at least two complete observations
        if len(corr_df) > 1:
            corr = np.corrcoef(corr_df['unique_traders_count'], 1 - corr_df['brier_score'])[0,1]
            
            if np.isnan(corr):
                # Without this check a NaN would fall through both comparisons
                # below and be reported as a negative relationship
                findings.append("Correlation between trader count and prediction accuracy could not be computed.")
            else:
                findings.append(f"Correlation between trader count and prediction accuracy: {corr:.4f}")
                
                # Add interpretation
                if abs(corr) < 0.1:
                    findings.append("There appears to be little relationship between trader count and prediction accuracy.")
                elif corr > 0:
                    findings.append("Markets with more traders tend to have higher prediction accuracy.")
                else:
                    findings.append("Interestingly, markets with fewer traders show higher prediction accuracy.")

    # Save findings (the output directory may not exist yet)
    os.makedirs(save_path, exist_ok=True)
    findings_file = os.path.join(save_path, 'key_findings.txt')
    with open(findings_file, 'w') as f:
        f.write('\n\n'.join(findings))
    print(f"Saved key findings to {findings_file}")
else:
    print("Cannot generate key findings: market data with 'unique_traders_count' is required")