# Figure 1: Cross-entropy Loss Across Models and Authors

This notebook generates Figure 1 from the paper, which shows:
- **Part A**: Training curves showing cross-entropy loss over epochs for each model
- **Part B**: Strip plot showing loss distributions per model on held-out test data

As described in the paper:
> "We train a GPT-2 model on each author's corpus and use the trained model to compute the cross-entropy loss on held-out texts from both the target author and each of the other authors in the dataset. By comparing these losses, we assess whether the model captures author-specific stylistic patterns."

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Silence all warnings so library deprecation notices do not clutter the
# notebook output.
warnings.filterwarnings('ignore')

# Set style for publication-quality figures.
# Matplotlib 3.6 renamed its bundled seaborn style sheets with a
# "seaborn-v0_8-" prefix; older releases only know the un-prefixed name, so
# fall back to it when the new name is not registered.
_WHITEGRID_STYLE = 'seaborn-v0_8-whitegrid'
if _WHITEGRID_STYLE not in plt.style.available:
    _WHITEGRID_STYLE = 'seaborn-whitegrid'
plt.style.use(_WHITEGRID_STYLE)
sns.set_palette('husl')

In [None]:
# Read the consolidated model results table from disk.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# this path must only ever point at a trusted, project-generated pickle.
data_path = Path('../data/model_results.pkl')
df = pd.read_pickle(data_path)

# Quick sanity summary: overall size, then the distinct values of the
# columns that identify each run.
print(f"Loaded {len(df):,} rows from {df['model_name'].nunique()} models")
for label, column in (("Authors", "author"), ("Seeds", "seed")):
    print(f"{label}: {sorted(df[column].unique())}")

## Part A: Training Curves

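This plot shows the average cross-entropy loss on training data and held-out test data from each author, plotted as a function of the number of training epochs. Each color denotes a model trained on a single author's work. Error ribbons denote bootstrap-estimated 95% confidence intervals over 10 random seeds. (Note: the code in this notebook does not bootstrap; it draws a normal-approximation 95% interval, mean ± 1.96 × standard error, computed from the rows available at each epoch.)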

In [None]:
# Canonical author ordering used for subplot layout, x-axis positions and
# legend order throughout the notebook.
AUTHOR_ORDER = [
    "baum",
    "thompson",
    "austen",
    "dickens",
    "fitzgerald",
    "melville",
    "twain",
    "wells",
]

# One fixed hex colour per author, paired positionally with AUTHOR_ORDER so
# the two constants can never drift apart.
AUTHOR_COLORS = dict(
    zip(
        AUTHOR_ORDER,
        (
            "#1f77b4",
            "#ff7f0e",
            "#2ca02c",
            "#d62728",
            "#9467bd",
            "#8c564b",
            "#e377c2",
            "#7f7f7f",
        ),
    )
)

In [None]:
def plot_training_curves(df, max_epochs=1000, sample_every=10, ylim=(2.5, 5.5)):
    """Plot mean loss versus training epoch, one panel per evaluated dataset.

    The first panel shows the loss on the training data; each following panel
    shows the loss on one author's evaluation data. Inside every panel there
    is one line per training author, coloured via ``AUTHOR_COLORS``, with a
    shaded 95% interval around the mean.

    The interval is a normal approximation (mean +/- 1.96 * std / sqrt(n))
    computed over all rows that share an ``epochs_completed`` value; it is
    not a bootstrap estimate.

    Args:
        df: Long-format results table with the columns ``train_author``,
            ``loss_dataset``, ``epochs_completed`` and ``loss_value``.
        max_epochs: Largest epoch to plot; also the right x-axis limit.
        sample_every: Keep only epochs divisible by this value, to reduce
            the number of plotted points. Must be at least 1.
        ylim: ``(bottom, top)`` y-axis limits applied to every panel.

    Returns:
        The matplotlib ``Figure`` holding all panels.

    Raises:
        ValueError: If ``sample_every`` is smaller than 1.
    """
    if sample_every < 1:
        raise ValueError("sample_every must be a positive integer")

    # One panel for the training data plus one per evaluation author.
    datasets = ['train'] + AUTHOR_ORDER

    # Size the grid from the number of panels instead of assuming exactly nine.
    n_cols = 3
    n_rows = (len(datasets) + n_cols - 1) // n_cols
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 4 * n_rows),
                             squeeze=False)
    axes = axes.flatten()

    # The epoch filters do not depend on the panel or author, so apply them once.
    epochs = df['epochs_completed']
    sampled = df[(epochs <= max_epochs) & (epochs % sample_every == 0)]

    for idx, dataset in enumerate(datasets):
        ax = axes[idx]
        panel_rows = sampled[sampled['loss_dataset'] == dataset]

        for train_author in AUTHOR_ORDER:
            subset = panel_rows[panel_rows['train_author'] == train_author]
            if subset.empty:
                continue

            # Per-epoch mean, sample standard deviation and sample count.
            stats = subset.groupby('epochs_completed')['loss_value'].agg(
                ['mean', 'std', 'count'])

            # Normal-approximation 95% CI half-width. The standard deviation
            # is NaN when an epoch has a single sample; draw a zero-width
            # ribbon there instead of propagating NaN into fill_between.
            half_width = (1.96 * stats['std'] / np.sqrt(stats['count'])).fillna(0.0)

            color = AUTHOR_COLORS[train_author]
            ax.plot(stats.index, stats['mean'].values,
                    color=color,
                    label=train_author.capitalize(),
                    linewidth=1.5, alpha=0.8)
            ax.fill_between(stats.index,
                            stats['mean'].values - half_width.values,
                            stats['mean'].values + half_width.values,
                            color=color,
                            alpha=0.15)

        ax.set_title(f"Evaluation: {dataset.capitalize()}" if dataset != 'train'
                     else "Training Data", fontsize=12)
        ax.set_xlabel('Training Epochs', fontsize=10)
        ax.set_ylabel('Cross-entropy Loss', fontsize=10)
        ax.set_xlim(0, max_epochs)
        ax.set_ylim(*ylim)
        ax.grid(True, alpha=0.3)

        # A single legend on the first panel is enough because colours are
        # shared across panels; skip it when nothing was drawn there.
        if idx == 0 and ax.get_legend_handles_labels()[0]:
            ax.legend(loc='upper right', fontsize=8, ncol=2)

    # Hide any grid cells left over when the panel count does not fill the
    # last row (none with the default nine panels).
    for spare_ax in axes[len(datasets):]:
        spare_ax.set_visible(False)

    # Act on this figure explicitly rather than on pyplot's current figure.
    fig.suptitle('Figure 1A: Cross-entropy Loss Across Training', fontsize=14, y=1.02)
    fig.tight_layout()

    return fig

In [None]:
# Generate Figure 1A
fig_1a = plot_training_curves(df, max_epochs=1000, sample_every=10)
plt.show()

# Save the figure. Create the destination folder first so savefig does not
# fail with FileNotFoundError on a fresh checkout.
output_path = Path('../paper/figs/source/all_losses_generated.pdf')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig_1a.savefig(output_path, dpi=300, bbox_inches='tight')
print(f"Saved Figure 1A to {output_path}")

## Part B: Loss Distribution Strip Plot

This plot shows the cross-entropy loss assigned to held-out test data by each author's model. Each point represents the loss for a specific held-out text, with colors indicating which author wrote the text.

In [None]:
def plot_loss_distributions(df, *, jitter=0.15, seed=0):
    """Create a jittered strip plot of final-epoch losses for every model.

    For each model (``model_name``) only the rows at that model's largest
    ``epochs_completed`` are kept, and only rows whose ``loss_dataset`` is one
    of the authors in ``AUTHOR_ORDER`` (so the ``'train'`` rows are dropped).
    Points are placed on the x-axis by training author and coloured by the
    author of the evaluated text.

    Args:
        df: Long-format results table with the columns ``model_name``,
            ``train_author``, ``loss_dataset``, ``epochs_completed`` and
            ``loss_value``.
        jitter: Standard deviation of the Gaussian horizontal jitter added to
            each point so overlapping points stay visible.
        seed: Seed for the jitter random generator. The default makes the
            figure reproducible from run to run; pass ``None`` for fresh
            randomness on each call.

    Returns:
        The matplotlib ``Figure`` containing the strip plot.
    """
    # Select each model's final-epoch rows in one vectorised pass instead of
    # re-filtering the whole frame once per model.
    last_epoch = df.groupby('model_name')['epochs_completed'].transform('max')
    final_df = df[
        (df['epochs_completed'] == last_epoch) &
        (df['loss_dataset'].isin(AUTHOR_ORDER))  # Only author datasets
    ]

    # A local generator keeps the jitter reproducible and leaves NumPy's
    # global random state untouched.
    rng = np.random.default_rng(seed)

    fig, ax = plt.subplots(figsize=(12, 6))

    # Attach a legend label the first time each evaluation author is actually
    # drawn, so an author missing from the first model's data still gets an
    # entry.
    labelled = set()

    for x_center, train_author in enumerate(AUTHOR_ORDER):
        author_data = final_df[final_df['train_author'] == train_author]

        for eval_author in AUTHOR_ORDER:
            losses = author_data.loc[
                author_data['loss_dataset'] == eval_author, 'loss_value'
            ].values
            if len(losses) == 0:
                continue

            # Add jitter for visibility
            x_positions = rng.normal(x_center, jitter, len(losses))

            if eval_author in labelled:
                label = ''
            else:
                label = eval_author.capitalize()
                labelled.add(eval_author)

            ax.scatter(x_positions, losses,
                       color=AUTHOR_COLORS[eval_author],
                       alpha=0.6, s=30,
                       label=label)

    # Customize the plot
    ax.set_xticks(range(len(AUTHOR_ORDER)))
    ax.set_xticklabels([a.capitalize() for a in AUTHOR_ORDER], rotation=45, ha='right')
    ax.set_xlabel('Model Trained On', fontsize=12)
    ax.set_ylabel('Cross-entropy Loss', fontsize=12)
    ax.set_title('Figure 1B: Loss Distributions by Model', fontsize=14)
    ax.grid(True, alpha=0.3, axis='y')

    # Build the legend in AUTHOR_ORDER regardless of which model first drew
    # each author.
    handles, labels = ax.get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    ordered_labels = [a.capitalize() for a in AUTHOR_ORDER
                      if a.capitalize() in by_label]
    if ordered_labels:
        ax.legend([by_label[name] for name in ordered_labels], ordered_labels,
                  title='Evaluation Text Author',
                  bbox_to_anchor=(1.05, 1), loc='upper left')

    fig.tight_layout()
    return fig

In [None]:
# Generate Figure 1B
fig_1b = plot_loss_distributions(df)
plt.show()

# Save the figure. Create the destination folder first so savefig does not
# fail with FileNotFoundError on a fresh checkout.
output_path = Path('../paper/figs/source/stripplot_generated.pdf')
output_path.parent.mkdir(parents=True, exist_ok=True)
fig_1b.savefig(output_path, dpi=300, bbox_inches='tight')
print(f"Saved Figure 1B to {output_path}")

## Key Observations

From these plots, we can observe:

1. **Training convergence**: All models show decreasing training loss over epochs, converging to different final values depending on the author.

2. **Author-specific patterns**: Models achieve lower cross-entropy loss on texts from the author they were trained on (diagonal pattern in evaluation).

3. **Stylistic similarity**: Some author pairs show lower cross-model losses, suggesting stylistic similarities.

4. **Variance across seeds**: The confidence intervals show the consistency of results across different random initializations.

These results support the paper's claim that "a model trained on a given author should exhibit lower loss when predicting that author's own texts as compared to the texts of others."

In [None]:
# Calculate and display some summary statistics
# Keep, for every model, only the rows recorded at that model's last epoch.
final_epoch_data = df[df['epochs_completed'] == df.groupby('model_name')['epochs_completed'].transform('max')]

print("Average loss by model on same-author vs different-author texts:")
print("="*60)

for train_author in AUTHOR_ORDER:
    author_models = final_epoch_data[final_epoch_data['train_author'] == train_author]

    # Compare author evaluation sets only. Without this filter the
    # "different author" mean would also include the 'train' rows, because
    # 'train' is simply another loss_dataset value that differs from
    # train_author.
    held_out = author_models[author_models['loss_dataset'].isin(AUTHOR_ORDER)]
    is_same_author = held_out['loss_dataset'] == train_author

    same_author = held_out.loc[is_same_author, 'loss_value'].mean()
    diff_author = held_out.loc[~is_same_author, 'loss_value'].mean()

    print(f"{train_author.capitalize():12} - Same: {same_author:.3f}, Different: {diff_author:.3f}, "
          f"Difference: {diff_author - same_author:.3f}")