## Interpretation of Results

Based on the plots and tables above:

1.  **Accuracy Trends**:
    *   Observe whether accuracy improves or degrades as training progresses (checkpoints 1750 -> 1950 -> 2200).
    *   Compare `Goat` vs `Qloat`. If `Qloat` (Quantized Goat) maintains similar accuracy to `Goat`, it suggests quantization preserves the circuit mechanisms.

2.  **Minimality (Sparsity)**:
    *   Observe if the number of heads decreases or stays stable.
    *   A smaller number of heads with high accuracy implies a more efficient, localized mechanism.

3.  **Component Analysis**:
    *   `struct_reader`: Look at `positional`, `object_value`, `box_label_value`.
    *   `pos_transmitter`, `pos_detector`, `value_fetcher`: Compare accuracy and number of heads for each of their tasks (one plot facet per task).

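*Note: If accuracy is very low (near 0), it might indicate that the regularization (lambda=0.01) was too strong for that specific task/checkpoint, or that the circuit failed to emerge. An accuracy of exactly 0 can also mean that the result file had no parsable `Test Accuracy` entry, because the parser falls back to 0.0 in that case.*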


In [None]:
# Both summary tables share one layout: a row per (Component, Task) and a
# column per (Model, Checkpoint). Duplicate entries are aggregated with
# pivot_table's default aggregation.
pivot_layout = {
    "index": ['Component', 'Task'],
    "columns": ['Model', 'Checkpoint'],
}

# Accuracy summary
pivot_acc = df_main.pivot_table(values='Accuracy', **pivot_layout)
print("Accuracy Table:")
display(pivot_acc)

# Head-count summary
pivot_heads = df_main.pivot_table(values='Num_Heads', **pivot_layout)
print("\nNumber of Heads Table:")
display(pivot_heads)


## Summary Tables
Below are the tabulated results for easy comparison.


In [None]:
# One figure per component: head count against checkpoint, with one facet
# per task and one dashed line per model.
for comp in components:
    comp_rows = df_main.loc[df_main['Component'] == comp]

    grid = sns.FacetGrid(
        data=comp_rows,
        col="Task",
        hue="Model",
        height=4,
        aspect=1.2,
        sharey=False,  # each task gets its own y-range
    )
    grid.map(sns.lineplot, "Checkpoint", "Num_Heads", marker="s", linestyle="--")
    grid.add_legend()
    # y > 1 lifts the title above the facet row.
    grid.fig.suptitle(f"Number of Heads vs Checkpoint - Component: {comp}", y=1.05)
    plt.show()


## Visualization: Sparsity (Number of Heads) vs Checkpoint
We plot the number of heads retained in the minimized circuit.
Fewer heads indicate a more minimal circuit (higher sparsity).


In [None]:
# Distinct component names, in order of first appearance in df_main.
components = df_main['Component'].unique()

# One figure per component: accuracy against checkpoint, with one facet per
# task and one line per model.
for comp in components:
    comp_rows = df_main.loc[df_main['Component'] == comp]

    grid = sns.FacetGrid(
        data=comp_rows,
        col="Task",
        hue="Model",
        height=4,
        aspect=1.2,
        sharey=False,  # each task gets its own y-range
    )
    grid.map(sns.lineplot, "Checkpoint", "Accuracy", marker="o")
    grid.add_legend()
    # y > 1 lifts the title above the facet row.
    grid.fig.suptitle(f"Accuracy vs Checkpoint - Component: {comp}", y=1.05)
    plt.show()


## Visualization: Accuracy vs Checkpoint
We plot the Test Accuracy for each component and task across different checkpoints.
Higher accuracy indicates that the minimized circuit retains the model's performance on the specific task.


In [None]:
# Keep only the lambda=0.01 runs (treated here as the main experiment).
# The copy keeps later edits to df_main from touching df.
is_main_lambda = df['Lambda'] == 0.01
df_main = df.loc[is_main_lambda].copy()

# Mean head count and accuracy per (model, checkpoint, component, task).
group_keys = ['Model', 'Checkpoint', 'Component', 'Task']
summary = df_main.groupby(group_keys)[['Num_Heads', 'Accuracy']].mean()
display(summary)


In [None]:
def parse_dcm_file(filepath):
    """Parse a DCM result text file and extract its head list and accuracy.

    The text is searched for ``Heads: [...]`` (a Python list literal on one
    line) and for ``Test Accuracy: <number>``; the first occurrence of each
    is used. The two fields are parsed independently, so a malformed value in
    one never discards the other.

    Args:
        filepath: Path of the result file to read.

    Returns:
        A ``(num_heads, accuracy, heads)`` tuple. ``heads`` is the parsed
        literal and ``num_heads`` its length; ``accuracy`` is a float. A
        field that is missing or cannot be parsed falls back to its default
        (``0`` and ``[]`` for the heads, ``0.0`` for the accuracy). If the
        file cannot be read at all, an error is printed and ``(0, 0.0, [])``
        is returned.
    """
    try:
        # Both fields of interest are plain ASCII, so undecodable bytes are
        # replaced instead of aborting the whole parse.
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
    except (OSError, TypeError, ValueError) as e:
        # Best effort: an unreadable file or unusable path yields defaults.
        print(f"Error parsing {filepath}: {e}")
        return 0, 0.0, []

    # Extract heads. Each entry is presumably a [layer, head] pair, so the
    # length of the outer list is the head count -- TODO confirm file format.
    heads, num_heads = [], 0
    heads_match = re.search(r"Heads: (\[.*\])", content)
    if heads_match:
        try:
            parsed = ast.literal_eval(heads_match.group(1))
            num_heads = len(parsed)
            heads = parsed
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid literal: treat as "no heads" rather than failing.
            heads, num_heads = [], 0

    # Extract accuracy. The pattern accepts "1", "0.95" and ".95" but stops
    # before a trailing sentence period ("0.95."), which float() would reject.
    accuracy = 0.0
    acc_match = re.search(r"Test Accuracy: (\d+(?:\.\d+)?|\.\d+)", content)
    if acc_match:
        accuracy = float(acc_match.group(1))

    return num_heads, accuracy, heads

def load_dcm_results(base_dir, model_name, checkpoints):
    """Collect DCM results for one model across several checkpoints.

    For every checkpoint the directory
    ``<base_dir>/<model_name>_exp_2_<ckpt>/experiment_2/results/DCM/<model_name>_circuit``
    is walked as ``<component>/<task>/<lambda>.txt``
    (e.g. ``struct_reader/positional/0.01.txt``). Each ``.txt`` file whose
    stem parses as a float is handed to ``parse_dcm_file``.

    Args:
        base_dir: Root directory holding the per-checkpoint experiment folders.
        model_name: Model identifier used in the folder names (e.g. ``"goat"``).
        checkpoints: Iterable of checkpoint identifiers; each must be
            convertible with ``int()``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Model``, ``Checkpoint``,
        ``Component``, ``Task``, ``Lambda``, ``Num_Heads`` and ``Accuracy``.
        The columns are present even when nothing was found, so downstream
        column lookups keep working on an empty result. Rows follow the
        checkpoint order given, then sorted component/task/file names.
    """
    columns = [
        "Model", "Checkpoint", "Component", "Task",
        "Lambda", "Num_Heads", "Accuracy",
    ]
    results = []

    for ckpt in checkpoints:
        # e.g. goat-experiment-2/goat_exp_2_1750/experiment_2/results/DCM/goat_circuit/
        exp_folder = f"{model_name}_exp_2_{ckpt}"
        circuit_folder = f"{model_name}_circuit"
        dcm_path = os.path.join(
            base_dir, exp_folder, "experiment_2", "results", "DCM", circuit_folder
        )

        # isdir (not exists) so a stray file at this path is skipped too.
        if not os.path.isdir(dcm_path):
            print(f"Path not found: {dcm_path}")
            continue

        # Sorted listings make the row order reproducible across machines.
        for component in sorted(os.listdir(dcm_path)):
            comp_path = os.path.join(dcm_path, component)
            if not os.path.isdir(comp_path):
                continue

            # Tasks/desiderata (e.g. positional)
            for task in sorted(os.listdir(comp_path)):
                task_path = os.path.join(comp_path, task)
                if not os.path.isdir(task_path):
                    continue

                # Lambda files (e.g. 0.01.txt)
                for file_name in sorted(os.listdir(task_path)):
                    # Strip only the final extension; the stem is the lambda.
                    stem, ext = os.path.splitext(file_name)
                    if ext != ".txt":
                        continue
                    try:
                        lamb = float(stem)
                    except ValueError:
                        # Not a lambda result file (e.g. notes.txt).
                        continue

                    filepath = os.path.join(task_path, file_name)
                    # The parsed head list itself is not stored, to keep the
                    # frame small.
                    num_heads, accuracy, _heads = parse_dcm_file(filepath)

                    results.append({
                        "Model": model_name,
                        "Checkpoint": int(ckpt),
                        "Component": component,
                        "Task": task,
                        "Lambda": lamb,
                        "Num_Heads": num_heads,
                        "Accuracy": accuracy,
                    })

    return pd.DataFrame(results, columns=columns)

# Experiment configuration
checkpoints = [1750, 1950, 2200]
base_path = os.getcwd()  # assumes the notebook runs from the repository root

# Result roots, one per model
goat_base = os.path.join(base_path, "goat-experiment-2")
qloat_base = os.path.join(base_path, "qloat-experiment-2")

# Load each model's results (Goat first, then Qloat)
df_goat = load_dcm_results(goat_base, "goat", checkpoints)
df_qloat = load_dcm_results(qloat_base, "qloat", checkpoints)

# Stack both models into a single frame with a fresh index
df = pd.concat([df_goat, df_qloat], ignore_index=True)

print(f"Loaded {len(df)} records.")
df.head()


In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import ast

# Plot defaults: seaborn "whitegrid" theme and a 12x6 default figure size
sns.set_theme(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 6)})


# DCM Experiment Analysis
This notebook analyzes the results of Desiderata-based Component Masking (DCM) experiments for the Goat and Qloat models across different checkpoints.
The goal is to understand how fine-tuning (and, for Qloat, quantization) affects the circuit mechanisms and their minimality.
