In [1]:
import json
from pathlib import Path
import numpy as np
import plotly.graph_objects as go

In [2]:
# Line/marker colour for each language category (hex).
COLORS = dict(
    spatial="#FF7F0E",      # Orange
    descriptive="#1F77B4",  # Blue
    neutral="#2CA02C",      # Green
)

# Same hues as COLORS at 20% opacity, used to shade the error bands.
FILL_COLORS = dict(
    spatial='rgba(255, 127, 14, 0.2)',
    descriptive='rgba(31, 119, 180, 0.2)',
    neutral="rgba(44,160,44, 0.2)",
)

def load_results(artifact_path, normalized=True):
    """Load the steering coefficients and per-sample outputs of a coefficient sweep.

    Reads ``coeff_test/coeffs.json`` (the list stored under its ``"coeff"`` key)
    and ``coeff_test/outputs.json`` from ``artifact_path``.

    Args:
        artifact_path: Directory containing the ``coeff_test`` folder. May be a
            ``pathlib.Path`` or a plain string.
        normalized: When true, each entry's ``"pos_probs"`` (spatial) and
            ``"neg_probs"`` (descriptive) are rescaled element-wise so that the
            two sum to 1. Positions where both are 0 have no defined share and
            are set to 0.0 for both, rather than NaN, so a single degenerate
            sample cannot turn the averaged curve into NaN.

    Returns:
        A ``(coeffs, outputs)`` tuple: the coefficient list and the list of
        output entries (with normalised probabilities as plain lists when
        ``normalized`` is true).

    Raises:
        FileNotFoundError: If either JSON file is missing.
    """
    coeff_dir = Path(artifact_path) / "coeff_test"

    # Context managers make sure the file handles are closed promptly.
    with open(coeff_dir / "coeffs.json", "r") as f:
        coeffs = json.load(f)["coeff"]
    with open(coeff_dir / "outputs.json", "r") as f:
        outputs = json.load(f)

    # pos_probs (spatial) and neg_probs (descriptive)
    if normalized:
        for x in outputs:
            pos_probs = np.asarray(x["pos_probs"], dtype=float)
            neg_probs = np.asarray(x["neg_probs"], dtype=float)
            total = pos_probs + neg_probs
            # Only divide where the denominator is non-zero; elsewhere keep 0.0.
            valid = total > 0
            x["pos_probs"] = np.divide(
                pos_probs, total, out=np.zeros_like(total), where=valid
            ).tolist()
            x["neg_probs"] = np.divide(
                neg_probs, total, out=np.zeros_like(total), where=valid
            ).tolist()

    return coeffs, outputs


def get_avg_std(x):
    """Return ``(mean, std)`` of ``x``, both reduced along axis 0."""
    return np.mean(x, axis=0), np.std(x, axis=0)


def plot_steering(coeffs, outputs, title_text=None, width=425, height=300, legend_title="Language Type", error_band=False, x_range=None):
    """Plot mean spatial/descriptive probability against the steering coefficient.

    Args:
        coeffs: X values (one steering coefficient per point).
        outputs: Sequence of dicts, each holding ``"pos_probs"`` and
            ``"neg_probs"`` sequences; they are averaged across entries
            (axis 0) with ``get_avg_std``.
        title_text: Optional figure title.
        width: Figure width passed to the layout.
        height: Figure height passed to the layout.
        legend_title: Title shown above the legend.
        error_band: When true, shade the region between mean - std and
            mean + std behind each curve.
        x_range: Optional ``[min, max]`` for the x axis; ``None`` leaves it
            to plotly.

    Returns:
        The configured ``plotly.graph_objects.Figure``.
    """
    fig = go.Figure()

    # spatial (pos) and descriptive (neg)
    for group, label in [("pos", "spatial"), ("neg", "descriptive")]:
        avg, std = get_avg_std([x[f'{group}_probs'] for x in outputs])
        fig.add_trace(go.Scatter(
            x=coeffs, y=avg, mode='lines+markers', name=label,
            marker_color=COLORS[label], showlegend=True
        ))
        if error_band:
            # Invisible upper bound; the next trace fills back up to it.
            fig.add_trace(go.Scatter(
                x=coeffs, y=avg+std, mode='lines', marker=dict(color="#444"),
                line=dict(width=0), showlegend=False
            ))
            # Invisible lower bound; 'tonexty' shades the area between this
            # trace and the one added immediately before it.
            fig.add_trace(go.Scatter(
                x=coeffs, y=avg-std, mode='lines', marker=dict(color="#444"),
                line=dict(width=0),
                fillcolor=FILL_COLORS[label], fill='tonexty', showlegend=False
            ))

    fig.update_layout(
        width=width, height=height, plot_bgcolor='white',
        margin=dict(l=10, r=10, t=20, b=25),
        font=dict(size=14), title_text=title_text,
        title_font=dict(size=16), title_x=0.48, title_y=0.98,
        legend_title_text=legend_title, legend_title_font=dict(size=15),
    )
    fig.update_xaxes(
        mirror=True, showgrid=True, gridcolor='darkgrey',
        zeroline=True, zerolinecolor='black',
        title_text="Steering Coefficient (λ)",
        title_font=dict(size=15), tickfont=dict(size=13),
        showline=True, linewidth=1, linecolor='darkgrey',
        title_standoff=1, nticks=10, range=x_range,
    )
    fig.update_yaxes(
        mirror=True, showgrid=True, gridcolor='darkgrey',
        zeroline=True, zerolinecolor='darkgrey',
        # The axis is pinned to [0, 1], i.e. fractions rather than percent,
        # so the label carries no "(%)" unit.
        title_text="Probability",
        title_font=dict(size=15), tickfont=dict(size=13),
        showline=True, linewidth=1, linecolor='darkgrey',
        title_standoff=2, range=[0, 1],
    )
    return fig

In [3]:
# Load data - try multiple path options
import os

# Candidate artifact directories, probed in order. The last entry is a
# machine-specific absolute path kept only as a last resort.
candidate_paths = [
    Path("../runs_vision/gpt2"),
    Path("runs_vision/gpt2"),
    Path("/Users/bretthaas/vision/vision-bias-steering/runs_vision/gpt2"),
]
artifact_path = next(
    (p for p in candidate_paths if os.path.exists(p / "coeff_test/coeffs.json")),
    None,
)
if artifact_path is None:
    # Fail here with the list of locations tried instead of deep inside load_results.
    tried = ", ".join(str(p) for p in candidate_paths)
    raise FileNotFoundError(f"coeff_test/coeffs.json not found under any of: {tried}")

print(f"Using path: {artifact_path.absolute()}")
print(f"File exists: {(artifact_path / 'coeff_test/coeffs.json').exists()}")

# Load results
coeffs, outputs = load_results(artifact_path, normalized=True)
print(f"\n✓ Loaded {len(coeffs)} coefficients and {len(outputs)} samples")
print(f"  Coefficient range: [{min(coeffs):.1f}, {max(coeffs):.1f}]")

# Sanity check: how much the mean spatial share moves across the coefficient
# sweep. A nearly flat curve suggests the steering had no effect.
all_pos = np.array([np.array(x['pos_probs']) for x in outputs])
all_neg = np.array([np.array(x['neg_probs']) for x in outputs])
pos_mean = all_pos.mean(axis=0)
neg_mean = all_neg.mean(axis=0)
pos_norm = pos_mean / (pos_mean + neg_mean)
variation = pos_norm.max() - pos_norm.min()
print(f"  Spatial probability variation: {variation:.4f} ({variation*100:.1f}%)")
if variation < 0.01:
    print("  ⚠️  Warning: Very little variation - steering may not be working properly")
else:
    print("  ✓ Good variation detected - steering is working!")

# Create plot
fig = plot_steering(
    coeffs, outputs,
    width=470, height=300,
    error_band=True,
    title_text="Vision Steering (Spatial vs Descriptive)",
    x_range=[min(coeffs)-5, max(coeffs)+5]
)

# Display in notebook
fig.show()

# Export the plot
output_dir = Path("../plots")
os.makedirs(output_dir, exist_ok=True)  # Create plots directory if it doesn't exist

# Save as HTML (interactive)
html_path = output_dir / "gpt2-vision-steering.html"
fig.write_html(str(html_path))
print(f"\n✓ Saved interactive plot to: {html_path.absolute()}")

Using path: /Users/bretthaas/vision/vision-bias-steering/plotting/../runs_vision/gpt2
File exists: True

✓ Loaded 61 coefficients and 100 samples
  Coefficient range: [-30.0, 30.0]
  Spatial probability variation: 0.4770 (47.7%)
  ✓ Good variation detected - steering is working!



✓ Saved interactive plot to: /Users/bretthaas/vision/vision-bias-steering/plotting/../plots/gpt2-vision-steering.html
