In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Patch
import plotly.colors as pc 

# Colors
# Model colours are the first two entries of Plotly's default qualitative palette:
# index 0 (blue) is used for GPT-3.5 and index 1 (red) for GPT-4.
COLOR_GPT35, COLOR_GPT4 = pc.qualitative.Plotly[:2]
# Background tints for the "regressed" and "improved" bands of the charts
COLOR_REGRESSED = '#D3D3D3'
COLOR_IMPROVED = '#E0F8F1'

# Task names (two-line tick labels: task on the first line, sub-task in parentheses)
tasks = [
    "Math I\n(Prime)", "Math II\n(Happy)", "Sensitive\n(Safety)", "Opinion\n(Survey)",
    "LangChain\n(QA)", "Code Gen\n(Exec)", "USMLE\n(Medical)", "Visual\n(Reasoning)"
]

# Performance scores, one list per model and month; entry i belongs to tasks[i]
gpt4_march = [84.0, 83.6, 21.0, 97.6, 1.2, 52.0, 86.6, 24.6]
gpt4_june = [51.1, 35.2, 5.0, 22.1, 37.8, 10.0, 82.4, 27.2]
gpt35_march = [49.6, 30.6, 2.0, 94.3, 22.6, 22.0, 54.3, 10.9]
gpt35_june = [76.2, 48.2, 8.0, 86.7, 14.0, 2.0, 53.5, 14.3]

# One record per task holding the raw scores plus the June - March change for each model,
# ordered from the largest GPT-4 drop to the largest GPT-4 gain.
data = sorted(
    (
        {
            'task': task,
            'gpt4_march': g4_mar,
            'gpt4_june': g4_jun,
            'gpt35_march': g35_mar,
            'gpt35_june': g35_jun,
            'delta_gpt4': g4_jun - g4_mar,
            'delta_gpt35': g35_jun - g35_mar,
        }
        for task, g4_mar, g4_jun, g35_mar, g35_jun
        in zip(tasks, gpt4_march, gpt4_june, gpt35_march, gpt35_june)
    ),
    key=lambda record: record['delta_gpt4'],
)


def _column(field):
    """Return the values stored under ``field``, one per record, in sorted order."""
    return [record[field] for record in data]


# Parallel lists in sorted order, ready to be handed to the plotting calls
task_names = _column('task')
deltas_gpt4 = _column('delta_gpt4')
deltas_gpt35 = _column('delta_gpt35')
scores_gpt4_march = _column('gpt4_march')
scores_gpt4_june = _column('gpt4_june')
scores_gpt35_march = _column('gpt35_march')
scores_gpt35_june = _column('gpt35_june')

# Setup figure: two stacked panels sharing one x axis (drift on top, absolute scores below)
fig, (ax_drift, ax_scores) = plt.subplots(2, 1, figsize=(16, 10), facecolor='white', sharex=True)
# NOTE: the tight_layout() call made just before saving recomputes these margins.
plt.subplots_adjust(left=0.08, right=0.95, top=0.92, bottom=0.15, hspace=0.1)

x_positions = np.arange(len(task_names))
bar_width = 0.35
# x position of the boundary between regressed and improved tasks. The records are sorted by
# ascending GPT-4 delta, so every negative delta comes first and the divider sits half a slot
# past the last of them. A delta of exactly 0 lands on the "improved" side.
divider_position = sum(delta < 0 for delta in deltas_gpt4) - 0.5

# TOP CHART: Performance drift
# Two bars per task: GPT-4 to the left of the tick, GPT-3.5 (semi-transparent) to the right.
bars_gpt4 = ax_drift.bar(x_positions - bar_width/2, deltas_gpt4, bar_width,
                          label='GPT-4 Delta', color=COLOR_GPT4, edgecolor='white')
bars_gpt35 = ax_drift.bar(x_positions + bar_width/2, deltas_gpt35, bar_width,
                           label='GPT-3.5 Delta', color=COLOR_GPT35, edgecolor='white', alpha=0.6)

# Signed value labels at the end of each bar (bold for GPT-4)
ax_drift.bar_label(bars_gpt4, fmt='%+.1f', padding=3, fontsize=10, fontweight='bold')
ax_drift.bar_label(bars_gpt35, fmt='%+.1f', padding=3, fontsize=10)

# Horizontal extent of the shaded background: half a slot beyond the first and last task
span_left, span_right = -0.5, len(task_names) - 0.5

ax_drift.axhline(0, color='black', linewidth=1)
ax_drift.axvline(divider_position, color='black', linestyle='--', linewidth=1, alpha=0.5)
ax_drift.axvspan(span_left, divider_position, color=COLOR_REGRESSED, alpha=0.3)
ax_drift.axvspan(divider_position, span_right, color=COLOR_IMPROVED, alpha=0.3)

ax_drift.set_ylabel('Performance Change\n(June - March %)', fontweight='bold', fontsize=11)
ax_drift.set_title('ChatGPT Performance Drift Audit (Mar-Jun 2023)', fontweight='bold', fontsize=18, loc='left')
ax_drift.spines[['top', 'right', 'bottom']].set_visible(False)
ax_drift.grid(axis='y', linestyle=':', alpha=0.4)

# Region captions, centred horizontally inside their shaded band so they follow the divider
# instead of relying on hard-coded x positions.
ax_drift.text((span_left + divider_position) / 2, -75, 'PERFORMANCE REGRESSED\n(Negative Delta)',
              ha='center', fontweight='bold', color='#555555', fontsize=12)
ax_drift.text((divider_position + span_right) / 2, -75, 'IMPROVED\n(Positive Delta)',
              ha='center', fontweight='bold', color='#008c66', fontsize=12)
ax_drift.set_ylim(-100, 60)

# BOTTOM CHART: Absolute scores
bar_width_bottom = 0.2

# Four bars per task, laid out left to right around each tick.
# Each entry: (offset from the tick in bar widths, heights, colour, alpha, label).
# March bars are semi-transparent; June bars keep the default opacity (alpha=None).
score_series = [
    (-1.5, scores_gpt4_march, COLOR_GPT4, 0.4, 'GPT-4 (Mar)'),
    (-0.5, scores_gpt4_june, COLOR_GPT4, None, 'GPT-4 (Jun)'),
    (0.5, scores_gpt35_march, COLOR_GPT35, 0.4, 'GPT-3.5 (Mar)'),
    (1.5, scores_gpt35_june, COLOR_GPT35, None, 'GPT-3.5 (Jun)'),
]
score_bars = [
    ax_scores.bar(x_positions + offset * bar_width_bottom, heights, bar_width_bottom,
                  color=series_color, edgecolor='white', alpha=series_alpha, label=series_label)
    for offset, heights, series_color, series_alpha, series_label in score_series
]
bars_gpt4_march, bars_gpt4_june, bars_gpt35_march, bars_gpt35_june = score_bars

# Print each score, formatted without decimals, vertically above its bar
for bars in score_bars:
    ax_scores.bar_label(bars, fmt='%.0f', label_type='edge', padding=3, rotation=90, fontsize=9)

# Divider line and background shading at the same x positions as the top panel
ax_scores.axvline(divider_position, color='black', linestyle='--', linewidth=1, alpha=0.5)
ax_scores.axvspan(-0.5, divider_position, color=COLOR_REGRESSED, alpha=0.3)
ax_scores.axvspan(divider_position, len(task_names)-0.5, color=COLOR_IMPROVED, alpha=0.3)

ax_scores.set_ylabel('Absolute Score (%)', fontweight='bold', fontsize=11)
ax_scores.set_xticks(x_positions)
ax_scores.set_xticklabels(task_names, fontweight='bold', fontsize=11)
# Upper limit above 100: presumably headroom for the rotated value labels
ax_scores.set_ylim(0, 135)
ax_scores.spines[['top', 'right', 'left']].set_visible(False)
# The y axis carries no ticks (the values are printed on the bars). No y grid is requested
# either: grid lines are drawn at tick locations, so with the ticks cleared it never rendered.
ax_scores.yaxis.set_ticks([])

# Hand-built legend: one swatch per (model, month). March entries are semi-transparent,
# June entries are fully opaque.
legend_specs = (
    (COLOR_GPT4, 0.4, 'GPT-4 (Mar)'),
    (COLOR_GPT4, 1.0, 'GPT-4 (Jun)'),
    (COLOR_GPT35, 0.4, 'GPT-3.5 (Mar)'),
    (COLOR_GPT35, 1.0, 'GPT-3.5 (Jun)'),
)
legend_elements = [
    Patch(facecolor=swatch_color, alpha=swatch_alpha, label=swatch_label)
    for swatch_color, swatch_alpha, swatch_label in legend_specs
]
# Single frameless row, centred underneath the bottom panel
ax_scores.legend(
    handles=legend_elements,
    loc='upper center',
    bbox_to_anchor=(0.5, -0.12),
    ncol=4,
    frameon=False,
    fontsize=11,
)

# Fit the layout, write the figure to disk as a 300-dpi PNG, then display it
fig.tight_layout()
fig.savefig('gpt4_over_time.png', dpi=300)
plt.show()

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from itertools import permutations
from math import pi, sin

# Task names and performance data
# One row per task: (name, March score, June score). The numbers are the same as the
# GPT-4 March/June series used in the matplotlib cell.
_task_scores = [
    ("Math I", 84.0, 51.1),
    ("Math II", 83.6, 35.2),
    ("Sensitive", 21.0, 5.0),
    ("Opinion", 97.6, 22.1),
    ("LangChain", 1.2, 37.8),
    ("Code Gen", 52.0, 10.0),
    ("USMLE", 86.6, 82.4),
    ("Visual", 24.6, 27.2),
]
tasks = [name for name, _, _ in _task_scores]
scores_march = np.array([march for _, march, _ in _task_scores])
scores_june = np.array([june for _, _, june in _task_scores])

# Colors (line/fill colour of the March and June polygons)
COLOR_MARCH = '#EF553B'
COLOR_JUNE = '#2c3e50'

def calculate_polygon_area(radii, max_radius=100):
    """Return the area of a radar-chart polygon as a percentage of the full circle.

    The polygon has one vertex per entry of ``radii``, placed on equally spaced spokes
    at distance ``radii[i]`` from the centre. Its area is the sum of the triangles
    spanned by neighbouring spokes, ``0.5 * r_i * r_(i+1) * sin(2*pi/n)``, wrapping
    around from the last spoke back to the first.

    Args:
        radii: Sequence of radial values, in the order the spokes are drawn.
        max_radius: Radius of the reference circle the area is compared against.
            Defaults to 100.

    Returns:
        Polygon area divided by ``pi * max_radius**2``, multiplied by 100. Fewer than
        three radii cannot enclose any area, so ``0.0`` is returned for them.

    Raises:
        ValueError: If ``max_radius`` is not positive.
    """
    if max_radius <= 0:
        raise ValueError("max_radius must be positive")
    n = len(radii)
    # Degenerate input: avoids dividing by zero for n == 0 and float noise from
    # sin(2*pi) / sin(pi) for n == 1 / n == 2.
    if n < 3:
        return 0.0
    neighbour_products = sum(radii[i] * radii[(i + 1) % n] for i in range(n))
    polygon_area = 0.5 * sin(2 * pi / n) * neighbour_products
    reference_circle_area = pi * max_radius ** 2
    return (polygon_area / reference_circle_area) * 100

# Find ordering that maximizes/minimizes area
n_tasks = len(tasks)
# Every candidate ordering starts with task 0. calculate_polygon_area only looks at cyclic
# neighbours, so rotations of an ordering have the same area and need not be searched.
all_permutations = [[0] + list(p) for p in permutations(range(1, n_tasks))]
# Evaluate each candidate once and reuse the result for both searches
march_areas = [calculate_polygon_area([scores_march[i] for i in p]) for p in all_permutations]
# .index() returns the first match, i.e. ties resolve to the earliest permutation.
# Copies are taken so the in-place swap below cannot touch all_permutations or worst_order.
best_order = list(all_permutations[march_areas.index(max(march_areas))])
worst_order = list(all_permutations[march_areas.index(min(march_areas))])

# Manual swap for best order (Opinion <-> Math II)
# NOTE(review): after this swap best_order may no longer be the exact area maximiser
# found above - confirm the swap is intentional.
idx_opinion = best_order.index(tasks.index("Opinion"))
idx_math2 = best_order.index(tasks.index("Math II"))
best_order[idx_opinion], best_order[idx_math2] = best_order[idx_math2], best_order[idx_opinion]

# Create fixed labels based on best order: each task is numbered by its position in
# best_order, and that numbered label is reused wherever the task is drawn.
axis_labels = [""] * n_tasks
for position, task_idx in enumerate(best_order):
    axis_labels[task_idx] = f"<b>{position+1}. {tasks[task_idx]}</b>"

# Create subplots: two polar panels side by side, one per axis ordering
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'polar'}]*2],
    subplot_titles=("Maximized area (same data)", "Minimized area (same data)"),
    horizontal_spacing=0.15,
)

# Series drawn in every panel, in drawing order: (legend name, scores, line colour)
month_series = (
    ("March", scores_march, COLOR_MARCH),
    ("June", scores_june, COLOR_JUNE),
)

# Plot both orderings
for col_idx, order in enumerate([best_order, worst_order], 1):
    # Repeat the first task at the end so each outline closes on itself
    closed_order = order + order[:1]

    for month_name, month_scores, month_color in month_series:
        fig.add_trace(
            go.Scatterpolar(
                r=[month_scores[i] for i in closed_order],
                theta=[axis_labels[i] for i in closed_order],
                fill='toself',
                opacity=0.6,
                line_color=month_color,
                name=month_name,
                # Legend entries come from the first panel only
                showlegend=(col_idx == 1),
            ),
            row=1,
            col=col_idx,
        )

    # Calculate and annotate areas for this ordering
    area_march = calculate_polygon_area([scores_march[i] for i in order])
    area_june = calculate_polygon_area([scores_june[i] for i in order])

    # Caption placed below the panel, in paper coordinates
    annotation_x = (0.22, 0.78)[0 if col_idx == 1 else 1]
    caption = (
        f"<span style='color:{COLOR_MARCH}'><b>March Area: {area_march:.1f}%</b></span><br>"
        f"<span style='color:{COLOR_JUNE}'>June Area: {area_june:.1f}%</span>"
    )
    fig.add_annotation(
        x=annotation_x,
        y=-0.15,
        xref="paper",
        yref="paper",
        showarrow=False,
        text=caption,
        font=dict(size=14),
    )

# Layout: centred title, fixed canvas size, horizontal legend above the panels
layout_settings = {
    'title_text': "Key message: Visual area depends on axis order, not just data",
    'title_x': 0.5,
    'height': 600,
    'width': 1000,
    'margin': {'t': 100, 'b': 100},
    'legend': {'y': 1.1, 'x': 0.5, 'xanchor': 'center', 'orientation': 'h'},
    'font': {'family': "Arial", 'size': 12},
}
fig.update_layout(**layout_settings)

# Same radial range (0-100) in both panels, radial axis shown without tick labels
radial_axis_settings = {'visible': True, 'showticklabels': False, 'range': [0, 100]}
fig.update_polars(radialaxis=radial_axis_settings)

fig.show()