# In [None]:
import numpy as np
from math import pi
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

# Load the dataset
# Update this with the correct file path
file_path = "../gpt-eval/security_evaluation_results.json"
# Pin the encoding: without it open() falls back to the platform locale,
# which can mis-decode non-ASCII text in the JSON on some systems.
with open(file_path, encoding="utf-8") as f:
    raw_data = json.load(f)

# Flatten the (possibly nested) JSON records into columns; nested keys are
# joined with "." (e.g. "sbs.score"), then renamed to short metric names.
data = pd.json_normalize(raw_data)
data = data.rename(columns={"model": "Model", "category": "Category",
                   "sbs.score": "SBS", "dme.score": "DME", "rcr.score": "RCR"})

# Ensure the metric columns are numeric; values that cannot be parsed
# become NaN (errors="coerce") instead of raising.
for metric in ("SBS", "DME", "RCR"):
    data[metric] = pd.to_numeric(data[metric], errors="coerce")

# Preview the data. A bare `data.head()` expression is discarded unless it is
# the last statement of a notebook cell, so print it explicitly.
print(data.head())

# Aggregate: mean of every numeric column per (Model, Category) pair.
summary = data.groupby(["Model", "Category"]).mean(
    numeric_only=True).reset_index()

# Grouped bar chart of the mean Security Breach Score (SBS):
# one group per category, one bar per model.
fig_sbs, ax_sbs = plt.subplots(figsize=(12, 6))
sns.barplot(data=summary, x="Category", y="SBS", hue="Model", ax=ax_sbs)
ax_sbs.set_title("Security Breach Score (SBS) by Model and Category")
# Slant the category names so long labels do not overlap.
plt.setp(ax_sbs.get_xticklabels(), rotation=45, ha="right")
ax_sbs.set_ylabel("Mean SBS")
ax_sbs.set_xlabel("Category")
ax_sbs.legend(title="Model")
fig_sbs.tight_layout()
plt.show()

# Heatmap of the mean Defense Mechanism Effectiveness (DME):
# rows are categories, columns are models, each cell annotated to 2 decimals.
pivot_data = summary.pivot(index="Category", columns="Model", values="DME")
fig_dme, ax_dme = plt.subplots(figsize=(12, 6))
sns.heatmap(pivot_data, annot=True, fmt=".2f", cmap="YlGnBu", ax=ax_dme)
ax_dme.set_title("Defense Mechanism Effectiveness (DME) by Model and Category")
ax_dme.set_ylabel("Category")
ax_dme.set_xlabel("Model")
fig_dme.tight_layout()
plt.show()

# Radar chart for Response Consistency Rate (RCR)


def radar_chart(data, category):
    """Draw a radar (polar) chart of RCR per model for a single category.

    Each model found in the rows matching ``category`` gets one spoke; the
    radial value is that model's ``RCR`` (missing values are plotted as 0).

    Args:
        data: DataFrame with "Model", "Category" and "RCR" columns. The
            lookup assumes at most one row per model within a category
            (as produced by the per-(Model, Category) aggregation).
        category: Value of the "Category" column to plot.

    Returns:
        None. The figure is shown with ``plt.show()``. If no rows match
        ``category`` a warning is issued and nothing is drawn.
    """
    stats = data[data["Category"] == category]
    models = stats["Model"].unique()

    # Nothing to plot: bail out early instead of failing on values[0] below.
    if len(models) == 0:
        import warnings

        warnings.warn(
            f"No rows for category {category!r}; skipping radar chart.",
            stacklevel=2,
        )
        return

    # One RCR value per model, in the same order as `models`.
    values = stats.set_index("Model")["RCR"].reindex(models).fillna(0).to_numpy()

    # One evenly spaced spoke per model. The angles must be derived from the
    # number of models, NOT from the closed sequence, otherwise the repeated
    # first point gets its own spoke and the outline never closes.
    angles = np.linspace(0.0, 2.0 * pi, len(models), endpoint=False)

    # Repeat the first point at the end so the outline returns to its start.
    closed_angles = np.append(angles, angles[0])
    closed_values = np.append(values, values[0])

    # Initialize radar chart
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))

    # Plot the data
    ax.plot(closed_angles, closed_values, linewidth=2, linestyle='solid',
            label=category)
    ax.fill(closed_angles, closed_values, alpha=0.25)

    # One tick and label per model (the closing duplicate is not labelled).
    ax.set_xticks(angles)
    ax.set_xticklabels(list(models))
    ax.set_title(f"Response Consistency Rate (RCR) - {category}")

    ax.legend(loc="upper right")
    plt.show()


# Single-category example first ...
example_category = "Chain-of-Thought Manipulation"
radar_chart(summary, example_category)

# ... then one radar chart for every distinct category in the summary,
# in order of first appearance.
for category in pd.unique(summary["Category"]):
    radar_chart(summary, category)