# Results Visualization

This notebook visualizes out-of-distribution (OOD) detection results. Run `evaluate_all.py` first; the cells below read the metrics from `../results.json`.


In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the evaluation metrics. JSON is UTF-8 by specification, so the encoding
# is given explicitly instead of relying on the platform default.
with open('../results.json', encoding='utf-8') as f:
    results = json.load(f)

# The JSON is read as {key: {column: value}}; transposing puts one row per
# top-level key, which is then exposed as a regular 'Method' column.
# NOTE(review): presumably top-level keys are method names and inner keys are
# metric names (AUROC, FPR@95TPR, AUPR are used below) — confirm against
# the script that writes results.json.
df = pd.DataFrame(results).T.rename_axis('Method').reset_index()
print(df.to_string(index=False))

In [None]:
# Bar chart comparison: one panel per metric, one bar per row of `df`.
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
metrics = ['AUROC', 'FPR@95TPR', 'AUPR']
colors = sns.color_palette('Set2', len(df))

# Bars are drawn at explicit numeric positions so the tick locations can be
# fixed with set_xticks() before the labels are assigned. Calling
# set_xticklabels() on an axis without fixed ticks triggers a Matplotlib
# warning and can leave labels out of step with the bars.
positions = np.arange(len(df))

for ax, metric in zip(axes, metrics):
    bars = ax.bar(positions, df[metric], color=colors)
    ax.set_title(metric)
    ax.set_ylabel(metric)
    ax.set_xticks(positions)
    ax.set_xticklabels(df['Method'], rotation=30, ha='right')
    # Annotate each bar with its value, placed slightly above the bar top.
    for bar, val in zip(bars, df[metric]):
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.002,
                f'{val:.3f}', ha='center', va='bottom', fontsize=8)

plt.suptitle('OOD Detection Methods on CLINC150', fontsize=14, fontweight='bold')
plt.tight_layout()
# Saved before show() so the written file contains the rendered figure.
plt.savefig('../report/results_comparison.pdf', bbox_inches='tight')
plt.show()

In [None]:
# Comparison with published SotA
from matplotlib.patches import Patch

# Reference numbers, hard-coded here as percentages.
sota = pd.DataFrame([
    {'Method': 'MSP (Hendrycks 2017)', 'AUROC': 82.36, 'FPR@95TPR': 57.82, 'source': 'published'},
    {'Method': 'Energy (Liu 2020)',    'AUROC': 88.44, 'FPR@95TPR': 46.20, 'source': 'published'},
    {'Method': 'Maha (Podolskiy 2021)','AUROC': 96.76, 'FPR@95TPR': 18.32, 'source': 'published'},
    {'Method': 'KNN (Sun 2022)',       'AUROC': 95.30, 'FPR@95TPR': 22.10, 'source': 'published'},
])

# Work on a copy so `df` from the earlier cell is left untouched and this cell
# can be re-run safely.
# NOTE(review): the x100 scaling assumes the loaded metrics are fractions in
# [0, 1] so they line up with the percentage values above — confirm.
ours = df.copy()
ours['AUROC'] = ours['AUROC'] * 100
ours['FPR@95TPR'] = ours['FPR@95TPR'] * 100
ours['source'] = 'ours'

combined = pd.concat([sota, ours[['Method','AUROC','FPR@95TPR','source']]], ignore_index=True)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
palette = {'published': '#2196F3', 'ours': '#FF5722'}

# One numeric slot per row: every row keeps its own bar even if two rows share
# a method name, and the ticks are fixed before labels are attached.
positions = np.arange(len(combined))
bar_colors = combined['source'].map(palette).tolist()

for ax, metric, better in zip([ax1, ax2], ['AUROC', 'FPR@95TPR'], ['higher', 'lower']):
    ax.bar(positions, combined[metric].astype(float), color=bar_colors, alpha=0.8)
    ax.set_title(f'{metric} (% — {better} is better)')
    ax.set_xticks(positions)
    ax.set_xticklabels(combined['Method'], rotation=40, ha='right', fontsize=8)

# Legend colours are read from `palette` so they cannot drift from the bars.
ax1.legend(handles=[Patch(color=palette['published'], label='Published'),
                    Patch(color=palette['ours'], label='Ours')])

plt.suptitle('Comparison with Published State-of-the-Art', fontsize=13, fontweight='bold')
plt.tight_layout()
plt.savefig('../report/sota_comparison.pdf', bbox_inches='tight')
plt.show()