# Radar plots for each Approach/Cluster

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Clustering configurations to plot: each algorithm name is combined with the
# fixed "3" and with both suffixes, 'emb' first and then 'cat'.
cluster_methods = [
    f'{algorithm} 3 {suffix}'
    for algorithm in ('kmeans', 'agg', 'gmm', 'birch')
    for suffix in ('emb', 'cat')
]

# Display names looked up by cluster index when titling each chart: the first
# three entries are descriptive, the remaining ones are "4" through "20".
cluster_names = ["Novice", "Developing", "Proficient"]
cluster_names += [str(number) for number in range(4, 21)]

def create_radar_charts(method_data, method, stats_label):
    """Draw one radar (polar) chart per cluster and show the figure.

    The charts are arranged on a grid with at most three columns. Each chart
    plots the eight ``*_norm_mean`` metrics of one cluster and is titled with
    the cluster's display name (looked up in the module-level
    ``cluster_names`` list) and its ``year_mean`` value.

    Args:
        method_data: DataFrame with the rows of a single clustering method.
            Must provide a ``cluster`` column (used as an index into
            ``cluster_names``), a ``year_mean`` column and the metric columns
            listed in ``metrics`` below. Presumably it holds exactly one row
            per cluster -- TODO confirm against the stats files.
        method: Clustering method name, used in the figure title.
        stats_label: Label of the statistics source, used in the figure title.

    Returns:
        None. When ``method_data`` contains no clusters a short notice is
        printed and no figure is created.
    """
    metrics = ['pathophysiology_norm_mean', 'epidemiology_norm_mean', 'etiology_norm_mean', 'history_norm_mean',
               'physical_norm_mean', 'exams_norm_mean', 'differential_norm_mean', 'therapeutic_norm_mean']

    clusters = method_data['cluster'].unique()
    n_clusters = len(clusters)

    # A stats table may not contain the requested method at all; a 0x0
    # subplot grid is rejected by matplotlib, so skip the figure instead.
    if n_clusters == 0:
        print(f'No clusters to plot for {method} - {stats_label}')
        return

    # Calculate subplot layout
    n_rows = (n_clusters + 2) // 3  # Max 3 plots per row
    n_cols = min(n_clusters, 3)

    # squeeze=False always yields a 2-D array of axes, so the single-cluster,
    # single-row and multi-row layouts can all be indexed the same way.
    fig, axs = plt.subplots(
        n_rows, n_cols,
        figsize=(5*n_cols, 5*n_rows),
        subplot_kw={'projection': 'polar'},
        squeeze=False,
    )
    axs = axs.ravel()

    # Color map for different clusters
    colors = plt.cm.rainbow(np.linspace(0, 1, n_clusters))

    # Angles and tick labels only depend on the metric list, so compute them
    # once. The closed variant repeats the first angle to close the polygon.
    theta = np.linspace(0, 2*np.pi, len(metrics), endpoint=False)
    closed_theta = np.concatenate((theta, [theta[0]]))
    tick_labels = [
        metric.replace('_norm_mean', '').replace('_', ' ').title()
        for metric in metrics
    ]

    # Plot data for each cluster
    for ax, cluster, color in zip(axs, clusters, colors):
        cluster_data = method_data[method_data['cluster'] == cluster]
        values = cluster_data[metrics].values.flatten()

        # Close the plot by repeating the first value
        values = np.concatenate((values, [values[0]]))

        # Plot the radar chart
        ax.plot(closed_theta, values, color=color)
        ax.fill(closed_theta, values, color=color, alpha=0.25)

        # Set labels
        ax.set_xticks(theta)
        ax.set_xticklabels(tick_labels)

        # Set title with cluster and year
        year_mean = cluster_data['year_mean'].values[0]
        ax.set_title(f'Cluster {cluster_names[cluster]}\nYear {year_mean:.1f}')

    # Remove the unused axes of a partially filled last row
    for unused_ax in axs[n_clusters:]:
        fig.delaxes(unused_ax)

    # Overall figure title
    fig.suptitle(f'{method} - {stats_label}', fontsize=16)

    # Adjust layout and display
    plt.tight_layout()
    plt.show()

# Statistics tables to plot, keyed by the label shown in each figure title.
stats_all = {'Human 84': pd.read_csv('medical_specialist/annotations-dpoc-medical_specialist_stats_84.csv'),
             'Human 435': pd.read_csv('medical_specialist/annotations-dpoc-medical_specialist_stats_435.csv'),
             'BioBERT': pd.read_csv('biobert_balanced/annotations-dpoc-biobert_stats.csv'),
             'BioBERT-Llama': pd.read_csv('biobert-llama_balanced/annotations-dpoc-biobert-llama_stats.csv'),
             'Llama': pd.read_csv('llama/annotations-dpoc-llm_10_tf_idf_custom_shot_stats.csv')}

# Also register every LLM stats file found in the 'llama' directory, keyed by
# the part of the filename between the prefix and the suffix.
llm_prefix = 'annotations-dpoc-llm_'
llm_suffix = '_stats.csv'
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the generated figures reproducible between runs.
for filename in sorted(os.listdir('llama')):
    # Match on the full prefix (including the trailing underscore) so that
    # every accepted filename can actually be split into an approach name.
    if filename.startswith(llm_prefix) and filename.endswith(llm_suffix):
        approach = filename[len(llm_prefix):-len(llm_suffix)]
        stats_all[approach] = pd.read_csv(f'llama/{filename}')

# One figure per (clustering method, statistics source) combination.
for method in cluster_methods:
    for stats_label, stats in stats_all.items():
        method_data = stats[stats['method'] == method]
        create_radar_charts(method_data, method, stats_label)