In [None]:
import math
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import norm

In [None]:
def _confidence_qualifiers(target_ratio, temperature):
    """Return the optional ' (temp T)' and ' (NN%)' parts of a column name.

    The temperature qualifier, when present, always precedes the ratio
    qualifier. Either part is omitted when its argument is None.
    """
    qualifiers = ''
    if temperature is not None:
        qualifiers += f' (temp {temperature})'
    if target_ratio is not None:
        qualifiers += f' ({target_ratio*100:.0f}%)'
    return qualifiers


def _confidence_column_names(model_name, target_ratio, temperature):
    """Return the (decision, external confidence, internal confidence) column names."""
    suffix = f'{_confidence_qualifiers(target_ratio, temperature)} ({model_name})'
    return (
        f'Decision (AB){suffix}',
        f'External Confidence (AB){suffix}',
        f'Internal Confidence (AB){suffix}',
    )


def _confidence_to_probability(confidence, decision):
    """Negate the confidence where the decision is 0, then map it via x / 2 + 0.5.

    Returns a new Series; the inputs are left untouched so processing the same
    model twice cannot transform its values twice.
    """
    # presumably decision 0 means answer B and confidences lie in [0, 1],
    # which would make the result "probability of A" -- TODO confirm
    signed = confidence.where(decision != 0, -confidence)
    return signed / 2 + 0.5


def _plot_probability_panel(ax, probability, correct_choice, title,
                            label_fontsize=14, title_fontsize=12, tick_fontsize=12):
    """Draw one panel: a density histogram plus fitted normal curve per true label."""
    for true_label, color, hatch in zip([1, 0], ['orange', 'darkgreen'], [None, '//']):
        data = probability[correct_choice == true_label].dropna()
        if data.empty:
            # Nothing to draw (and norm.fit cannot fit an empty sample)
            continue
        sns.histplot(data, kde=False, stat="density", bins=30, color=color,
                     label=f"Correct Answer = {'A' if true_label == 1 else 'B'}",
                     ax=ax, alpha=0.6, hatch=hatch)
        mu, std = norm.fit(data)
        if std > 0:
            # A zero-width fit has no meaningful density curve
            x = np.linspace(0, 1, 100)
            ax.plot(x, norm.pdf(x, mu, std), color=color, linestyle='dashed')

    # Set y-axis limit to 0-30
    ax.set_ylim(0, 30)
    ax.set_title(title, fontsize=title_fontsize)
    ax.set_xlabel('Probability', fontsize=label_fontsize)
    ax.set_ylabel('Density', fontsize=label_fontsize)
    ax.legend(loc='upper center', fontsize=tick_fontsize)


def plot_confidence_distribution(filename, model_names, target_ratio, temperature):
    """Plot explicit and implicit probability distributions for each model.

    One subplot row is drawn per model: the left panel uses the model's
    'External Confidence (AB) ...' column, the right panel its
    'Internal Confidence (AB) ...' column. Each panel splits the rows by
    'correct_choice_id' (1 is labelled A, 0 is labelled B). The figure is
    saved as a PNG next to the input file and then shown.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.
    model_names : iterable of str or None
        Models to plot. When None, the names are discovered from the
        'External Confidence (AB) ...' columns that match the requested
        temperature / ratio qualifiers.
    target_ratio : float or None
        When given, selects columns carrying a ' (NN%)' qualifier, where NN is
        target_ratio * 100 rounded to an integer.
    temperature : number or None
        When given, selects columns carrying a ' (temp <temperature>)' qualifier.

    Raises
    ------
    KeyError
        If the CSV has no 'correct_choice_id' column.
    ValueError
        If no model has all three required columns.
    """
    # Read the CSV file
    df = pd.read_csv(filename)
    qualifiers = _confidence_qualifiers(target_ratio, temperature)

    if model_names is None:
        # Escape the literal part (it contains parentheses) and match the whole
        # column name, so qualifiers such as '30%' or 'temp 0.7' can never be
        # mistaken for a model name.
        pattern = re.compile(
            re.escape(f'External Confidence (AB){qualifiers} (') + r'([^()]+)\)'
        )
        matches = (pattern.fullmatch(str(col)) for col in df.columns)
        # Sorted so the subplot order is reproducible between runs
        model_names = sorted({m.group(1) for m in matches if m})
    else:
        model_names = list(model_names)

    print(model_names)

    if 'correct_choice_id' not in df.columns:
        raise KeyError(f"'correct_choice_id' column not found in {filename}")
    correct_choice = df['correct_choice_id']

    # Keep only models whose three columns are all present, so every subplot
    # row that gets allocated is actually filled.
    plottable = []
    for model_name in model_names:
        columns = _confidence_column_names(model_name, target_ratio, temperature)
        missing = [col for col in columns if col not in df.columns]
        if missing:
            print(f"Skipping {model_name}: missing columns {missing}")
            continue
        plottable.append((model_name, columns))

    if not plottable:
        raise ValueError(
            f"No models with confidence columns matching the requested "
            f"target_ratio/temperature were found in {filename}"
        )

    num_rows = len(plottable)
    # squeeze=False keeps `axes` two-dimensional even for a single model
    fig, axes = plt.subplots(num_rows, 2, figsize=(12, num_rows * 5), squeeze=False)

    title_suffix = '' if target_ratio is None else f' ({target_ratio*100:.0f}% imbalance)'

    # Plot distributions for each model
    for row, (model_name, (decision_col, external_col, internal_col)) in enumerate(plottable):
        decision = df[decision_col]
        _plot_probability_panel(
            axes[row, 0],
            _confidence_to_probability(df[external_col], decision),
            correct_choice,
            f'Explicit Probability Distribution for {model_name}{title_suffix}',
        )
        _plot_probability_panel(
            axes[row, 1],
            _confidence_to_probability(df[internal_col], decision),
            correct_choice,
            f'Implicit Probability Distribution for {model_name}{title_suffix}',
        )

    # Adjust layout and save the plot as a PNG file
    plt.tight_layout()
    # splitext strips only the extension; splitting on the first '.' would
    # truncate relative paths such as '../data/file.csv' to an empty string.
    base_name = os.path.splitext(filename)[0]
    # NOTE(review): the output name does not include the temperature, so runs
    # that differ only in temperature write to the same file.
    ratio_suffix = '' if target_ratio is None else f' ({target_ratio*100:.0f}%)'
    output_file_name = f"{base_name}_Confidence_Distribution_All_Models{ratio_suffix}.png"
    plt.savefig(output_file_name)
    plt.show()

In [None]:
# Every model under study, listed as (large, small) pairs, one family per line.
all_model_names = [
    'Qwen2-72B-Instruct', 'Qwen2-7B-Instruct',
    'Meta-Llama-3.1-70B-Instruct', 'Meta-Llama-3.1-8B-Instruct',
    'gemma-2-27b-it', 'gemma-2-9b-it',
    'Mistral-Large-Instruct-2407', 'Mistral-7B-Instruct-v0.3',
    'Yi-1.5-34B-Chat', 'Yi-1.5-9B-Chat',
    'Phi-3-medium-128k-instruct', 'Phi-3-mini-128k-instruct',
]

# Subsets derived by position; they rely on the large/small pairing order above.
large_model_names = all_model_names[0::2]
small_model_names = all_model_names[1::2]
first_half_model_names = all_model_names[:6]
second_half_model_names = all_model_names[6:]

# Single-family subsets
llama_model_names = [
    'Meta-Llama-3.1-70B-Instruct',
    'Meta-Llama-3.1-8B-Instruct',
]
mistral_model_names = [
    'Mistral-Large-Instruct-2407',
    'Mistral-7B-Instruct-v0.3',
]

# Run configuration
csv_file = '../data/medQA_en.csv'  # Path to your CSV file
prompt_id = 2
target_ratio = None
temperature = None

# Plot the first three large models
plot_confidence_distribution(csv_file, large_model_names[:3], target_ratio, temperature)