In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import roc_curve, precision_recall_curve, auc
import matplotlib.pyplot as plt
import math

In [None]:
def _accuracy_columns(prompt_id, target_ratio, temperature, model_name):
    """Build the (label column, decision column) names for one model.

    The decision column name is assembled as
    ``Decision[ (AB)| (AB2)][ (temp T)][ (R%)] (model_name)`` and the label
    column as ``<base>[ (R%)]``, where ``<base>`` is
    ``propose_correct_answer`` for prompt 1 and ``correct_choice_id`` for
    prompts 2 and 3.

    Args:
        prompt_id: 1, 2 or 3; selects the label base name and the prompt tag.
        target_ratio: ``None`` or a number; when given it is rendered as a
            whole-number percentage suffix (``target_ratio*100`` with ``.0f``).
        temperature: ``None`` or a value interpolated verbatim after ``temp``.
        model_name: Model identifier placed in the trailing parentheses.

    Returns:
        Tuple ``(label_col, decision_col)`` of column-name strings.

    Raises:
        ValueError: If ``prompt_id`` is not 1, 2 or 3.
    """
    if prompt_id == 1:
        label_base, prompt_tag = 'propose_correct_answer', ''
    elif prompt_id == 2:
        label_base, prompt_tag = 'correct_choice_id', ' (AB)'
    elif prompt_id == 3:
        label_base, prompt_tag = 'correct_choice_id', ' (AB2)'
    else:
        raise ValueError("The prompt_id must be either 1 or 2 or 3")

    # Optional tags are empty strings when the corresponding setting is None.
    temp_tag = '' if temperature is None else f' (temp {temperature})'
    ratio_tag = '' if target_ratio is None else f' ({target_ratio*100:.0f}%)'

    label_col = f'{label_base}{ratio_tag}'
    decision_col = f'Decision{prompt_tag}{temp_tag}{ratio_tag} ({model_name})'
    return label_col, decision_col


def calculate_accuracy(csv_file, model_names, prompt_id, target_ratio, temperature):
    """Compute per-model accuracy of decisions against labels stored in a CSV.

    For every model, the label column and that model's decision column are
    looked up, rows with a NaN in either column are reported on stdout and
    excluded, and accuracy is the fraction of remaining rows where the two
    columns are equal.

    Args:
        csv_file: Path (or anything ``pd.read_csv`` accepts) of the results CSV.
        model_names: Iterable of model names; one accuracy is produced per name.
        prompt_id: 1, 2 or 3; selects which label/decision columns are used.
        target_ratio: ``None`` or a ratio that selects the ``(R%)`` columns.
        temperature: ``None`` or a value that selects the ``(temp T)`` columns.

    Returns:
        List of accuracies in the order of ``model_names``. An entry is NaN
        when no row of that model is free of NaNs.

    Raises:
        ValueError: If ``prompt_id`` is not 1, 2 or 3 (raised while processing
            the first model, so an empty ``model_names`` returns ``[]``).
        KeyError: If an expected column is missing from the CSV.
    """
    # Load the CSV file into a DataFrame
    df = pd.read_csv(csv_file)

    acc_list = []
    for model_name in model_names:
        label_col, decision_col = _accuracy_columns(
            prompt_id, target_ratio, temperature, model_name
        )

        # Look up both columns before printing anything so that a missing
        # column fails before any NaN report is emitted.
        columns_to_check = ((label_col, df[label_col]), (decision_col, df[decision_col]))

        # Check for NaNs and print information
        for col_name, col_data in columns_to_check:
            nans = col_data.isna()
            total_nans = nans.sum()
            if total_nans > 0:
                col_nan_indices = df[nans].index.tolist()
                print(f"Column '{col_name}' has {total_nans} NaN(s).")
                print(f"Row indices with NaNs: {col_nan_indices}")

        # Remove rows with a NaN in either of the two columns
        df_cleaned = df.dropna(subset=[label_col, decision_col])

        n_rows = len(df_cleaned)
        if n_rows == 0:
            # Nothing left to score: report NaN explicitly instead of
            # relying on a 0/0 division.
            acc = float('nan')
        else:
            acc = (df_cleaned[label_col] == df_cleaned[decision_col]).sum() / n_rows
        acc_list.append(acc)

    return acc_list

In [None]:
# Every model name, followed by the named subsets used to pick which models to analyze.
all_model_names = [
    'Qwen2-72B-Instruct',
    'Qwen2-7B-Instruct',
    'Meta-Llama-3.1-70B-Instruct',
    'Meta-Llama-3.1-8B-Instruct',
    'gemma-2-27b-it',
    'gemma-2-9b-it',
    'Mistral-Large-Instruct-2407',
    'Mistral-7B-Instruct-v0.3',
    'Yi-1.5-34B-Chat',
    'Yi-1.5-9B-Chat',
    'Phi-3-medium-128k-instruct',
    'Phi-3-mini-128k-instruct',
]
large_model_names = [
    'Qwen2-72B-Instruct',
    'Meta-Llama-3.1-70B-Instruct',
    'gemma-2-27b-it',
    'Mistral-Large-Instruct-2407',
    'Yi-1.5-34B-Chat',
    'Phi-3-medium-128k-instruct',
]
first_half_model_names = [
    'Qwen2-72B-Instruct',
    'Qwen2-7B-Instruct',
    'Meta-Llama-3.1-70B-Instruct',
    'Meta-Llama-3.1-8B-Instruct',
    'gemma-2-27b-it',
    'gemma-2-9b-it',
]
second_half_model_names = [
    'Mistral-Large-Instruct-2407',
    'Mistral-7B-Instruct-v0.3',
    'Yi-1.5-34B-Chat',
    'Yi-1.5-9B-Chat',
    'Phi-3-medium-128k-instruct',
    'Phi-3-mini-128k-instruct',
]
small_model_names = [
    'Qwen2-7B-Instruct',
    'Meta-Llama-3.1-8B-Instruct',
    'gemma-2-9b-it',
    'Mistral-7B-Instruct-v0.3',
    'Yi-1.5-9B-Chat',
    'Phi-3-mini-128k-instruct',
]
llama_model_names = [
    'Meta-Llama-3.1-70B-Instruct',
    'Meta-Llama-3.1-8B-Instruct',
]
mistral_model_names = [
    'Mistral-Large-Instruct-2407',
    'Mistral-7B-Instruct-v0.3',
]
fine_tuned_models = [
    'Meta-Llama-3.1-8B-Instruct',
    'Meta-Llama-3.1-8B-Instruct (LoRA 10)',
]

# Run settings: input CSV path and the column-selection arguments passed to calculate_accuracy.
csv_file = 'college_medicine.csv'
prompt_id = 2
target_ratio = None
temperature = 0

# One accuracy per entry of fine_tuned_models, in the same order.
acc_list = calculate_accuracy(
    csv_file=csv_file,
    model_names=fine_tuned_models,
    prompt_id=prompt_id,
    target_ratio=target_ratio,
    temperature=temperature,
)

In [None]:
# Show the per-model accuracies rounded to three decimal places.
np.round(np.asarray(acc_list), decimals=3)