In [None]:
import re
import pandas as pd
from collections import Counter
import ast

In [None]:
def extract_option(answer):
    """Extract the chosen option letter (A-E) from an LLM's free-text answer.

    The answer is converted to ``str``, stripped of surrounding whitespace and
    of Markdown bold markers (``**``), then tested against a fixed sequence of
    heuristics. The first heuristic that matches wins, so their order matters.

    Args:
        answer: Raw model output. Any object is accepted; it is passed through
            ``str()`` first.

    Returns:
        The selected option as a single upper-case letter ``"A"``-``"E"``, or
        ``None`` when no heuristic recognises an option.
    """
    answer = str(answer).strip()  # Remove leading/trailing spaces
    answer = answer.replace("**", "")  # Drop Markdown bold markers

    # The whole answer is a single letter (a-e or A-E).
    match = re.fullmatch(r'[a-eA-E]', answer)
    if match:
        return match.group(0).upper()

    # A leading letter followed by whitespace (with or without more text).
    match = re.search(r'^([a-eA-E])\s', answer)
    if match:
        return match.group(1).upper()

    # A leading letter followed by a period, e.g. "A.", "b.".
    match = re.search(r'^([a-eA-E])\.', answer)
    if match:
        return match.group(1).upper()

    # A letter inside parentheses anywhere in the text, e.g. "(a)", "(B)",
    # OR a letter preceded by whitespace and followed by ")", e.g.
    # "text d) more text" (but NOT "textd)").
    match = re.search(r'\(([a-eA-E])\)|\s([a-eA-E])\)', answer)
    if match:
        return (match.group(1) or match.group(2)).upper()

    # A leading upper-case A-E immediately followed by another upper-case letter.
    match = re.match(r'^[A-E]([A-Z])', answer)
    if match:
        return match.group(0)[0]

    # A leading letter followed by any non-letter character, e.g. "b)", "c:".
    # Upper-cased like every other branch so that lower-case answers compare
    # equal to upper-case ground-truth letters.
    match = re.match(r'^[a-eA-E][^a-zA-Z]', answer)
    if match:
        return match.group(0)[0].upper()

    # Phrases like "the correct answer is:a", "the correct answer is: (a)".
    match = re.search(r'the correct answer is[:\s]*\(?([a-eA-E])\)?', answer.lower(), re.IGNORECASE)
    if match:
        return match.group(1).upper()

    return None

def evaluate_answers(file_path):
    """Score every zero-shot / one-shot answer column of a results CSV.

    The CSV is expected to contain a ``real`` column (ground-truth option) and
    a ``question`` column. For each column whose name starts with
    ``zero_shot`` or ``one_shot`` the chosen option is extracted with
    ``extract_option``, compared with the ground truth, and the resulting
    accuracy is printed.

    Args:
        file_path: Path of the CSV file to read with ``pandas.read_csv``.

    Returns:
        A DataFrame holding ``question``, ``correct_answer`` and, for each
        evaluated column, ``<col>_choice`` and ``<col>_is_correct``.
    """
    frame = pd.read_csv(file_path)
    frame['correct_answer'] = frame['real']

    kept = ['question', 'correct_answer']

    # Fix the list of answer columns up front, before derived columns are added.
    answer_columns = [
        name for name in frame.columns
        if name.startswith(("zero_shot", "one_shot"))
    ]

    for col in answer_columns:
        choice_name = f'{col}_choice'
        flag_name = f'{col}_is_correct'

        frame[choice_name] = frame[col].apply(extract_option)
        frame[flag_name] = frame[choice_name] == frame['correct_answer']

        accuracy = frame[flag_name].mean()
        print(f'Accuracy for {col}: {accuracy:.2%}')

        kept += [choice_name, flag_name]

    return frame[kept]

# Model tags to evaluate; each tag is substituted verbatim into the result
# file names used by the scoring loops.
models = [
    "mistral",
    "llama3.1:8b",
    "llama2:7b",
    "gemma:7b",
    "gemma2:9b",
    "phi4:14b",
    "qwen2.5:7b",
]

# Print the per-column accuracies for the result files in the ragkg directory.
for model in models:
    print(f"Results for {model}")
    evaluate_answers(f"/home/cc/PHD/ragkg/results_quiz_{model}.csv")

In [None]:
# Print the per-column accuracies for the baseline result files in the
# HealthBranches directory, one file per model tag in `models`.
for model in models:
    print(f"Results for {model}")
    evaluate_answers(f"/home/cc/PHD/HealthBranches/results_quiz_baseline_{model}.csv")

In [None]:
# Balancer: evens out how often each letter (A-E) is the correct option.

def balance_correct_options(df):
    """Rebalance which letter (A-E) holds the correct answer across questions.

    Each letter should be the correct option for about 20% of the rows. When
    the row count is not a multiple of five, the leftover rows are assigned to
    the first letters in alphabetical order. Rows are taken from over-
    represented letters and, for each such row, the correct option text is
    swapped with the text currently sitting at the target letter's position.

    Args:
        df: DataFrame with a ``correct_option`` column (letters ``"A"``-``"E"``)
            and an ``options`` column holding either a list of option texts or
            the string repr of such a list.

    Returns:
        The same DataFrame object, modified in place. For every moved row,
        ``options`` is written back as the string repr of the reordered list
        and ``correct_option`` is set to the new letter.
    """
    letters = "ABCDE"
    num_questions = len(df)
    num_per_option = num_questions // 5  # 20% for each letter (A-E)
    remaining = num_questions % 5  # Rows left over after the even split

    target_distribution = {letter: num_per_option for letter in letters}
    for letter in letters[:remaining]:  # Hand the leftover rows to the first letters
        target_distribution[letter] += 1

    current_distribution = Counter(df["correct_option"])

    def letter_to_index(letter):
        """Map an option letter to its 0-based position in the options list."""
        return ord(letter) - ord("A")

    # Positive value: the letter needs more rows; negative: it has a surplus.
    needed = {
        letter: target_distribution[letter] - current_distribution[letter]
        for letter in letters
    }

    # Row labels that may be taken away from each over-represented letter.
    excess = {letter: [] for letter in letters}
    for letter, diff in needed.items():
        if diff < 0:
            excess[letter] = df[df["correct_option"] == letter].index.to_numpy()

    for letter in letters:
        if needed[letter] <= 0:
            continue

        new_idx = letter_to_index(letter)
        for excess_letter in letters:
            # Read the live deficit: it shrinks after each donor letter, and
            # using a stale value would pull in too many rows.
            if needed[letter] <= 0:
                break

            indices = excess[excess_letter]
            if needed[excess_letter] >= 0 or len(indices) == 0:
                continue

            num_swaps = min(needed[letter], -needed[excess_letter], len(indices))
            swap_indices = indices[:num_swaps]
            excess[excess_letter] = indices[num_swaps:]

            for idx in swap_indices:
                correct_idx = letter_to_index(df.at[idx, "correct_option"])

                options = df.at[idx, "options"]
                if isinstance(options, str):
                    # Parse the string repr back into a list.
                    options = ast.literal_eval(options)

                # Move the correct text to the target letter's position.
                options[correct_idx], options[new_idx] = options[new_idx], options[correct_idx]
                df.at[idx, "options"] = str(options)  # Stored as the list's string repr
                df.at[idx, "correct_option"] = letter

            needed[excess_letter] += num_swaps
            needed[letter] -= num_swaps

    return df

# Disabled one-off run: balance the question set and save it as a new CSV.
# df = pd.read_csv("/home/cc/PHD/HealthBranches/questions_pro/ultimate_questions_v3_full.csv")
# print(df.head(10))
# print("Distribuzione prima:")
# print(Counter(df["correct_option"]))

# df = balance_correct_options(df)
# print("Distribuzione dopo:")
# print(Counter(df["correct_option"]))

# df.to_csv("/home/cc/PHD/HealthBranches/questions_pro/ultimate_questions_v3_full_balanced.csv", index=False)