In [11]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import torch.nn.functional as F
from pathlib import Path

In [12]:
# Ground-truth question/answer set from Phase I.
# QUESTION_COL is the join key used against each model's result file;
# TRUTH_COL holds the reference answer that predictions are compared with.
QUESTION_COL = "question"
TRUTH_COL    = "answer"

DATA_CSV = Path("../Phase I/data/medquestions_subset_150.csv")
base = pd.read_csv(DATA_CSV)

# Quick sanity report: row count and schema, then a small preview.
print("Ground truth loaded:", base.shape[0])
print("Columns:", base.columns.tolist())
base.head(3)

Ground truth loaded: 150
Columns: ['qid', 'question', 'answer']


Unnamed: 0,qid,question,answer
0,medquestions-150-0000,What are the genetic changes related to famili...,Mutations in the APC gene cause both classic a...
1,medquestions-150-0001,What are the treatments for Noonan syndrome ?,These resources address the diagnosis or manag...
2,medquestions-150-0002,How to diagnose National Hormone and Pituitary...,CJD is usually diagnosed based on signs and sy...


In [13]:
# Sentence-embedding model shared by every scoring call below.
# Loaded once here because construction is the expensive step.

print("Loading MiniLM embedding model...")
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")
print("Model loaded.")

Loading MiniLM embedding model...
Model loaded.


In [15]:
# Phase II tone result files: 2 providers x 4 tones = 8 CSVs.
# Layout on disk is <provider>/<tone>/results_<provider>_<tone>_medquestions150.csv,
# and each entry is keyed as "<provider>_<tone>".

BASE = Path(".")

FILES = {
    f"{provider}_{tone}": BASE / f"{provider}/{tone}/results_{provider}_{tone}_medquestions150.csv"
    for provider in ("gpt", "gemini")
    for tone in ("professional", "specialist", "friendly", "rude")
}

# Report up front which of the expected files are actually present.
for label, path in FILES.items():
    print(f"{label:20} - {path} | Exists: {path.exists()}")

gpt_professional     - gpt\professional\results_gpt_professional_medquestions150.csv | Exists: True
gpt_specialist       - gpt\specialist\results_gpt_specialist_medquestions150.csv | Exists: True
gpt_friendly         - gpt\friendly\results_gpt_friendly_medquestions150.csv | Exists: True
gpt_rude             - gpt\rude\results_gpt_rude_medquestions150.csv | Exists: True
gemini_professional  - gemini\professional\results_gemini_professional_medquestions150.csv | Exists: True
gemini_specialist    - gemini\specialist\results_gemini_specialist_medquestions150.csv | Exists: True
gemini_friendly      - gemini\friendly\results_gemini_friendly_medquestions150.csv | Exists: True
gemini_rude          - gemini\rude\results_gemini_rude_medquestions150.csv | Exists: True


In [16]:
# Phase I helper function (unchanged)

def add_semantic_scores(df_merged, pred_col, truth_col=TRUTH_COL, prefix="",
                        threshold=0.75, encoder=None):
    """Add embedding-similarity scores between predicted and reference answers.

    Two columns are written onto ``df_merged`` (the frame is modified in place
    and the same object is returned):

    * ``{prefix}semantic_sim``     - cosine similarity between the embedding of
      ``truth_col`` and the embedding of ``pred_col`` for the same row.
    * ``{prefix}semantic_correct`` - 1 when that similarity is >= ``threshold``,
      otherwise 0.

    Parameters
    ----------
    df_merged : pandas.DataFrame
        Frame holding both the reference and the predicted answer columns.
    pred_col : str
        Name of the column with the model's answers.
    truth_col : str
        Name of the column with the reference answers.
    prefix : str
        Prepended to the two output column names (and echoed in progress output).
    threshold : float
        Minimum similarity counted as correct. Defaults to 0.75, the value
        previously hard-coded here ("same threshold as Phase 1").
    encoder : object, optional
        Object exposing ``encode(texts, convert_to_tensor=True,
        show_progress_bar=True)``. When omitted, the notebook-level ``model``
        is used, which matches the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        ``df_merged`` itself, with the two score columns added.
    """
    sim_col     = f"{prefix}semantic_sim"
    correct_col = f"{prefix}semantic_correct"

    # Nothing to encode: still create the output columns so callers can rely on them.
    if df_merged.empty:
        print(f"No rows to score for prefix '{prefix}'.")
        df_merged[sim_col] = []
        df_merged[correct_col] = []
        return df_merged

    # Resolve the encoder lazily so the global is only needed when no encoder is passed.
    embedder = model if encoder is None else encoder

    # Missing answers become empty strings so that every row still gets an embedding.
    truth_texts = df_merged[truth_col].fillna("").astype(str).tolist()
    pred_texts  = df_merged[pred_col].fillna("").astype(str).tolist()

    print(f"Encoding embeddings for {prefix}...")
    truth_emb = embedder.encode(truth_texts, convert_to_tensor=True, show_progress_bar=True)
    pred_emb  = embedder.encode(pred_texts,  convert_to_tensor=True, show_progress_bar=True)

    # Row i of the truth embeddings is compared with row i of the prediction embeddings.
    sims = F.cosine_similarity(truth_emb, pred_emb)
    # Move to CPU before converting, in case the encoder produced tensors on another device.
    df_merged[sim_col] = sims.cpu().numpy()

    df_merged[correct_col] = (df_merged[sim_col] >= threshold).astype(int)

    return df_merged

In [17]:
# Phase I evaluation function (same structure)

def evaluate_model(results_csv: Path, model_label: str):
    """Score one model/tone result file against the ground-truth answers.

    The result CSV is read, joined to the notebook-level ``base`` frame on the
    question text (inner join), scored with ``add_semantic_scores``, and
    summarised as the mean of the ``{model_label}_semantic_correct`` column.

    Parameters
    ----------
    results_csv : Path
        CSV with at least the question column (``QUESTION_COL``) and a
        ``model_answer`` column.
    model_label : str
        Label used to name the answer/score columns and in printed output.

    Returns
    -------
    tuple
        ``(merged, sem_acc)`` - the scored frame and the semantic accuracy.
        ``sem_acc`` is NaN when the join yields no rows.

    Raises
    ------
    KeyError
        If the result file lacks the question or ``model_answer`` column.
    """
    print(f"\nEvaluating {model_label.upper()} from {results_csv}")

    df_raw = pd.read_csv(results_csv)
    print("Model result columns:", list(df_raw.columns))
    print(df_raw.head(3), "\n")

    answer_col = f"{model_label}_answer"

    # Only the question and the answer are taken from the result file; the
    # answer column is renamed so frames for different models do not collide.
    merged = base.merge(
        df_raw[[QUESTION_COL, "model_answer"]],
        on=QUESTION_COL,
        how="inner",
    ).rename(columns={"model_answer": answer_col})

    # The join key is free text: unmatched questions are dropped and repeated
    # questions multiply rows. Either case changes the accuracy denominator,
    # so make it visible instead of letting it pass silently.
    if len(merged) != len(base):
        print(
            f"WARNING: merge on '{QUESTION_COL}' produced {len(merged)} rows "
            f"for {len(base)} ground-truth rows; "
            "accuracy is computed over the merged rows only."
        )

    # Add semantic scoring (Phase 1 helper)
    merged = add_semantic_scores(
        merged,
        pred_col=answer_col,
        truth_col=TRUTH_COL,
        prefix=f"{model_label}_",
    )

    correct_col = f"{model_label}_semantic_correct"
    sem_acc = merged[correct_col].mean()

    print(f"{model_label.upper()} semantic accuracy: {sem_acc:.3f}\n")

    # Preview of the scored rows
    print(merged[
        [QUESTION_COL, TRUTH_COL, answer_col,
         f"{model_label}_semantic_sim", correct_col]
    ].head(), "\n")

    return merged, sem_acc

In [18]:
# Run evaluations for every tone file that is present.
# all_results keeps the scored frame per label; scores collects one summary row per label.

all_results = {}
scores = []

for label, path in FILES.items():
    if not path.exists():
        print(f"Skipping {label:20} - file missing.")
        continue

    merged_df, sem_acc = evaluate_model(path, label)
    all_results[label] = merged_df

    scores.append({
        "model_label": label,
        "semantic_accuracy": sem_acc
    })

# Columns are given explicitly so the frame still has a "semantic_accuracy"
# column when every file was skipped; otherwise sort_values would raise KeyError.
scores_df = pd.DataFrame(
    scores,
    columns=["model_label", "semantic_accuracy"],
).sort_values(
    "semantic_accuracy",
    ascending=False
)
scores_df


Evaluating GPT_PROFESSIONAL from gpt\professional\results_gpt_professional_medquestions150.csv
Model result columns: ['qid', 'model', 'question', 'model_answer', 'latency_ms', 'status', 'error']
                     qid                     model  \
0  medquestions-500-0000  gpt-4o-mini_professional   
1  medquestions-500-0001  gpt-4o-mini_professional   
2  medquestions-500-0002  gpt-4o-mini_professional   

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   

                                        model_answer  latency_ms status  error  
0  Familial adenomatous polyposis (FAP) is a here...       11552     ok    NaN  
1  Noonan syndrome is a genetic disorder that aff...       10769     ok    NaN  
2  The National Hormone and Pituitary Program (NH...       12248     ok    NaN   

Encoding embeddings for gpt_professi

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GPT_PROFESSIONAL semantic accuracy: 0.500

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                             gpt_professional_answer  \
0  Familial adenomatous polyposis (FAP) is a here...   
1  Noonan syndrome is a genetic disorder that aff...   
2  The National Hormone and Pituitary Program (NH...   
3  Spastic diplegia is a form of cerebral palsy c...   
4  

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GPT_SPECIALIST semantic accuracy: 0.467

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                               gpt_specialist_answer  \
0  Familial adenomatous polyposis (FAP) is a here...   
1  Noonan syndrome is a genetic disorder that aff...   
2  Diagnosing conditions related to the National ...   
3  Spastic diplegia is a form of cerebral palsy c...   
4  Re

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GPT_FRIENDLY semantic accuracy: 0.553

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                                 gpt_friendly_answer  \
0  Familial adenomatous polyposis (FAP) is a gene...   
1  Noonan syndrome is a genetic condition that ca...   
2  Diagnosing issues related to the National Horm...   
3  Spastic diplegia is a type of cerebral palsy t...   
4  Yes,

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GPT_RUDE semantic accuracy: 0.580

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                                     gpt_rude_answer  gpt_rude_semantic_sim  \
0  Familial adenomatous polyposis (FAP) is primar...               0.794953   
1  Noonan syndrome is a genetic disorder that can...               0.711155   
2  To diagnose issues related to the National Hor...  

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GEMINI_PROFESSIONAL semantic accuracy: 0.493

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                          gemini_professional_answer  \
0  As a board-certified medical doctor with over ...   
1  As a board-certified medical doctor with over ...   
2  As a board-certified medical doctor with over ...   
3  Thank you for your question. As a board-certif...   


Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GEMINI_SPECIALIST semantic accuracy: 0.440

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                            gemini_specialist_answer  \
0  Familial Adenomatous Polyposis (FAP) is an aut...   
1  Noonan syndrome is a genetic disorder with var...   
2  The question "How to diagnose National Hormone...   
3  No, spastic diplegia cerebral palsy (CP) is **...   
4 

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GEMINI_FRIENDLY semantic accuracy: 0.460

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                              gemini_friendly_answer  \
0  Of course! Let's break down the genetic change...   
1  Hello there! I completely understand wanting a...   
2  That's an excellent and very important questio...   
3  That's a really important question, and I'd be...   
4  Y

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

GEMINI_RUDE semantic accuracy: 0.473

                                            question  \
0  What are the genetic changes related to famili...   
1      What are the treatments for Noonan syndrome ?   
2  How to diagnose National Hormone and Pituitary...   
3     Is Spastic diplegia cerebral palsy inherited ?   
4              Is restless legs syndrome inherited ?   

                                              answer  \
0  Mutations in the APC gene cause both classic a...   
1  These resources address the diagnosis or manag...   
2  CJD is usually diagnosed based on signs and sy...   
3  Is spastic diplegia cerebral palsy inherited? ...   
4  The inheritance pattern of restless legs syndr...   

                                  gemini_rude_answer  \
0  Okay, let's get this sorted out quickly and cl...   
1  Okay, you're right, no more confusion. Let's g...   
2  Let's clear this up immediately. You cannot "d...   
3  Okay, I understand you want a clear, quick ans...   
4  Okay,

Unnamed: 0,model_label,semantic_accuracy
3,gpt_rude,0.58
2,gpt_friendly,0.553333
0,gpt_professional,0.5
4,gemini_professional,0.493333
7,gemini_rude,0.473333
1,gpt_specialist,0.466667
6,gemini_friendly,0.46
5,gemini_specialist,0.44


In [20]:
# Persist the per-prompt accuracy table so the charting step can read it.

out_dir = Path("evaluation")
out_dir.mkdir(parents=True, exist_ok=True)  # create the folder on first run

out_path = out_dir / "phase2_prompt_scores.csv"
scores_df.to_csv(out_path, index=False)

print("Saved phase2_prompt_scores.csv to:", out_path)

Saved phase2_prompt_scores.csv to: evaluation\phase2_prompt_scores.csv
