# **Load Raw Dataset**

In [None]:
import pandas as pd

# Load the raw CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another machine without editing it.
csv_path = "/Users/sheetalpatnaik/Desktop/GENAI/test_2.csv"
df = pd.read_csv(csv_path)

# Preview the first rows to sanity-check the columns that were read.
print(df.head())


   index             Figure_path  \
0     62  PMC8253867_Fig2_41.jpg   
1     65  PMC8253867_Fig2_42.jpg   
2     67  PMC8253873_Fig6_45.jpg   
3     68  PMC8253873_Fig6_46.jpg   
4     74  PMC8253873_Fig8_49.jpg   

                                             Caption  \
0  CT pulmonary angiogram reveals encasement and ...   
1  CT pulmonary angiogram reveals encasement and ...   
2  Axial STIR MR image of the tear of the patella...   
3  MRI axial view of the patellar tendon at 6 mon...   
4  Pre-injection axial STIR MR image showing inju...   

                                            Question  \
0   What is the name of the artery encased and di...   
1   Which artery is encased and displaced accordi...   
2  What is the structure affected by the tear sho...   
3   What is the imaging technique used in the fig...   
4   What type of MRI sequence was used for imaging?    

                        Choice A  \
0      A: Right Coronary Artery    
1   A:Left main coronary artery    
2

# **Data PreProcessing**

## Image Preprocessing

 Resize + Normalize

In [None]:
from PIL import Image
import torchvision.transforms as transforms
import os

# Directories searched (in this order) for each figure file
image_dir_1 = "/Users/sheetalpatnaik/Desktop/GENAI/images"
image_dir_2 = "/Users/sheetalpatnaik/Desktop/GENAI/figures"

# Per-channel statistics passed to Normalize
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Pipeline: resize to 224x224 -> convert to tensor -> per-channel normalisation
image_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)


Preprocess Image Function + Loop

In [None]:
from tqdm import tqdm

def preprocess_image(image_path):
    """Load one image file and run it through `image_transform`.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The result of `image_transform` applied to the RGB-converted image, or
        None when opening or transforming fails (the error is printed, not raised).
    """
    try:
        # The context manager closes the file as soon as the image has been
        # converted. The calling loop opens one file per dataset row, so leaving
        # handles to the garbage collector risks running out of descriptors.
        with Image.open(image_path) as img:
            return image_transform(img.convert("RGB"))
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the batch.
        print(f"Error processing {image_path}: {e}")
        return None

# Transformed images, keyed by the whitespace-stripped "Figure_path" value.
# NOTE(review): every tensor is kept in memory and no later cell visible here reads
# this dict — confirm it is still needed before running on the full dataset.
preprocessed_images = {}

for _, record in tqdm(df.iterrows(), total=len(df)):
    file_name = record["Figure_path"].strip()

    # Try image_dir_1 first, then image_dir_2; rows with no file on disk are skipped.
    located_path = None
    for folder in (image_dir_1, image_dir_2):
        candidate = os.path.join(folder, file_name)
        if os.path.exists(candidate):
            located_path = candidate
            break
    if located_path is None:
        continue

    tensor = preprocess_image(located_path)
    if tensor is not None:
        preprocessed_images[file_name] = tensor

print("✅ Total preprocessed images:", len(preprocessed_images))


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33430/33430 [01:35<00:00, 350.22it/s]


✅ Total preprocessed images: 29021


## Text Preprocessing (Questions + Answers)

In [None]:
import re

def clean_text(text):
    """Normalise free text: lower-case, drop unsupported characters, tidy whitespace.

    Args:
        text: Any value; it is converted with `str()` first.

    Returns:
        The lower-cased text containing only letters, digits, whitespace and
        the punctuation ``- : ( ) , . %``, with every whitespace run reduced to a
        single space and no leading/trailing whitespace.
    """
    text = str(text).lower()
    # Remove disallowed characters BEFORE collapsing whitespace. Doing it the other
    # way round left a double space wherever a removed character sat between two
    # spaces (e.g. "a & b" -> "a  b").
    text = re.sub(r"[^a-z0-9\s\-:(),.%]", "", text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

# Add a cleaned copy of each free-text column next to the raw one.
for raw_col, cleaned_col in (
    ("Question", "Cleaned_Question"),
    ("Answer", "Cleaned_Answer"),
    ("Caption", "Cleaned_Caption"),
):
    df[cleaned_col] = df[raw_col].apply(clean_text)


In [None]:
# Spot-check the cleaned captions (prints the Series' truncated repr).
print(df["Cleaned_Caption"])

0        ct pulmonary angiogram reveals encasement and ...
1        ct pulmonary angiogram reveals encasement and ...
2        axial stir mr image of the tear of the patella...
3        mri axial view of the patellar tendon at 6 mon...
4        pre-injection axial stir mr image showing inju...
                               ...                        
33425    replacement teeth of holotype right dentary (n...
33426    a large field oct image capturing the surface ...
33427    a camera image of the same section of tissues ...
33428    merged pars and oct (0.4 na) image of resected...
33429    merged pars and oct (0.4 na) image of resected...
Name: Cleaned_Caption, Length: 33430, dtype: object


## Dataset Pairing
No separate pairing step is needed: each row of `df` already ties an image (`df['Figure_path']`) to its question (`df['Cleaned_Question']`) and its answer (`df['Cleaned_Answer']`).

##  Annotation Cleaning

In [None]:
# Accepted answer labels. The lower-case variants are kept for backward
# compatibility, although answers are upper-cased before the membership test.
valid_choices = {"a", "b", "c", "d", "A", "B", "C", "D"}

# Each of these columns must be present and non-blank for a row to be kept.
required_columns = ["Question", "Answer", "Choice A", "Choice B", "Choice C", "Choice D"]

# Normalise the answer label once (trim + upper-case) and use that same value for
# both the validity filter and the stored column. Previously the filter tested the
# un-trimmed label, so an answer such as " B" was discarded even though the stored
# value would have been trimmed to "B".
normalized_answer = df["Answer"].str.strip().str.upper()

row_is_valid = normalized_answer.isin(valid_choices)
for column in required_columns:
    row_is_valid &= df[column].notnull() & df[column].str.strip().ne("")

# .copy() so the column assignment below does not write into a view of `df`.
clean_df = df[row_is_valid].copy()

# Overwrites the lower-cased "Cleaned_Answer" produced by clean_text with the
# upper-case option letter.
clean_df["Cleaned_Answer"] = normalized_answer[row_is_valid]


In [None]:
# Spot-check the normalised answer labels of the rows that passed the filter.
print(clean_df["Cleaned_Answer"])

0        B
1        D
2        C
3        C
4        C
        ..
33425    B
33426    D
33427    D
33428    C
33429    A
Name: Cleaned_Answer, Length: 33430, dtype: object


##  Data Filtering / Sampling

In [None]:
# Random sample (fixed seed) for few-shot prompts: up to 1000 rows. Capping at
# len(clean_df) avoids the ValueError that DataFrame.sample raises when more rows
# are requested than exist (sampling is without replacement).
subset_df = clean_df.sample(n=min(1000, len(clean_df)), random_state=42).reset_index(drop=True)


In [None]:
# Inspect the sampled subset (prints the DataFrame's truncated repr).
print(subset_df)

       index                                  Figure_path  \
0      58420                    PMC8519188_FIG5_85295.jpg   
1      10664                    PMC8285465_Fig3_10775.jpg   
2     173124                   PMC8918112_Fig4_221411.jpg   
3     883387                   PMC8225413_fig2_475661.jpg   
4     186762                   PMC9015882_fig2_255557.jpg   
..       ...                                          ...   
995   132642                     PMC8692788_F1_143932.jpg   
996    28951  PMC8350899_emmm202013695-fig-0003_30439.jpg   
997  1147805                    PMC8443912_Fig1_60310.jpg   
998   126366                   PMC8786746_Fig1_175557.jpg   
999   865651      PMC8162640_pone.0252544.g002_455897.jpg   

                                               Caption  \
0    CT scan of the chest post - chemotherapy showi...   
1    Orbit magnetic resonance imaging (MRI) at the ...   
2    posterior pole (Pos) is at the right in all ph...   
3    Excisional biopsy revealing in

## Format for Model Input

## Re-encoded PMC questions

In [None]:
from transformers import CLIPTokenizer, CLIPModel
import torch
import numpy as np

# Load the CLIP model and its tokenizer from the "openai/clip-vit-base-patch32" checkpoint.
# NOTE(review): `clip_model` is assigned again in the few-shot section (there with
# `.to(device)`), which replaces this instance for any cell run afterwards.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

# CLIP text embedding for a single question string
def get_clip_text_embedding(text):
    """Return the CLIP text-feature vector for `text` as a NumPy array.

    The text is tokenized with padding and truncation enabled, passed through
    `clip_model.get_text_features` without gradient tracking, squeezed, moved to
    the CPU and converted to NumPy.
    """
    encoded = clip_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        features = clip_model.get_text_features(**encoded)
    squeezed = features.squeeze()
    return squeezed.cpu().numpy()

# Take the questions straight from `subset_df`: these are the same strings, in the
# same order, that the next cell stores under prompt_data[i]["question"]. Reading
# them here removes the dependency on `prompt_data`, which is only built in a later
# cell and made this cell fail with NameError on a fresh "Restart & Run All".
pmc_questions = subset_df["Cleaned_Question"].tolist()
pmc_clip_embeddings = np.array([get_clip_text_embedding(q) for q in pmc_questions])

# Scale every row to unit L2 norm.
pmc_clip_embeddings = pmc_clip_embeddings / np.linalg.norm(pmc_clip_embeddings, axis=1, keepdims=True)

print("✅ Re-encoded PMC questions using CLIP. Shape:", pmc_clip_embeddings.shape)


✅ Re-encoded PMC questions using CLIP. Shape: (1000, 512)


In [None]:
import json

# Few-shot records: image file name, cleaned question, the four choices, answer letter
prompt_data = [
    {
        "image": record["Figure_path"],
        "question": record["Cleaned_Question"],
        "choices": {letter: record[f"Choice {letter}"] for letter in ("A", "B", "C", "D")},
        "answer": record["Cleaned_Answer"],
    }
    for _, record in subset_df.iterrows()
]
print(json.dumps(prompt_data, indent=4))

# Persist the records as JSON
with open("few_shot_prompts.json", "w") as out_file:
    json.dump(prompt_data, out_file, indent=2)



[
    {
        "image": "PMC8519188_FIG5_85295.jpg",
        "question": "what does the image depict about the patients tumor",
        "choices": {
            "A": " A:The tumor has grown larger ",
            "B": " B:The tumor has shrunk ",
            "C": " C:The tumor has not changed ",
            "D": " D:The image doesn't show tumor regression "
        },
        "answer": "B"
    },
    {
        "image": "PMC8285465_Fig3_10775.jpg",
        "question": "what imaging technique was used to capture the image",
        "choices": {
            "A": " A:CT scan ",
            "B": " B:Electroencephalography ",
            "C": " C:X-ray ",
            "D": " D:Magnetic resonance imaging "
        },
        "answer": "D"
    },
    {
        "image": "PMC8918112_Fig4_221411.jpg",
        "question": "what is located to the right in all the photographs",
        "choices": {
            "A": " A:The anterior pole ",
            "B": " B:The posterior pole ",
            "C": " 

# **Prompting Techniques**

## ZERO SHOT

In [None]:
!pip install openai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [None]:

import base64
import json
import os
import re
from io import BytesIO

import openai
from PIL import Image



# Helper: image file -> base64 text of its JPEG encoding
def image_to_base64(image_path):
    """Re-encode the image at `image_path` as an RGB JPEG and return it base64-encoded.

    Returns:
        The base64 text (decoded as UTF-8) of the in-memory JPEG bytes.
    """
    jpeg_buffer = BytesIO()
    with Image.open(image_path) as source_image:
        source_image.convert("RGB").save(jpeg_buffer, format="JPEG")
    encoded = base64.b64encode(jpeg_buffer.getvalue())
    return encoded.decode("utf-8")

# Helper: Extract option letter (A–D)
def extract_option_letter(text):
    """Pull the chosen option letter (A-D) out of a model reply.

    Matching is attempted in decreasing order of reliability:
      1. the reply starts with the option ("B", "d.", "(c)", "C. The vegetal pole");
      2. a stand-alone CAPITAL letter A-D anywhere in the reply;
      3. a stand-alone letter a-d in any case (the original behaviour).

    Steps 1 and 2 stop the English article "a" from being read as option A in
    replies such as "This is a CT scan, so the answer is D".

    Args:
        text: The model reply; None or an empty string yields None.

    Returns:
        The upper-case option letter, or None when no option is found.
    """
    if not text:
        return None

    # 1) Reply begins with the option, followed by ")", "." or ":" or nothing else.
    leading = re.match(r"\s*\(?([A-Da-d])(?:\s*[).:]|\s*$)", text)
    if leading:
        return leading.group(1).upper()

    # 2) Stand-alone capital letter (case-sensitive on purpose).
    capital = re.search(r"\b([A-D])\b", text)
    if capital:
        return capital.group(1)

    # 3) Last resort: any stand-alone a-d, case-insensitive.
    match = re.search(r'\b([A-D])\b', text, re.IGNORECASE)
    if match:
        return match.group(1).upper()
    return None

# 🔍 Helper: Check for low confidence
def is_low_confidence(answer):
    """Heuristically decide whether a model reply looks unsure.

    A reply is flagged when it names two or more DIFFERENT options, or when it
    contains one of a few hedging phrases.

    Args:
        answer: The model reply text.

    Returns:
        True if the reply should be treated as low confidence, else False.
    """
    # Collect distinct option letters. A lower-case "a" is ignored because it is
    # almost always the English article; counting it (and counting the same letter
    # twice) flagged confident replies such as "B. Option B is correct" or
    # "This is a CT scan, so D".
    mentioned = {letter.upper() for letter in re.findall(r"\b[A-Db-d]\b", answer)}
    if len(mentioned) > 1:  # mentions multiple options
        return True

    lowered = answer.lower()
    uncertain_phrases = ["might be", "could be", "not sure", "maybe", "possibly"]
    return any(phrase in lowered for phrase in uncertain_phrases)

# 🔄 Helper: ask the model again, insisting on a bare option letter
def get_clarified_answer(question, choices, image_b64):
    """Send the question, its four choices and the image to the model once more.

    Args:
        question: Question text.
        choices: Mapping with the keys "A", "B", "C" and "D".
        image_b64: Base64-encoded JPEG, embedded as a data URL.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    clarification_prompt = f"""
You are a medical expert. Analyze the image and answer the following multiple-choice question.

Question: {question}

Options:
A. {choices['A']}
B. {choices['B']}
C. {choices['C']}
D. {choices['D']}

Please respond with **only the correct option letter (A, B, C, or D)**. Do not provide explanation.
"""

    text_part = {"type": "text", "text": clarification_prompt}
    image_part = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}}

    completion = openai.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "user", "content": [text_part, image_part]}],
        max_tokens=50,
        temperature=0.3,
    )
    reply = completion.choices[0].message.content
    return reply.strip()


# Image location and running counters for the zero-shot evaluation
image_folder = "/Users/sheetalpatnaik/Desktop/GENAI/figures/"
correct = 0
total = 0
image_count = 0

for idx, item in enumerate(prompt_data):
    # Stop once 30 images have been scored.
    if image_count >= 30:
        break

    question = item["question"]
    correct_answer = item["answer"].strip().upper()
    image_path = os.path.join(image_folder, item["image"].strip())

    # Entries whose image is missing are reported and not counted.
    if not os.path.exists(image_path):
        print(f"❌ Image not found: {image_path}")
        continue

    try:
        image_b64 = image_to_base64(image_path)
        choices = item["choices"]

        prompt_text = f"""
You are a medical expert. Analyze the image and answer the following multiple-choice question.

Question: {question}

Options:
A. {choices['A']}
B. {choices['B']}
C. {choices['C']}
D. {choices['D']}

Please respond with only the correct option letter (A, B, C, or D).
"""

        user_content = [
            {"type": "text", "text": prompt_text},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"}},
        ]
        response = openai.chat.completions.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": user_content}],
            max_tokens=100,
            temperature=0.3,
        )

        gpt_answer = response.choices[0].message.content.strip()
        predicted_option = extract_option_letter(gpt_answer)

        # One retry when no valid letter was found or the reply looks unsure.
        needs_retry = predicted_option not in ("A", "B", "C", "D") or is_low_confidence(gpt_answer)
        if needs_retry:
            print("⚠️ Low confidence or invalid option detected. Re-prompting...")
            gpt_answer = get_clarified_answer(question, choices, image_b64)
            predicted_option = extract_option_letter(gpt_answer)

        # Report this question
        shown_option = predicted_option if predicted_option else 'Not Found'
        print(f"\n[Q{idx+1}] {question}")
        print(f"📌 Predicted Answer: {gpt_answer}")
        print(f"🔢 Predicted Option: {shown_option}")
        print(f"✅ Actual Option: {correct_answer}")

        correct += int(predicted_option == correct_answer)
        total += 1
        image_count += 1

    except Exception as e:
        # Any failure for this item is reported; the item is not counted.
        print(f"❌ Error processing image {image_path}: {e}")

# Summary over the items that were actually scored
if total:
    print(f"\n🔍 Overall Accuracy: {correct}/{total} = {correct / total:.2f}")
else:
    print("⚠️ No valid images processed.")


[Q1] what does the image depict about the patients tumor
📌 Predicted Answer: B
🔢 Predicted Option: B
✅ Actual Option: B

[Q2] what imaging technique was used to capture the image
📌 Predicted Answer: D.
🔢 Predicted Option: D
✅ Actual Option: D

[Q3] what is located to the right in all the photographs
📌 Predicted Answer: C. The vegetal pole
🔢 Predicted Option: C
✅ Actual Option: B
⚠️ Low confidence or invalid option detected. Re-prompting...

[Q4] what does the excisional biopsy reveal in this image
📌 Predicted Answer: C
🔢 Predicted Option: C
✅ Actual Option: C

[Q5] what approach was used for the surgery
📌 Predicted Answer: B. Posterior approach
🔢 Predicted Option: B
✅ Actual Option: B

[Q6] what is indicated in blue in the image
📌 Predicted Answer: C.
🔢 Predicted Option: C
✅ Actual Option: B
⚠️ Low confidence or invalid option detected. Re-prompting...

[Q7] which type of probe is used for both methods mentioned in the caption
📌 Predicted Answer: C.
🔢 Predicted Option: C
✅ Actual Opti

## FEW SHOT

In [None]:
!pip install clip

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [None]:
pip uninstall -y clip


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Found existing installation: clip 1.0
Uninstalling clip-1.0:
  Successfully uninstalled clip-1.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install git+https://github.com/openai/CLIP.git

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /private/var/folders/tc/5d149j1n63d449j9q57cyn1c0000gn/T/pip-req-build-onu5jbdc
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /private/var/folders/tc/5d149j1n63d449j9q57cyn1c0000gn/T/pip-req-build-onu5jbdc
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Preparing metadata (setup.py) ... [?25l- \ done
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l- \ | done
[?25h  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369490 sha256=9d08c6ad892f00a7c6b23e04479ae53398a7e91c35773317e5a59fee9b166dab
  Stored in directory: /private/var/folders/tc/5d149j1n63d449j9q57cyn1c0000gn/T/pip-ephem-wheel-cache-m9m102r6/wheels/35/3e/df/3d24cbfb3b6a06f17a2bfd7d1138900d4365d9028aa8f6e92f
Successfully built clip
Installing collected packages: clip

In [None]:
import clip
# Show which installed `clip` package the import above resolved to.
print(clip.__file__)

/opt/anaconda3/lib/python3.12/site-packages/clip/__init__.py


STEP 1: Load Libraries

In [None]:
import os
import json
import pickle
import numpy as np
from PIL import Image
from sklearn.neighbors import NearestNeighbors
from tqdm import tqdm
import openai
import torch
from transformers import CLIPProcessor, CLIPModel
# Use the GPU when torch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Hugging Face CLIP model (moved to `device`) and its processor
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_model = clip_model.to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

Define a Function to Generate Embeddings

In [None]:
def get_embedding(image_path, question):
    """Build one joint CLIP embedding for an (image, question) pair.

    Args:
        image_path: Path of the image file.
        question: Question text paired with the image.

    Returns:
        NumPy array: the element-wise mean of the model's image embedding and
        text embedding for this pair.
    """
    # Load image; the context manager releases the file handle once the RGB copy
    # exists instead of leaving it to the garbage collector.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Process inputs for CLIP. truncation=True matches get_clip_text_embedding and
    # keeps an over-long question from failing inside the text encoder.
    inputs = clip_processor(
        text=[question], images=image, return_tensors="pt", padding=True, truncation=True
    ).to(device)

    # Get embeddings
    with torch.no_grad():
        outputs = clip_model(**inputs)
        image_emb = outputs.image_embeds[0].cpu().numpy()
        text_emb = outputs.text_embeds[0].cpu().numpy()

    # Combine them (simple average).
    # NOTE(review): must match how the vectors in dataset_embeddings.pkl were built — confirm.
    combined_emb = (image_emb + text_emb) / 2
    return combined_emb

Load Your Embeddings (Pickle File)

In [None]:
# Load the saved dataset embeddings.
# NOTE(review): pickle.load can execute arbitrary code — only load files you created yourself.
# Later cells read the "embedding", "question" and "answer" fields of each entry.
with open("/Users/sheetalpatnaik/Desktop/GENAI/dataset_embeddings.pkl", "rb") as f:
    embeddings = pickle.load(f)

Fit KNN on Existing Embeddings

In [None]:
# Collect the stored vector ("embedding" field) of every saved entry.
embedding_matrix = [item['embedding'] for item in embeddings]

# Cosine-metric nearest-neighbour index configured for 5 neighbours per query.
knn = NearestNeighbors(n_neighbors=5, metric='cosine')
knn.fit(embedding_matrix)

Define Helper to Get Combined Embedding for a Query

In [None]:
image_folder = "/Users/sheetalpatnaik/Desktop/GENAI/figures"  # Replace with your folder path

def get_combined_embedding_from_query(query_entry):
    """Return the joint embedding for a query record.

    Args:
        query_entry: Mapping with an "image" file name and a "question" string.

    Raises:
        FileNotFoundError: if the image is not present under `image_folder`.
    """
    image_path = os.path.join(image_folder, query_entry["image"])
    question = query_entry["question"]

    if os.path.exists(image_path):
        return get_embedding(image_path, question)
    raise FileNotFoundError(f"Image not found: {image_path}")

In [None]:

correct_answers = 0
total_queries = 20
# Number of in-context examples per prompt (the neighbour count `knn` was configured with).
num_examples = knn.n_neighbors


def _is_same_item(example, query):
    """True when a retrieved entry is the query itself (same image file and question)."""
    return (
        str(example.get("image", "")).strip() == str(query["image"]).strip()
        and example.get("question") == query["question"]
    )


for i in tqdm(range(total_queries)):
    query = prompt_data[i]

    # Step 1: Get query embedding
    query_embedding = get_combined_embedding_from_query(query)

    # Step 2: KNN - Find similar examples.
    # One extra neighbour is requested so that the query itself can be dropped if it
    # is part of the saved embeddings; otherwise its own ground-truth answer ends up
    # in the prompt as an "example" and inflates the measured accuracy.
    distances, indices = knn.kneighbors([query_embedding], n_neighbors=num_examples + 1)
    similar_examples = [
        embeddings[j] for j in indices[0] if not _is_same_item(embeddings[j], query)
    ][:num_examples]

    # Step 3: Build prompt
    prompt = "You are a helpful assistant. Based on the examples, answer the final question.\n\n"
    for idx, ex in enumerate(similar_examples):
        prompt += f"Example {idx+1}:\n"
        prompt += f"Q: {ex['question']}\n"
        prompt += f"A: {ex['answer']}\n"

    prompt += f"\nQ: {query['question']}\nChoices:\n"
    for k, v in query["choices"].items():
        prompt += f"{v}\n"
    prompt += "A:"

    # Step 4: Call GPT-4 (text only — the image itself is not sent in this section)
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=10,
        temperature=0.0
    )

    model_answer = response.choices[0].message.content.strip()

    print(f"Actual Answer: {query['answer']} | Predicted Answer: {model_answer}")

    # Step 5: Compare the first character of the reply with the answer letter.
    # Slicing ([:1]) instead of indexing ([0]) keeps an empty reply from raising IndexError.
    if model_answer[:1].upper() == query["answer"].upper():
        correct_answers += 1

accuracy = correct_answers / total_queries * 100
print(f"\n✅ Accuracy on first {total_queries} queries: {accuracy:.2f}%")

  5%|████████▌                                                                                                                                                                   | 1/20 [00:03<00:58,  3.07s/it]

Actual Answer: B | Predicted Answer: B: The tumor has shrunk



 10%|█████████████████▏                                                                                                                                                          | 2/20 [00:05<00:44,  2.49s/it]

Actual Answer: D | Predicted Answer: D: Magnetic resonance imaging



 15%|█████████████████████████▊                                                                                                                                                  | 3/20 [00:05<00:29,  1.72s/it]

Actual Answer: B | Predicted Answer: B: The posterior pole



 20%|██████████████████████████████████▍                                                                                                                                         | 4/20 [00:06<00:22,  1.42s/it]

Actual Answer: C | Predicted Answer: C: Inguinal node metastasis



 25%|███████████████████████████████████████████                                                                                                                                 | 5/20 [00:07<00:19,  1.28s/it]

Actual Answer: B | Predicted Answer: B: Posterior approach



 30%|███████████████████████████████████████████████████▌                                                                                                                        | 6/20 [00:08<00:16,  1.15s/it]

Actual Answer: B | Predicted Answer: B: nucleus



 35%|████████████████████████████████████████████████████████████▏                                                                                                               | 7/20 [00:09<00:12,  1.01it/s]

Actual Answer: C | Predicted Answer: C: Linear probe



 40%|████████████████████████████████████████████████████████████████████▊                                                                                                       | 8/20 [00:10<00:11,  1.04it/s]

Actual Answer: B | Predicted Answer: B: Left septal/subcallosal area



 45%|█████████████████████████████████████████████████████████████████████████████▍                                                                                              | 9/20 [00:11<00:10,  1.05it/s]

Actual Answer: B | Predicted Answer: B: Hip Prosthesis



 50%|█████████████████████████████████████████████████████████████████████████████████████▌                                                                                     | 10/20 [00:11<00:08,  1.17it/s]

Actual Answer: C | Predicted Answer: C: Right ICA



 55%|██████████████████████████████████████████████████████████████████████████████████████████████                                                                             | 11/20 [00:12<00:07,  1.16it/s]

Actual Answer: D | Predicted Answer: D: Lateral



 60%|██████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                    | 12/20 [00:13<00:06,  1.22it/s]

Actual Answer: D | Predicted Answer: D: Lower right lobe



 65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                           | 13/20 [00:14<00:05,  1.20it/s]

Actual Answer: B | Predicted Answer: Type B dissection.



 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                   | 14/20 [00:15<00:05,  1.15it/s]

Actual Answer: B | Predicted Answer: B: right upper lobe



 75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                          | 15/20 [00:16<00:04,  1.01it/s]

Actual Answer: A | Predicted Answer: A complex viral envelope



 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                  | 16/20 [00:17<00:03,  1.08it/s]

Actual Answer: B | Predicted Answer: B: CT scan



 85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                         | 17/20 [00:18<00:02,  1.09it/s]

Actual Answer: D | Predicted Answer: D: Congenitally corrected transposition of



 90%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                 | 18/20 [00:19<00:01,  1.10it/s]

Actual Answer: B | Predicted Answer: B: Extension motion



 95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍        | 19/20 [00:20<00:00,  1.10it/s]

Actual Answer: D | Predicted Answer: First and second lines.


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:20<00:00,  1.05s/it]

Actual Answer: D | Predicted Answer: D: Four-part fracture

✅ Accuracy on first 20 queries: 90.00%





## Chain of Thought

Load Your Data and Embeddings

In [None]:
import json
import pickle
import os

# Paths (absolute and machine-specific — adjust before running elsewhere)

EMBEDDINGS_PKL_PATH = "/Users/sheetalpatnaik/Desktop/GENAI/dataset_embeddings.pkl"  # <-- update with actual path
IMAGE_FOLDER = "/Users/sheetalpatnaik/Desktop/GENAI/figures"                      # <-- path to image folder

# Load the saved embeddings (the same file the few-shot section loads as `embeddings`).
# NOTE(review): pickle.load can execute arbitrary code — only load files you created yourself.
with open(EMBEDDINGS_PKL_PATH, "rb") as f:
    saved_embeddings = pickle.load(f)

print(f"Loaded {len(saved_embeddings)} saved embeddings.")


# `prompt_data` comes from the "Format for Model Input" section earlier in the notebook.
print(f"Loaded {len(prompt_data)} VQA entries.")

Loaded 971 saved embeddings.
Loaded 1000 VQA entries.


Load CLIP Model and Preprocessing

In [None]:
import clip
import torch
from PIL import Image

# Use the GPU when torch reports one, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Load the OpenAI `clip` package's "ViT-B/32" model together with its image preprocessing callable.
# NOTE(review): this is a second CLIP implementation next to the Hugging Face `clip_model`
# used in the few-shot section — confirm which one produced the saved embeddings.
model, preprocess = clip.load("ViT-B/32", device=device)

Create (Image + Question) Embeddings in Same Format

In [None]:
from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize
from torch.nn.functional import normalize

def generate_clip_embedding(image_path, question_text):
    """Build one joint CLIP embedding for an (image, question) pair.

    Args:
        image_path: Path of the image file.
        question_text: Question paired with the image.

    Returns:
        list[float]: the mean of the encoded image and text features, L2-normalised.
    """
    # Load and preprocess image; the context manager releases the file handle
    # instead of leaving it to the garbage collector.
    with Image.open(image_path) as img:
        image = preprocess(img.convert("RGB")).unsqueeze(0).to(device)

    # Tokenize text. truncate=True cuts an over-long question to the model's context
    # length; without it clip.tokenize raises for such inputs.
    text = clip.tokenize([question_text], truncate=True).to(device)

    # Encode both
    with torch.no_grad():
        image_features = model.encode_image(image)
        text_features = model.encode_text(text)

    # Combine by averaging, then normalise.
    # NOTE(review): must match how the vectors in the pickle were built — confirm.
    combined = (image_features + text_features) / 2
    combined = normalize(combined, dim=1).squeeze().tolist()

    return combined  # returns list of floats

Compute Cosine Similarity and Retrieve Top-3 Examples

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def get_top_k_similar(embedding, all_embeddings, k=3):
    """Return the `k` saved entries whose vectors are most similar to `embedding`.

    Args:
        embedding: Query vector.
        all_embeddings: Sequence of entries, each with an "embedding" vector.
        k: Number of entries to return.

    Returns:
        Up to `k` entries ordered from most to least similar (cosine similarity).
        The query itself is NOT filtered out here; callers that need that must
        drop it from the result. An empty list is returned when `k` is not
        positive or there are no saved entries.
    """
    # A non-positive k previously reached the `[-k:]` slice below; for k == 0 that is
    # `[-0:]`, i.e. the whole array, so every entry was returned.
    if k <= 0 or len(all_embeddings) == 0:
        return []

    # Extract embedding vectors from saved data
    all_vectors = [entry["embedding"] for entry in all_embeddings]
    similarities = cosine_similarity([embedding], all_vectors)[0]

    # argsort is ascending: the last k positions are the best matches; reverse them
    # so the most similar entry comes first.
    top_k_indices = np.argsort(similarities)[-k:][::-1]

    return [all_embeddings[i] for i in top_k_indices]

Build CoT Prompt for GPT-4

In [None]:
def format_example(example):
    """Render one VQA record as prompt text.

    Args:
        example: Mapping with "image", "question", "choices" (letter -> text)
            and "answer".

    Returns:
        Text with one "Image:" line, one "Question:" line, one "<letter>: <choice>"
        line per choice and a final "Answer:" line; every line ends with a newline.
    """
    text = f"Image: {example['image']}\nQuestion: {example['question']}\n"
    for key, val in example["choices"].items():
        choice = val.strip()
        # The dataset's choice strings already start with their own label
        # (e.g. "A:Magnetic resonance imaging"), which used to render as "A: A:...".
        # Drop that label when it matches the key and is followed by ":", "." or ")".
        head, rest = choice[:len(key)], choice[len(key):].lstrip()
        if head.upper() == key.upper() and rest[:1] in (":", ".", ")"):
            choice = rest[1:].strip()
        text += f"{key}: {choice}\n"
    text += f"Answer: {example['answer']}\n"
    return text

In [None]:
def create_prompt(top_k_examples, current_example):
    """Assemble the chain-of-thought prompt: solved examples, then the open question.

    Args:
        top_k_examples: Retrieved records, shown with their answers.
        current_example: The record to be answered; shown WITHOUT its answer.

    Returns:
        The full prompt text, ending in "Answer:".
    """
    prompt = (
        "You are a medical imaging expert. Study the examples below to understand the reasoning. "
        "Then answer the final question. Think step by step and justify your answer.\n\n"
    )
    for i, ex in enumerate(top_k_examples):
        prompt += f"Example {i+1}:\n{format_example(ex)}\n\n"

    # format_example ends every record with an "Answer: <letter>" line. For the query
    # that line is the ground truth, so it must be removed — otherwise the prompt
    # hands the model the label it is being evaluated on.
    query_lines = format_example(current_example).rstrip("\n").split("\n")
    if query_lines and query_lines[-1].startswith("Answer:"):
        query_lines.pop()
    query_text = "\n".join(query_lines) + "\n"

    prompt += f"Now answer the following:\n{query_text}\n"
    prompt += "Answer:"
    return prompt

Full Pipeline for 50 Random Queries

In [None]:
import random

# Take up to 50 random queries. A dedicated, seeded generator makes the selection
# reproducible across kernel restarts (42 is the seed already used for `subset_df`).
selected_queries = random.Random(42).sample(prompt_data, min(50, len(prompt_data)))

for i, entry in enumerate(selected_queries):
    image_path = os.path.join(IMAGE_FOLDER, entry["image"])

    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}, skipping.")
        continue

    # Step 1: Create embedding
    current_embedding = generate_clip_embedding(image_path, entry["question"])

    # Step 2: Find top-3 examples.
    # One extra candidate is fetched so the query itself can be dropped when it is
    # among the saved embeddings; otherwise it comes back as "Example 1" together
    # with its own answer.
    candidates = get_top_k_similar(current_embedding, saved_embeddings, k=4)
    top_k = [
        ex for ex in candidates
        if not (
            str(ex.get("image", "")).strip() == str(entry["image"]).strip()
            and ex.get("question") == entry["question"]
        )
    ][:3]

    # Step 3: Create prompt
    prompt = create_prompt(top_k, entry)

    print(f"\n----- PROMPT FOR EXAMPLE {i+1} -----\n")
    print(prompt)
    print("\n" + "="*80 + "\n")




----- PROMPT FOR EXAMPLE 1 -----

You are a medical imaging expert. Study the examples below to understand the reasoning. Then answer the final question. Think step by step and justify your answer.

Example 1:
Image: PMC8590715_FIG5_112213.jpg
Question: what imaging technique was used to produce the adc map
A: A:Magnetic resonance imaging (MRI)
B: B:X-ray
C: C:Computed tomography (CT)
D: D:Positron emission tomography (PET)
Answer: A


Example 2:
Image: PMC8274757_f1_7979.jpg
Question: what type of imaging was used to obtain the image shown
A: A: Ultrasound
B: B: Magnetic Resonance Imaging
C: C: X-ray
D: D: Computed Tomography
Answer: B


Example 3:
Image: PMC8620805_jcm-10-05375-f004_122528.jpg
Question: what imaging technique was used to capture the image
A: A: X-ray
B: B: MRI
C: C: CT scan
D: D: PET scan
Answer: B


Now answer the following:
Image: PMC8590715_FIG5_112213.jpg
Question: what imaging technique was used to produce the adc map
A: A:Magnetic resonance imaging (MRI)
B: B:

SET UP OPEN AI

In [None]:
import os
import time
import re
from openai import OpenAI

# Create the client that `ask_gpt4` calls. No key is hardcoded in the notebook:
# OpenAI() picks up the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()


In [None]:
def ask_gpt4(prompt, model="gpt-4", temperature=0):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Prompt text.
        model: Chat model name.
        temperature: Sampling temperature.

    Returns:
        The reply with surrounding whitespace removed, or None when the call
        fails for any reason (the error is printed).
    """
    messages = [{"role": "user", "content": prompt}]
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        print("Error calling GPT-4:", e)
    return None

Call GPT-4 and Extract Answer

In [None]:
def extract_choice(text):
    """Pull the chosen option letter (A-D) out of a model reply.

    The previous implementation searched for any stand-alone a-d letter while
    ignoring case, so the English article "a" (as in "shows a lesion") was
    reported as option A. Matching is now done in three steps, most specific
    first:

    1. the reply opens with the letter, in either case, followed by a
       delimiter or the end of the line ("B", "b)", "(C) ...", "B: In the skull");
    2. an explicit statement such as "Answer: B" or "the answer is option C"
       (capital letter only);
    3. the first stand-alone capital A-D anywhere in the reply.

    Lower-case letters are therefore only accepted when they open the reply.
    Step 3 is still a heuristic: a sentence that starts with the article "A"
    is indistinguishable from option A.

    Args:
        text: Raw reply string, or None.

    Returns:
        "A", "B", "C" or "D", or None when `text` is None/empty or no option
        letter could be found.
    """
    if not text:
        return None
    reply = text.strip()

    # 1) Letter at the very start, followed by ":", ".", ")", "]" or end of line.
    leading = re.match(r"\(?([A-Da-d])\s*(?:[:.)\]]|$)", reply, re.MULTILINE)
    if leading:
        return leading.group(1).upper()

    # 2) Explicit "answer ..." statement. Only the keywords are matched
    #    case-insensitively (scoped (?i:...) flag); the letter must be capital
    #    so that "the answer is a mass" is not read as option A.
    stated = re.search(
        r"(?i:\banswer\b(?:\s+is)?)\s*[:\-]?\s*(?:(?i:option|choice)\s+)?\(?([A-D])\b",
        reply,
    )
    if stated:
        return stated.group(1)

    # 3) Fallback: first stand-alone capital letter A-D.
    loose = re.search(r"\b([A-D])\b", reply)
    return loose.group(1) if loose else None

Query → GPT-4 → Evaluation

In [None]:
# Few-shot evaluation: for every selected query, retrieve the 3 most similar
# stored examples, build a prompt, ask GPT-4 and compare the extracted letter
# with the ground-truth answer.
# Running counters read by the accuracy cell that follows.
correct = 0
total = 0
# One dict per answered query (question, ground truth, prediction, raw reply, correctness).
results = []

for i, entry in enumerate(selected_queries):
    image_path = os.path.join(IMAGE_FOLDER, entry["image"])
    # Queries whose image file is missing are skipped and not counted in `total`.
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}, skipping.")
        continue

    # NOTE(review): the prompt printed by the preceding cell lists the query
    # image itself as "Example 1" together with its answer, so
    # `saved_embeddings` appears to contain the evaluation queries. Confirm,
    # and exclude the query's own record before trusting the accuracy below.
    current_embedding = generate_clip_embedding(image_path, entry["question"])
    top_k = get_top_k_similar(current_embedding, saved_embeddings, k=3)
    prompt = create_prompt(top_k, entry)

    print(f"\n----- Query {i+1} -----")
    print(f"Question: {entry['question']}")
    print(f"Ground Truth: {entry['answer']}")

    gpt_response = ask_gpt4(prompt)
    print("GPT-4 Response:", gpt_response)

    # A None reply is skipped before `total` is incremented, so failed API
    # calls are left out of the accuracy (and the rate-limit sleep is skipped too).
    if gpt_response is None:
        print("Skipping due to failed GPT response.")
        continue

    predicted = extract_choice(gpt_response)
    print("Predicted Answer:", predicted)

    # A prediction of None (no letter found) simply compares unequal.
    is_correct = predicted == entry["answer"]
    print("Correct:", is_correct)

    results.append({
        "question": entry["question"],
        "actual_answer": entry["answer"],
        "predicted_answer": predicted,
        "gpt_response": gpt_response,
        "correct": is_correct
    })

    if is_correct:
        correct += 1
    total += 1

    time.sleep(1.5)  # Be kind to API limits


----- Query 1 -----
Question: what imaging technique was used to produce the adc map
Ground Truth: A
GPT-4 Response: A
Predicted Answer: A
Correct: True

----- Query 2 -----
Question: 2) where are the cystic bone destructions located
Ground Truth: B
GPT-4 Response: B: In the skull
Predicted Answer: B
Correct: True

----- Query 3 -----
Question: which side of the ovaries was imaged in the given sample
Ground Truth: B
GPT-4 Response: B
Predicted Answer: B
Correct: True

----- Query 4 -----
Question: what was the result of the abdominal ct scan
Ground Truth: C
GPT-4 Response: C: The patient had a cecal cystic mass
Predicted Answer: C
Correct: True

----- Query 5 -----
Question: which knee is displayed in nonanatomic patient  6s mri
Ground Truth: B
GPT-4 Response: B
Predicted Answer: B
Correct: True

----- Query 6 -----
Question: which cranial nerves are involved due to inflammation
Ground Truth: A
GPT-4 Response: A
Predicted Answer: A
Correct: True

----- Query 7 -----
Question: what typ

PRINT ACCURACY

In [None]:
# Fraction of answered queries that were correct; 0 when nothing was evaluated.
if total > 0:
    accuracy = correct / total
else:
    accuracy = 0
print(f"\nFinal Accuracy: {accuracy * 100:.2f}%")


Final Accuracy: 100.00%


## Tree of Thought

In [None]:
import os
import json
import pickle
import torch
import clip
from PIL import Image
import numpy as np
import openai
import re
import time

# Paths (absolute and machine-specific; adjust before running elsewhere)
IMAGE_FOLDER = "/Users/sheetalpatnaik/Desktop/GENAI/figures"

PICKLE_PATH = "/Users/sheetalpatnaik/Desktop/GENAI/dataset_embeddings.pkl"

# Load the OpenAI CLIP ViT-B/32 model and its image preprocessing transform,
# on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# Load data
# `prompt_data` is not defined in this cell; it must already exist in the
# kernel (the original JSON load was disabled).
full_data = prompt_data

# Evaluate on the first 30 entries only.
selected_queries = full_data[:30]

# Load saved embeddings.
# The code below reads each element as a dict with 'embedding', 'image',
# 'question', 'choices' and 'answer' keys.
# NOTE(review): pickle.load executes arbitrary code -- only load trusted files.
with open(PICKLE_PATH, 'rb') as f:
    saved_embeddings = pickle.load(f)

# Function to generate CLIP embedding
def generate_clip_embedding(image_path, question):
    """Build one joint CLIP vector for an (image, question) pair.

    The image file is opened, converted to RGB and run through the CLIP
    `preprocess` transform; the question is tokenized with `clip.tokenize`.
    Both are encoded by the notebook-level CLIP `model` and the two feature
    tensors are averaged element-wise (no normalisation is applied).

    Args:
        image_path: Path of the image file to encode.
        question: Question text to encode.

    Returns:
        The averaged features as a flattened NumPy array.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    image_batch = preprocess(rgb_image).unsqueeze(0).to(device)
    token_batch = clip.tokenize([question]).to(device)

    # Inference only: no gradients are needed for the encoders.
    with torch.no_grad():
        image_vec = model.encode_image(image_batch)
        text_vec = model.encode_text(token_batch)

    fused = (image_vec + text_vec) / 2
    return fused.cpu().numpy().flatten()

# Cosine similarity
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors `a` and `b`.

    Args:
        a: 1-D array-like vector.
        b: 1-D array-like vector of the same length as `a`.

    Returns:
        dot(a, b) / (|a| * |b|). When either vector has zero length the
        similarity is undefined; 0.0 is returned instead of NaN so that
        ranking code which sorts on this value keeps a well-defined order.
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning).
        return 0.0
    return np.dot(a, b) / denom

# Top-k retrieval
def get_top_k_similar(current_embedding, saved_embeddings, k=3):
    """Rank stored records by cosine similarity to `current_embedding`.

    Args:
        current_embedding: Query vector.
        saved_embeddings: Iterable of records; each record's 'embedding'
            entry is converted to a NumPy array and compared with the query.
        k: Number of best matches to return.

    Returns:
        A list of at most `k` `(similarity, record)` tuples, highest
        similarity first.
    """
    scored = []
    for record in saved_embeddings:
        score = cosine_similarity(current_embedding, np.array(record['embedding']))
        scored.append((score, record))

    # Stable descending sort on the similarity only (records are never compared).
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return scored[:k]

# Tree of Thought Prompt
def create_tot_prompt(examples, query_item):
    """Assemble the Tree-of-Thought few-shot prompt.

    Args:
        examples: Iterable of `(score, example)` pairs; only the example dict
            is used. Each example needs 'image', 'question', 'choices'
            (a mapping of label to option text) and 'answer'.
        query_item: Dict with 'image', 'question' and 'choices' for the
            question to be answered.

    Returns:
        The prompt string: a header, one block per example (image name,
        question, choices, the reasoning instruction and the example's
        answer), then the query block ending with an open "Answer:".
    """
    reasoning_cue = (
        "Let’s explore different lines of reasoning to answer this question. "
        "What are the possible interpretations of the image and the question? What could lead to different answers? "
        "After considering all possibilities, choose the best answer and explain why it is correct.\n"
    )

    def _render_choices(choices):
        # One "label: text" line per option, in the mapping's own order.
        return "".join(f"{label}: {option}\n" for label, option in choices.items())

    parts = ["You are an expert radiologist. Below are examples of questions, options, and reasoning steps.\n"]

    for _score, shot in examples:
        parts.append(f"\nImage: {shot['image']}\nQuestion: {shot['question']}\nChoices:\n")
        parts.append(_render_choices(shot['choices']))
        parts.append(reasoning_cue)
        parts.append(f"Answer: {shot['answer']}\n")

    parts.append(
        f"\nNow consider the following:\nImage: {query_item['image']}\nQuestion: {query_item['question']}\nChoices:\n"
    )
    parts.append(_render_choices(query_item['choices']))
    # The query block ends with an open "Answer:" for the model to complete.
    parts.append(reasoning_cue + "Answer:")
    return "".join(parts)

# GPT-4 call
def ask_gpt4(prompt, model="gpt-4", temperature=0.7):
    """Send one user message through the module-level `openai` client.

    This definition replaces the earlier `ask_gpt4` in the notebook; the
    `model` and `temperature` parameters are kept so calls written for either
    version work. The defaults reproduce the values that used to be
    hard-coded here.

    Args:
        prompt: Text sent as the only (user) message.
        model: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API. The default
            of 0.7 makes replies non-deterministic; pass 0 for repeatable
            evaluation runs.

    Returns:
        The first choice's message content, stripped; an empty string when
        the API returns no content.

    Raises:
        Whatever the OpenAI client raises (network, authentication or
        rate-limit errors); the caller decides how to handle failures.
    """
    response = openai.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    content = response.choices[0].message.content
    # The API types `content` as optional; treat a missing reply as empty text
    # instead of failing on None.strip().
    return (content or "").strip()

# Extract answer
def extract_choice(text):
    """Pull the chosen option letter (A-D) out of a model reply.

    The previous one-line regex ignored case, so the English article "a"
    (as in "shows a lesion") was reported as option A, and a None reply
    raised a TypeError. Matching is now done in three steps, most specific
    first:

    1. the reply opens with the letter, in either case, followed by a
       delimiter or the end of the line ("B", "b)", "(C) ...", "B: nucleus");
    2. an explicit statement such as "Answer: B" or "the answer is option C"
       (capital letter only);
    3. the first stand-alone capital A-D anywhere in the reply.

    Lower-case letters are therefore only accepted when they open the reply.
    Step 3 is still a heuristic: a sentence that starts with the article "A"
    is indistinguishable from option A.

    Args:
        text: Raw reply string; None and "" are accepted.

    Returns:
        "A", "B", "C" or "D", or the string "N/A" when no option letter
        could be found.
    """
    if not text:
        return "N/A"
    reply = text.strip()

    # 1) Letter at the very start, followed by ":", ".", ")", "]" or end of line.
    leading = re.match(r"\(?([A-Da-d])\s*(?:[:.)\]]|$)", reply, re.MULTILINE)
    if leading:
        return leading.group(1).upper()

    # 2) Explicit "answer ..." statement. Only the keywords are matched
    #    case-insensitively (scoped (?i:...) flag); the letter must be capital
    #    so that "the answer is a mass" is not read as option A.
    stated = re.search(
        r"(?i:\banswer\b(?:\s+is)?)\s*[:\-]?\s*(?:(?i:option|choice)\s+)?\(?([A-D])\b",
        reply,
    )
    if stated:
        return stated.group(1)

    # 3) Fallback: first stand-alone capital letter A-D.
    loose = re.search(r"\b([A-D])\b", reply)
    return loose.group(1) if loose else "N/A"

# Run the full pipeline
# Number of few-shot examples placed in every prompt.
K_EXAMPLES = 3

correct = 0
total = 0
print("\n--- Tree of Thought Prompting Results ---\n")
for i, entry in enumerate(selected_queries):
    image_path = os.path.join(IMAGE_FOLDER, entry["image"])
    # Queries without an image file are skipped and not counted in `total`.
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}, skipping.\n")
        continue

    current_embedding = generate_clip_embedding(image_path, entry["question"])

    # Leakage guard: the retrieval pool can contain the query itself (the
    # few-shot prompt printed earlier in this notebook shows the query image
    # returned as "Example 1" together with its answer). Ask for one extra
    # neighbour and drop the record that is identical to the query, so the
    # ground-truth answer never appears in the prompt.
    candidates = get_top_k_similar(current_embedding, saved_embeddings, k=K_EXAMPLES + 1)
    top_k = [
        (score, ex)
        for score, ex in candidates
        if not (ex["image"] == entry["image"] and ex["question"] == entry["question"])
    ][:K_EXAMPLES]
    prompt = create_tot_prompt(top_k, entry)

    print(f"--- Query {i+1} ---")
    print(f"Question: {entry['question']}")
    print(f"Actual Answer: {entry['answer']}")

    # A failed API call is reported and scored as a wrong answer: the empty
    # reply makes extract_choice return "N/A".
    try:
        gpt_response = ask_gpt4(prompt)
    except Exception as e:
        print("Error with GPT-4:", e)
        gpt_response = ""

    print("GPT-4 Response:", gpt_response)

    predicted = extract_choice(gpt_response)
    print("Predicted Answer:", predicted)
    is_correct = predicted == entry["answer"]
    print("Correct:", is_correct, "\n")

    if is_correct:
        correct += 1
    total += 1

    time.sleep(1.5)  # Respect rate limits

# Final accuracy
print("===============")
if total:
    print(f"Final Accuracy: {correct}/{total} = {correct/total:.2%}")
else:
    # Every query was skipped (e.g. no image found); avoid dividing by zero.
    print("Final Accuracy: n/a (no queries were evaluated)")


--- Tree of Thought Prompting Results ---

--- Query 1 ---
Question: what does the image depict about the patients tumor
Actual Answer: B
GPT-4 Response: B
Predicted Answer: B
Correct: True 

--- Query 2 ---
Question: what imaging technique was used to capture the image
Actual Answer: D
GPT-4 Response: D
Predicted Answer: D
Correct: True 

--- Query 3 ---
Question: what is located to the right in all the photographs
Actual Answer: B
GPT-4 Response: B
Predicted Answer: B
Correct: True 

--- Query 4 ---
Question: what does the excisional biopsy reveal in this image
Actual Answer: C
GPT-4 Response: C
Predicted Answer: C
Correct: True 

--- Query 5 ---
Question: what approach was used for the surgery
Actual Answer: B
GPT-4 Response: B
Predicted Answer: B
Correct: True 

--- Query 6 ---
Question: what is indicated in blue in the image
Actual Answer: B
GPT-4 Response: B: nucleus

Reasoning: The blue color in the image is typically used to indicate the nucleus of the cell in most cell diagra

# **RAG Implementation**

## Loading required libraries

In [None]:
# Step 1: Install the Hugging Face `datasets` library (unpinned; installs the latest release)
!pip install datasets

# Step 2: Load PubMedQA dataset
from datasets import load_dataset

# Download (or reuse the cached copy of) the expert-labelled "pqa_labeled" configuration
pubmed_dataset = load_dataset("pubmed_qa", "pqa_labeled")

# Show the available splits, their columns and row counts
print(pubmed_dataset)

# Show the first training record in full
print(pubmed_dataset['train'][0])


Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/5.19k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['pubid', 'question', 'context', 'long_answer', 'final_decision'],
        num_rows: 1000
    })
})
{'pubid': 21645374, 'question': 'Do mitochondria play a role in remodelling lace plant leaves during programmed cell death?', 'context': {'contexts': ['Programmed cell death (PCD) is the regulated death of cells within an organism. The lace plant (Aponogeton madagascariensis) produces perforations in its leaves through PCD. The leaves of the plant consist of a latticework of longitudinal and transverse veins enclosing areoles. PCD occurs in the cells at the center of these areoles and progresses outwards, stopping approximately five cells from the vasculature. The role of mitochondria during PCD has been recognized in animals; however, it has been less studied during PCD in plants.', 'The following paper elucidates the role of mitochondrial dynamics during developmentally regulated PCD in vivo in A. madagascariensis. A single areole wi

## Create PMC-VQA Question Embeddings

In [None]:
pip install sentence-transformers pandas tqdm


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence-transformers)
 

In [None]:
# from sentence_transformers import SentenceTransformer
# import pickle
# import json

# # Step 2: Extract only the questions
# pmc_questions = [item['question'] for item in prompt_data]

# # Step 3: Load sentence transformer model
# model = SentenceTransformer('all-MiniLM-L6-v2')

# # Step 4: Create embeddings
# pmc_question_embeddings = model.encode(pmc_questions, batch_size=32, show_progress_bar=True)

# # Step 5: Save
# with open('/Users/sheetalpatnaik/Desktop/GENAI/pmc_question_embeddings.pkl', 'wb') as f:
#     pickle.dump({
#         "questions": pmc_questions,
#         "embeddings": pmc_question_embeddings
#     }, f)

# print("PMC-VQA question embeddings created and saved.")



import pandas as pd
import pickle
from tqdm import tqdm
from sentence_transformers import SentenceTransformer

# Load the matched dataset (with image names) from the current working directory
df = pd.read_csv("train_final.csv")

# Take every value of the "Question" column as a string, in row order.
# No de-duplication is done, so embedding row i corresponds to CSV row i.
questions = df["Question"].astype(str).tolist()

# Initialize the sentence encoder.
# NOTE: this rebinds the notebook-wide name `model`, which other cells use for CLIP.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Generate one embedding per question
print("⚙️ Generating text embeddings...")
embeddings = model.encode(questions, show_progress_bar=True, batch_size=64)

# Save questions and embeddings together so they stay index-aligned
output = {
    "questions": questions,
    "embeddings": embeddings
}

# Written relative to the current working directory.
# NOTE(review): the retrieval cell reads this file name from an absolute
# Desktop path -- confirm both refer to the same file.
with open("pmc_question_embeddings.pkl", "wb") as f:
    pickle.dump(output, f)

print("✅ Saved PMC-VQA question embeddings to pmc_question_embeddings.pkl")





modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

⚙️ Generating text embeddings...


Batches:   0%|          | 0/490 [00:00<?, ?it/s]

✅ Saved PMC-VQA question embeddings to pmc_question_embeddings.pkl


In [None]:
pip install torch torchvision transformers



In [None]:
pip install google

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting google
  Downloading google-3.0.0-py2.py3-none-any.whl.metadata (627 bytes)
Downloading google-3.0.0-py2.py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.3/45.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google
Successfully installed google-3.0.0
[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
import os

# Show the current working directory (relative paths in this notebook resolve against it)
print("📁 Current working directory:", os.getcwd())

# List the entries (files and sub-folders) directly inside the working directory
print("\n📂 Contents of current directory:")
print(os.listdir())



📁 Current working directory: /content

📂 Contents of current directory:
['.config', 'pmc_question_embeddings.pkl', 'train_final.csv', 'sample_data']


In [None]:
import os

# Relative path, resolved against the current working directory.
figures_path = os.path.join("images_2", "figures")
print("🔍 Looking for figures folder at:", figures_path)
# Prints the first message when the path exists, the second otherwise.
print("✅ Exists?" if os.path.exists(figures_path) else "❌ Does not exist")


🔍 Looking for figures folder at: /Users/sheetalpatnaik/Desktop/GENAI/figures
❌ Does not exist


In [None]:
# Relative path to the figures folder (resolved against the current working directory).
# Requires `os` to be imported already. The image-embedding cell that follows
# reassigns `image_folder` to an absolute path, so this value is overridden there.
image_folder = os.path.join("images_2", "figures")


In [None]:
import os
import torch
import pickle
from PIL import Image
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel
import pandas as pd

# Load CSV with matched image names (read from the current working directory)
df = pd.read_csv("train_final.csv")

# Folder holding the figure files. The relative "images_2/figures" variant
# is disabled; an absolute, machine-specific path is used instead.
# image_folder = os.path.join("images_2", "figures")
image_folder = "/Users/sheetalpatnaik/Desktop/GENAI/figures"

# Load the Hugging Face CLIP model and its processor.
# NOTE: this rebinds the notebook-wide name `model`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Storage: `image_embeddings[i]` belongs to `valid_image_names[i]`
image_embeddings = []
valid_image_names = []
missing_images = []

# Encode each distinct, non-null image name once
for img_name in tqdm(df["Image_Name"].dropna().unique(), desc="📸 Encoding images"):
    img_path = os.path.join(image_folder, img_name)

    # Files that do not exist are recorded and skipped.
    if not os.path.isfile(img_path):
        missing_images.append(img_name)
        print(f"⚠️ Skipping missing image: {img_name}")
        continue

    # Best effort: an image that fails to open or encode is reported and
    # left out. It is NOT added to `missing_images`, so the "Skipped" count
    # printed at the end does not include such failures.
    try:
        image = Image.open(img_path).convert("RGB")
        inputs = processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            emb = model.get_image_features(**inputs).squeeze().cpu().numpy()
        image_embeddings.append(emb)
        valid_image_names.append(img_name)
    except Exception as e:
        print(f"❌ Error processing {img_name}: {e}")

# Save names and embeddings together (embeddings stay a Python list of arrays)
output = {
    "image_names": valid_image_names,
    "embeddings": image_embeddings
}

with open("pmc_image_embeddings.pkl", "wb") as f:
    pickle.dump(output, f)

print(f"\n✅ Saved {len(image_embeddings)} image embeddings to pmc_image_embeddings.pkl")
print(f"🚫 Skipped {len(missing_images)} missing images.")


📸 Encoding images:   0%|                                                                                                      | 0/19496 [00:00<?, ?it/s]Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))
📸 Encoding images: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 19496/19496 [12:52<00:00, 25.22it/s]



✅ Saved 19496 image embeddings to pmc_image_embeddings.pkl
🚫 Skipped 0 missing images.


## Code for PubMedQA Question Embeddings

In [None]:
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import pickle

# Step 1: Load the train split of the PubMedQA "pqa_labeled" configuration
pubmed_dataset = load_dataset("pubmed_qa", "pqa_labeled")['train']

# Step 2: Extract only the questions, in dataset order, so embedding row i
# corresponds to pubmed_dataset[i]
pubmed_questions = [item['question'] for item in pubmed_dataset]

# Step 3: Load Sentence Transformer model.
# NOTE: this rebinds the notebook-wide name `model`, which other cells use for CLIP.
model = SentenceTransformer('all-MiniLM-L6-v2')  # same encoder as the PMC-VQA question embeddings

# Step 4: Create embeddings
pubmed_question_embeddings = model.encode(pubmed_questions, batch_size=32, show_progress_bar=True)

# Step 5: Save questions and embeddings together (absolute, machine-specific path)
with open('/Users/sheetalpatnaik/Desktop/GENAI/pubmed_question_embeddings.pkl', 'wb') as f:
    pickle.dump({
        "questions": pubmed_questions,
        "embeddings": pubmed_question_embeddings
    }, f)

print("✅ PubMedQA question embeddings created and saved.")

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

✅ PubMedQA question embeddings created and saved.


**RAG with question embeddings**

In [None]:
import pickle
import faiss
import numpy as np
import json
from datasets import load_dataset

# Step 1: Load PMC-VQA question embeddings
# NOTE(review): pickle.load executes arbitrary code -- only load trusted files.
with open('/Users/sheetalpatnaik/Desktop/GENAI/pmc_question_embeddings.pkl', 'rb') as f:
    pmc_data = pickle.load(f)

pmc_questions = pmc_data['questions']
pmc_embeddings = pmc_data['embeddings']

# Step 2: Load PubMed question embeddings
with open('/Users/sheetalpatnaik/Desktop/GENAI/pubmed_question_embeddings.pkl', 'rb') as f:
    pubmed_data = pickle.load(f)

pubmed_questions = pubmed_data['questions']
pubmed_embeddings = pubmed_data['embeddings']

# Step 3: Load full PubMedQA dataset (for context, long answer)
pubmed_dataset = load_dataset("pubmed_qa", "pqa_labeled")['train']


def _unit_rows(matrix):
    """Return `matrix` as a contiguous float32 array with unit-length rows.

    FAISS only accepts contiguous float32 input. All-zero rows are left as
    zeros instead of being turned into NaNs by a division by zero.
    """
    matrix = np.ascontiguousarray(matrix, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    return matrix / norms


# Step 4: Normalize embeddings (for cosine similarity)
pubmed_embeddings = _unit_rows(pubmed_embeddings)
pmc_embeddings = _unit_rows(pmc_embeddings)

# Step 5: Build FAISS index.
# For unit-length vectors, ranking by L2 distance gives the same order as
# ranking by cosine similarity, so a flat L2 index is sufficient.
embedding_dim = pubmed_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(pubmed_embeddings)

print(f"✅ FAISS index built with {index.ntotal} vectors.")

# Step 6: Retrieve the best PubMedQA match for the first queries
N_QUERIES = 20
# Never read past the end of either the queries or their embeddings.
n_queries = min(N_QUERIES, len(prompt_data), len(pmc_embeddings))

results = []

for i in range(n_queries):
    # NOTE(review): this assumes row i of the pickled PMC embeddings was built
    # from prompt_data[i]['question'] -- confirm against the cell that created
    # the pickle (compare with pmc_questions[i]).
    pmc_entry = prompt_data[i]  # a dict: image, question, choices A-D, answer
    pmc_query_embedding = pmc_embeddings[i].reshape(1, -1)

    # Search top-1 match
    distances, indices = index.search(pmc_query_embedding, k=1)
    best_idx = indices[0][0]

    # Fetch matched PubMed record (index rows follow pubmed_dataset order)
    pubmed_item = pubmed_dataset[int(best_idx)]

    # Prepare result
    choices = pmc_entry.get('choices', {})

    result = {
        'Image Name': pmc_entry['image'],
        'PMC Question': pmc_entry['question'],
        'Option A': choices.get('A', ''),
        'Option B': choices.get('B', ''),
        'Option C': choices.get('C', ''),
        'Option D': choices.get('D', ''),
        'Correct Option': pmc_entry.get('answer', ''),
        'Final Answer' : pmc_entry.get('answer', ''),
        'PubMed Retrieved Question': pubmed_item['question'],
        'PubMed Context': " ".join(pubmed_item['context']['contexts']),
        'PubMed Long Answer': pubmed_item['long_answer']
    }

    results.append(result)

# Step 7: Print results (the image and option lines now carry their labels)
for idx, item in enumerate(results):
    print(f"\n--- Result {idx+1} ---")
    print(f"Image Name: {item['Image Name']}")
    print(f"PMC Question: {item['PMC Question']}")
    print(f"Option A: {item['Option A']}")
    print(f"Option B: {item['Option B']}")
    print(f"Option C: {item['Option C']}")
    print(f"Option D: {item['Option D']}")
    print(f"Correct Option: {item['Correct Option']}")
    print("\nRetrieved PubMed Info:")
    print(f"PubMed Question: {item['PubMed Retrieved Question']}")
    print(f"PubMed Context: {item['PubMed Context']}")
    print(f"PubMed Long Answer: {item['PubMed Long Answer']}")

✅ FAISS index built with 1000 vectors.

--- Result 1 ---
: PMC8519188_FIG5_85295.jpg
PMC Question: what does the image depict about the patients tumor
:  A:The tumor has grown larger 
:  B:The tumor has shrunk 
:  C:The tumor has not changed 
:  D:The image doesn't show tumor regression 
Correct Option: B

Retrieved PubMed Info:
PubMed Question: Should tumor depth be included in prognostication of soft tissue sarcoma?
PubMed Context: Most staging systems for soft tissue sarcoma are based on histologic malignancy-grade, tumor size and tumor depth. These factors are generally dichotomized, size at 5 cm. We believe it is unlikely that tumor depth per se should influence a tumor's metastatic capability. Therefore we hypothesized that the unfavourable prognostic importance of depth could be explained by the close association between size and depth, deep-seated tumors on average being larger than the superficial ones. When tumor size is dichotomized, this effect should be most pronounced in 

In [None]:
# Preview only the first few entries; printing the whole list floods the
# notebook output with the entire dataset.
print(f"{len(prompt_data)} entries; first 3:")
print(prompt_data[:3])

[{'image': 'PMC8519188_FIG5_85295.jpg', 'question': 'what does the image depict about the patients tumor', 'choices': {'A': ' A:The tumor has grown larger ', 'B': ' B:The tumor has shrunk ', 'C': ' C:The tumor has not changed ', 'D': " D:The image doesn't show tumor regression "}, 'answer': 'B'}, {'image': 'PMC8285465_Fig3_10775.jpg', 'question': 'what imaging technique was used to capture the image', 'choices': {'A': ' A:CT scan ', 'B': ' B:Electroencephalography ', 'C': ' C:X-ray ', 'D': ' D:Magnetic resonance imaging '}, 'answer': 'D'}, {'image': 'PMC8918112_Fig4_221411.jpg', 'question': 'what is located to the right in all the photographs', 'choices': {'A': ' A:The anterior pole ', 'B': ' B:The posterior pole ', 'C': ' C:The vegetal pole ', 'D': ' D:The lateral pole '}, 'answer': 'B'}, {'image': 'PMC8225413_fig2_475661.jpg', 'question': 'what does the excisional biopsy reveal in this image', 'choices': {'A': ' A:Primary tumor ', 'B': ' B: Epidural tumor ', 'C': ' C: Inguinal node m

In [None]:
from openai import OpenAI
import base64
import os



# Image folder path (absolute and machine-specific)
image_folder_path = "/Users/sheetalpatnaik/Desktop/GENAI/figures"

# One record per query that was answered successfully.
gpt4v_answers = []

# NOTE(review): `client` is not created in this cell -- presumably an
# `OpenAI` instance built elsewhere in the notebook; verify before running.
for idx, item in enumerate(results):
    image_filename = item['Image Name']  # Ensure you added this to your results list
    image_path = os.path.join(image_folder_path, image_filename)

    # Skip queries whose image is missing instead of aborting the whole run
    # (same policy as the other evaluation loops in this notebook).
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}, skipping.")
        continue

    # Load and encode image as base64 for the data URL below
    with open(image_path, "rb") as img_file:
        image_data = base64.b64encode(img_file.read()).decode('utf-8')

    # Prepare prompt text
    prompt_text = f"""
You are a helpful medical AI assistant. Based on the following information and the provided image, answer the question correctly.
Refer the context and long answer provided.


Question:
{item['PMC Question']}

Options:
A. {item['Option A']}
B. {item['Option B']}
C. {item['Option C']}
D. {item['Option D']}

Knowledge Base Information:
{item['PubMed Context']}
{item['PubMed Long Answer']}

Please choose the most appropriate option (A, B, C, or D).
"""

    # GPT-4-Turbo Vision Request.
    # A failed call is reported and the query skipped, so one transient API
    # error does not discard the answers already collected.
    try:
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt_text},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{image_data}"
                            }
                        }
                    ]
                }
            ],
            temperature=0.0,
            max_tokens=300
        )
    except Exception as e:
        print(f"Error calling GPT-4 Vision for query {idx+1}: {e}")
        continue

    # Extract GPT-4-Vision answer (the API types `content` as optional)
    answer = (response.choices[0].message.content or "").strip()

    gpt4v_answers.append({
        'PMC Question': item['PMC Question'],
        'Options': {
            'A': item['Option A'],
            'B': item['Option B'],
            'C': item['Option C'],
            'D': item['Option D'],
        },
        'Correct Option': item['Correct Option'],
        'GPT-4V Answer': answer,
        'Image Name': image_filename,
        'PubMed Retrieved Question': item['PubMed Retrieved Question'],
        'PubMed Context': item['PubMed Context'],
        'PubMed Long Answer': item['PubMed Long Answer']
    })

    print(f"✅ Processed Query {idx+1}")

# FINAL PRINTING (outside loop)
for idx, item in enumerate(gpt4v_answers):
    print(f"\n=================== Result {idx+1} ===================")
    print(f"Image Name: {item['Image Name']}")
    print(f"PMC Question:\n{item['PMC Question']}\n")

    print("Options:")
    print(f"A: {item['Options']['A']}")
    print(f"B: {item['Options']['B']}")
    print(f"C: {item['Options']['C']}")
    print(f"D: {item['Options']['D']}\n")

    print(f"Correct Answer: {item['Correct Option']}")
    print(f"GPT-4 Predicted Answer: {item['GPT-4V Answer']}\n")

    print("Retrieved PubMed Question:")
    print(item['PubMed Retrieved Question'])

    print("\nRetrieved PubMed Context:")
    print(item['PubMed Context'])

    print("\nRetrieved PubMed Long Answer:")
    print(item['PubMed Long Answer'])
    print("============================================================")

✅ Processed Query 1
✅ Processed Query 2
✅ Processed Query 3
✅ Processed Query 4
✅ Processed Query 5
✅ Processed Query 6
✅ Processed Query 7
✅ Processed Query 8
✅ Processed Query 9
✅ Processed Query 10
✅ Processed Query 11
✅ Processed Query 12
✅ Processed Query 13
✅ Processed Query 14
✅ Processed Query 15
✅ Processed Query 16
✅ Processed Query 17
✅ Processed Query 18
✅ Processed Query 19
✅ Processed Query 20

Image Name: PMC8519188_FIG5_85295.jpg
PMC Question:
what does the image depict about the patients tumor

Options:
A:  A:The tumor has grown larger 
B:  B:The tumor has shrunk 
C:  C:The tumor has not changed 
D:  D:The image doesn't show tumor regression 

Correct Answer: B
GPT-4 Predicted Answer: Based on the provided image, which shows a measurement of 50.2 mm across a lesion in the lung, the correct answer to the question about the tumor's status cannot be determined solely from this single image. The image shows a measurement but does not provide comparative data to indicate wh

In [None]:
import re

def _extract_option(reply):
    """Return the option letter (A-D) chosen in `reply`, or None.

    The earlier code upper-cased the whole reply before searching for a
    stand-alone A-D, which turned every English article "a" into a match for
    option A. Matching is now done on the original text, most specific first:
    a letter that opens the reply (either case, followed by a delimiter or
    end of line), then an explicit "answer is X" / "Answer: X" statement,
    then the first stand-alone capital A-D. The last step remains a
    heuristic (a sentence starting with the article "A" still reads as A).
    """
    if not reply:
        return None
    reply = reply.strip()

    leading = re.match(r"\(?([A-Da-d])\s*(?:[:.)\]]|$)", reply, re.MULTILINE)
    if leading:
        return leading.group(1).upper()

    # Keywords are case-insensitive (scoped flag); the letter must be capital.
    stated = re.search(
        r"(?i:\banswer\b(?:\s+is)?)\s*[:\-]?\s*(?:(?i:option|choice)\s+)?\(?([A-D])\b",
        reply,
    )
    if stated:
        return stated.group(1)

    loose = re.search(r"\b([A-D])\b", reply)
    return loose.group(1) if loose else None


correct = 0
total = len(gpt4v_answers)

for item in gpt4v_answers:
    # Compare the extracted letter with the normalised ground-truth letter.
    predicted = _extract_option(item['GPT-4V Answer'])
    actual = item['Correct Option'].strip().upper()

    if predicted == actual:
        correct += 1

# Guard against an empty result list (e.g. every query was skipped).
accuracy = correct / total * 100 if total else 0.0

print(f"\n✅ GPT-4 Accuracy: {accuracy:.2f}% ({correct}/{total} correct)")


✅ GPT-4 Accuracy: 50.00% (10/20 correct)


# **RAG with question and Image embeddings**

### Downloading SLAKE dataset

In [None]:
from datasets import load_dataset

# Load the English-only subset of SLAKE from the Hugging Face Hub
# (downloads the train / validation / test splits on first use).
dataset = load_dataset("mdwiratathya/SLAKE-vqa-english")


README.md:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

train-00000-of-00002.parquet:   0%|          | 0.00/31.1M [00:00<?, ?B/s]

train-00001-of-00002.parquet:   0%|          | 0.00/12.2M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/8.34M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/9.59M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4919 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1053 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1061 [00:00<?, ? examples/s]

In [None]:
# Check dataset structure: splits, column names and row counts
print(dataset)

# Print the first training item (a dict with 'image', 'question' and 'answer')
print(dataset['train'][0])


DatasetDict({
    train: Dataset({
        features: ['image', 'question', 'answer'],
        num_rows: 4919
    })
    validation: Dataset({
        features: ['image', 'question', 'answer'],
        num_rows: 1053
    })
    test: Dataset({
        features: ['image', 'question', 'answer'],
        num_rows: 1061
    })
})
{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=256x256 at 0x668DAA6F0>, 'question': 'What modality is used to take this image?', 'answer': 'MRI'}


### Generate SLAKE Image Embeddings

In [None]:
from PIL import Image
from tqdm import tqdm
import torch
import pickle
from transformers import CLIPProcessor, CLIPModel

# numpy is needed for stacking/normalising below and is not part of this cell's imports,
# so import it here to keep the cell runnable on a fresh kernel.
import numpy as np

# Load model + processor
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

# Get all images from SLAKE training set.
# Row i of the embedding matrix corresponds to dataset['train'][i]; image_ids records that mapping.
images = [item['image'] for item in dataset['train']]
image_ids = list(range(len(images)))  # or use custom IDs if needed

# Generate embeddings in mini-batches: one forward pass per batch instead of one per image.
EMBED_BATCH_SIZE = 32
embedding_chunks = []
for start in tqdm(range(0, len(images), EMBED_BATCH_SIZE), desc="Embedding SLAKE Images"):
    batch = images[start:start + EMBED_BATCH_SIZE]
    inputs = processor(images=batch, return_tensors="pt").to(model.device)
    with torch.no_grad():
        image_features = model.get_image_features(**inputs)
    # Each chunk is (batch_size, embedding_dim); np.vstack below concatenates them row-wise.
    embedding_chunks.append(image_features.cpu().numpy())

# Stack and normalize (unit L2 norm per row).
image_embeddings = np.vstack(embedding_chunks)
row_norms = np.linalg.norm(image_embeddings, axis=1, keepdims=True)
row_norms[row_norms == 0] = 1.0  # leave an all-zero row as zeros instead of producing NaN
image_embeddings = image_embeddings / row_norms

# Save
with open("slake_image_embeddings.pkl", "wb") as f:
    pickle.dump({
        "image_ids": image_ids,
        "embeddings": image_embeddings
    }, f)

print("✅ SLAKE image embeddings generated and saved.")


## Install dependencies for loading the embedding pickle files

In [None]:
!pip install faiss-cpu


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Downloading faiss_cpu-1.11.0-cp310-cp310-manylinux_2_28_x86_64.whl (31.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m49.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.11.0
[0m

In [None]:
!pip install openai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting openai
  Downloading openai-1.77.0-py3-none-any.whl.metadata (25 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Downloading pydantic-2.11.4-py3-none-any.whl.metadata (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.6/66.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting typing-extensions<5,>=4.11 (from openai)
  Downloading typing_extensions-4.13.2-py3-none-any.whl.metadata (3.0 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.9-py3-none-any.whl.metadata (21 kB)
Collecting h11>=0.16 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.16.0-py3-none-any.whl.metadata (8.3 kB)
Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->openai)
 

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-20.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.66.3 (from datasets)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting aiohttp (from datasets)
  Downloading aiohttp-3.11.18-cp310-cp310-manylinux_2_17_

In [None]:
# Step 1: Clean uninstall
!pip uninstall -y pydantic openai typing_extensions

# Step 2: Install exact compatible versions
!pip install pydantic==2.5.3 openai==1.3.9 typing_extensions==4.7.1


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Found existing installation: pydantic 2.0
Uninstalling pydantic-2.0:
  Successfully uninstalled pydantic-2.0
Found existing installation: openai 1.2.4
Uninstalling openai-1.2.4:
  Successfully uninstalled openai-1.2.4
Found existing installation: typing_extensions 4.7.1
Uninstalling typing_extensions-4.7.1:
  Successfully uninstalled typing_extensions-4.7.1
[0m

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting pydantic==2.5.3
  Downloading pydantic-2.5.3-py3-none-any.whl.metadata (65 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.6/65.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai==1.3.9
  Downloading openai-1.3.9-py3-none-any.whl.metadata (17 kB)
Collecting typing_extensions==4.7.1
  Using cached typing_extensions-4.7.1-py3-none-any.whl.metadata (3.1 kB)
Collecting pydantic-core==2.14.6 (from pydantic==2.5.3)
  Downloading pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)
Downloading pydantic-2.5.3-py3-none-any.whl (381 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m381.9/381.9 kB[0m [31m30.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading openai-1.3.9-py3-none-any.whl (221 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m221.4/221.4 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached typing_extensions-4.7.1-py3-none-any.whl (33 kB)


In [None]:
from openai import OpenAI

print("✅ OpenAI import successful!")


✅ OpenAI import successful!


## Reading all embedding files

In [None]:
import json
import torch
import pickle
import numpy as np
from PIL import Image
from tqdm import tqdm
import faiss
import os
from openai import OpenAI
import openai
from datasets import load_dataset
import pandas as pd

def _load_unit_embeddings(pickle_path):
    """Load an embeddings pickle and return ``(payload, unit_norm_matrix)``.

    ``payload`` is the unpickled object (expected to be a dict with an "embeddings" entry);
    ``unit_norm_matrix`` is a float32 copy of that entry with every row scaled to L2 norm 1.
    """
    # NOTE: pickle.load can execute arbitrary code; only open pickle files produced by this project.
    with open(pickle_path, "rb") as f:
        payload = pickle.load(f)
    matrix = np.asarray(payload["embeddings"], dtype=np.float32)  # FAISS works on float32
    row_norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    row_norms[row_norms == 0] = 1.0  # keep all-zero rows as zeros instead of NaN
    return payload, matrix / row_norms


def _build_flat_l2_index(matrix, label, dataset_rows):
    """Build an exact L2 FAISS index over ``matrix`` and warn if it is not row-aligned with ``dataset_rows``."""
    index = faiss.IndexFlatL2(matrix.shape[1])
    index.add(matrix)
    # Search results are later used as row positions into the dataset, so the sizes must agree.
    if index.ntotal != len(dataset_rows):
        print(f"⚠️ {label}: index has {index.ntotal} vectors but dataset has {len(dataset_rows)} rows")
    return index


# Load question-image CSV
df = pd.read_csv("train_final.csv").dropna(subset=["Image_Name"]).reset_index(drop=True)

# Load PMC question embeddings
pmc_q_data, pmc_q_embeddings = _load_unit_embeddings("pmc_question_embeddings.pkl")
pmc_questions = pmc_q_data["questions"]

# Load PMC image embeddings (pmc_img_data is kept: main() reads pmc_img_data["image_names"])
pmc_img_data, pmc_img_embeddings = _load_unit_embeddings("pmc_image_embeddings.pkl")

# Load PubMedQA
pubmed_dataset = load_dataset("pubmed_qa", "pqa_labeled")["train"]
pubmed_data, pubmed_q_embeddings = _load_unit_embeddings("pubmed_question_embeddings.pkl")
pubmed_index = _build_flat_l2_index(pubmed_q_embeddings, "PubMedQA", pubmed_dataset)

# Load SLAKE
slake_dataset = load_dataset("mdwiratathya/SLAKE-vqa-english")["train"]
slake_data, slake_embeddings = _load_unit_embeddings("slake_image_embeddings.pkl")
slake_index = _build_flat_l2_index(slake_embeddings, "SLAKE", slake_dataset)




In [None]:
!pip uninstall openai -y



Found existing installation: openai 0.27.2
Uninstalling openai-0.27.2:
  Successfully uninstalled openai-0.27.2
[0m

In [None]:
!pip install openai==0.27.2



Collecting openai==0.27.2
  Using cached openai-0.27.2-py3-none-any.whl.metadata (13 kB)
Using cached openai-0.27.2-py3-none-any.whl (70 kB)
Installing collected packages: openai
Successfully installed openai-0.27.2
[0m

In [None]:
# NOTE(review): duplicates the identical load_dataset call in the "Reading all embedding files"
# cell; it only re-binds pubmed_dataset to the same PubMedQA labeled train split.
pubmed_dataset = load_dataset("pubmed_qa", "pqa_labeled")["train"]

In [None]:
# NOTE(review): duplicates the identical load_dataset call in the "Reading all embedding files"
# cell; it only re-binds slake_dataset to the same SLAKE train split.
slake_dataset = load_dataset("mdwiratathya/SLAKE-vqa-english")["train"]

## Training-set evaluation: multiple-choice RAG with GPT-4 (no model is trained)

In [None]:
import os
import base64
import json
import numpy as np
from tqdm import tqdm
from openai import OpenAI
import pandas as pd
import faiss



# Base path to image folder - will be combined with Figure_path
image_folder = "/Users/sheetalpatnaik/Desktop/GENAI/figures"

def build_rag_context(question, options, pmc_q_embedding, pmc_img_embedding, pubmed_dataset,
                     pubmed_q_embeddings, pubmed_index, slake_dataset, slake_index):
    """
    Build the retrieved-context text that is inserted into the prompt.

    Two nearest-neighbour lookups are performed:
    1. PMC question embedding against the PubMed question index
    2. PMC image embedding against the SLAKE image index

    Parameters:
    - question, options, pubmed_q_embeddings: accepted for call compatibility; not used here.
    - pmc_q_embedding, pmc_img_embedding: query vectors (any array-like of floats).
    - pubmed_index, slake_index: objects exposing search(queries, k) -> (distances, indices).
    - pubmed_dataset, slake_dataset: collections indexable by row position that yield
      dict-like items ('question', 'context', 'long_answer', 'final_decision' for PubMed;
      'question', 'answer' for SLAKE).

    Returns:
    - A string with up to 3 PubMed references followed by up to 3 SLAKE question/answer pairs.
      Items that cannot be read are reported on stdout and left out entirely.
    """
    top_k_pubmed = 3
    top_k_slake = 3
    max_context_chars = 300

    def _as_query(embedding):
        # A search call takes a 2-D batch of queries; float32 is the dtype FAISS operates on.
        return np.asarray(embedding, dtype=np.float32).reshape(1, -1)

    def _pubmed_context_text(raw_context):
        # In PubMedQA the 'context' field is a dict whose 'contexts' entry is a list of
        # paragraphs, so it has to be flattened to text before it can be length-limited.
        if isinstance(raw_context, dict):
            raw_context = raw_context.get('contexts', '')
        if isinstance(raw_context, (list, tuple)):
            raw_context = " ".join(str(part) for part in raw_context)
        text = "" if raw_context is None else str(raw_context)
        if len(text) > max_context_chars:
            text = text[:max_context_chars] + "..."
        return text

    # 1. Match PMC question embedding with PubMed question embeddings
    context = "Related medical information from literature:\n"
    _, indices = pubmed_index.search(_as_query(pmc_q_embedding), top_k_pubmed)
    for i, idx in enumerate(indices[0]):
        if idx < 0:
            # FAISS pads with -1 when fewer than k neighbours exist; -1 must not be
            # used as a Python index (it would silently pick the last row).
            continue
        try:
            pubmed_item = pubmed_dataset[int(idx)]
            # Using 'long_answer' if available, otherwise 'final_decision'
            answer_text = pubmed_item.get('long_answer', pubmed_item.get('final_decision', 'No answer available'))
            entry = (
                f"Reference {i+1}:\n"
                f"Question: {pubmed_item['question']}\n"
                f"Context: {_pubmed_context_text(pubmed_item.get('context', ''))}\n"
                f"Answer: {answer_text}\n\n"
            )
        except Exception as e:
            print(f"Error accessing pubmed_dataset at index {idx}: {e}")
            continue
        # Appended only once the whole entry was built, so a failure never leaves half a reference.
        context += entry

    # 2. Match PMC image embedding with SLAKE image embeddings
    context += "Information from similar medical images:\n"
    _, indices = slake_index.search(_as_query(pmc_img_embedding), top_k_slake)
    for i, idx in enumerate(indices[0]):
        if idx < 0:
            continue
        try:
            slake_item = slake_dataset[int(idx)]
            entry = (
                f"Similar Image {i+1}:\n"
                f"Question: {slake_item['question']}\n"
                f"Answer: {slake_item['answer']}\n\n"
            )
        except Exception as e:
            print(f"Error accessing slake_dataset at index {idx}: {e}")
            continue
        context += entry

    return context

def find_best_matching_embedding(question, pmc_questions, pmc_q_embeddings):
    """
    Find the embedding of the stored PMC question that best matches `question`.

    Matching is purely textual (case-insensitive):
    1. an exact match wins immediately;
    2. otherwise, among stored questions that contain `question` or are contained in it,
       the one sharing the most whitespace-separated words wins (first one on ties);
    3. if nothing matches, the first embedding is returned and a warning is printed,
       because the retrieved PubMed context will then be unrelated to the question.

    Parameters:
    - question: question text to look up
    - pmc_questions: sequence of stored question strings
    - pmc_q_embeddings: sequence of embeddings, position-aligned with pmc_questions

    Returns:
    - pmc_q_embeddings[i] for the selected position i
    """
    # Hoisted out of the loop: these do not depend on the candidate being examined.
    question_lower = question.lower()
    question_words = set(question_lower.split())

    best_q_idx = 0  # Default to the first one if no match found
    best_match_score = float('-inf')

    for q_idx, q in enumerate(pmc_questions):
        q_lower = q.lower()
        if q_lower == question_lower:
            # An identical question cannot be beaten; without this an earlier, longer
            # question that merely contains it would win the tie.
            return pmc_q_embeddings[q_idx]
        # Simple text matching heuristic
        if question_lower in q_lower or q_lower in question_lower:
            match_score = len(question_words & set(q_lower.split()))
            if match_score > best_match_score:
                best_match_score = match_score
                best_q_idx = q_idx

    if best_match_score == float('-inf'):
        print(f"⚠️ No PMC question matched {question!r}, using fallback")

    return pmc_q_embeddings[best_q_idx]

def find_image_embedding(img_name, pmc_image_names, pmc_img_embeddings):
    """
    Find the embedding stored for `img_name` in the PMC image embeddings.

    Lookup order:
    1. exact match against the stored names;
    2. match on file name only, ignoring directories, path-separator style and letter case
       (covers stored names that are full paths or differ only in extension case);
    3. fallback to the first embedding, with a warning.

    NOTE: the fallback keeps the pipeline running, but the image-based context is then built
    from an unrelated image. If the warning appears for every sample, the stored names and the
    CSV file names do not refer to the same image set and the embeddings need regenerating.

    Parameters:
    - img_name: file name (or path) of the image to look up
    - pmc_image_names: sequence of stored image names, position-aligned with pmc_img_embeddings
    - pmc_img_embeddings: sequence of embeddings

    Returns:
    - pmc_img_embeddings[i] for the matched position i, or pmc_img_embeddings[0] if nothing matches
    """
    def _file_key(name):
        # Normalise Windows separators so basename works regardless of where the names were produced.
        return os.path.basename(str(name).replace("\\", "/")).lower()

    names = list(pmc_image_names)  # also accepts non-list sequences of names

    try:
        return pmc_img_embeddings[names.index(img_name)]
    except ValueError:
        pass

    target_key = _file_key(img_name)
    for img_idx, stored_name in enumerate(names):
        if _file_key(stored_name) == target_key:
            return pmc_img_embeddings[img_idx]

    # If image not found, return the first embedding as fallback
    print(f"⚠️ Image {img_name} not found in PMC embeddings, using fallback")
    return pmc_img_embeddings[0]

def extract_option_letter(prediction_text):
    """
    Extract the chosen option letter (A, B, C, or D) from the model's answer text.

    The text is checked against these patterns, in order, and the first hit wins:
    1. a single letter at the very start, optionally wrapped in punctuation/markdown
       ("B. ...", "(c) ...", "**D**: ...");
    2. "option X" / "choice X" (letter in either case);
    3. "answer X", "answer: X", "answer is X" (capital letter only);
    4. the first stand-alone capital A-D anywhere in the text.

    Letters are only accepted as stand-alone tokens. Plain substring checks are avoided on
    purpose: almost every English sentence contains the characters "a", "b", "c" or "d".

    Parameters:
    - prediction_text: raw answer text (None or empty is allowed)

    Returns:
    - "A", "B", "C" or "D", or "" when no option letter can be identified. An empty string
      never equals a ground-truth letter, so unparseable answers are scored as wrong
      instead of being guessed.
    """
    import re

    if not prediction_text:
        return ""
    text = str(prediction_text).strip()

    patterns = (
        r"^\W*([A-Da-d])\b",
        r"(?i)\b(?:option|choice)\W*([a-d])\b",
        # Keyword is case-insensitive, the letter is not: "the answer is a mass" must not yield A.
        r"\b(?i:answer)(?i:\s+is)?\W*([A-D])\b",
        r"\b([A-D])\b",
    )
    for pattern in patterns:
        found = re.search(pattern, text)
        if found:
            return found.group(1).upper()

    return ""

def _request_vision_answer(prompt, encoded_image):
    """Send the prompt plus the base64-encoded image to GPT-4 Turbo, retrying once with GPT-4o.

    Uses the module-level `client`. Raises Exception if both models fail.
    """
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
            ]
        }
    ]

    def _create(model_name):
        return client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=0,
            max_tokens=500
        )

    try:
        return _create("gpt-4-turbo")
    except Exception as e:
        print(f"Error with gpt-4-turbo: {e}")
        # Fallback to gpt-4o
        try:
            return _create("gpt-4o")
        except Exception as e2:
            raise Exception(f"Both models failed. gpt-4-turbo error: {e}, gpt-4o error: {e2}")


def run_multiple_choice_rag(samples, pmc_questions, pmc_q_embeddings, pmc_image_names, pmc_img_embeddings,
                   pubmed_dataset, pubmed_q_embeddings, pubmed_index,
                   slake_dataset, slake_embeddings, slake_index, num_samples=20):
    """
    Run the multiple-choice medical RAG system

    Parameters:
    - samples: List of dictionaries from DataFrame with correct column names
      (reads "Figure_path", "Question", "Choice A" .. "Choice D" and "Answer")
    - pmc_questions, pmc_q_embeddings: stored PMC questions and their embeddings
    - pmc_image_names, pmc_img_embeddings: stored PMC image names and their embeddings
    - pubmed_dataset, pubmed_q_embeddings, pubmed_index: PubMed data used for question retrieval
    - slake_dataset, slake_index: SLAKE data used for image retrieval
    - slake_embeddings: accepted for call compatibility; not used here
    - num_samples: only the first num_samples samples are processed (falsy = all)

    Returns:
    - (results, accuracy): per-sample result dicts and correct_count / processed_count

    Side effects: prints progress, calls the OpenAI API and writes
    "medical_multiple_choice_results.json" to the working directory.

    NOTE(review): relies on the module-level names `client` and `image_folder`; `client` is
    not created in this cell, so it has to exist in the kernel before this function is called.
    """
    def _has_text(value):
        # NaN (an empty CSV cell) is truthy, so a plain `if value` would print "nan" as an option.
        if value is None or (isinstance(value, float) and value != value):
            return False
        return bool(str(value).strip())

    # Limit to specified number of samples
    if num_samples and len(samples) > num_samples:
        samples = samples[:num_samples]

    results = []
    correct_count = 0
    total_processed = 0

    for i, sample in tqdm(enumerate(samples), total=len(samples), desc="Running Multiple-Choice Medical RAG"):
        try:
            # Extract data from sample using correct column names
            figure_path = sample["Figure_path"]
            img_name = os.path.basename(figure_path)  # Extract just the filename for embedding lookup
            question = sample["Question"]

            # Extract options using correct column names
            options = {
                'A': sample.get('Choice A', ''),
                'B': sample.get('Choice B', ''),
                'C': sample.get('Choice C', ''),
                'D': sample.get('Choice D', '')
            }

            # Get the ground truth answer. Surrounding whitespace is removed so " A" still
            # matches "A"; a missing / non-text value (e.g. NaN) counts as "no ground truth".
            raw_ground_truth = sample.get("Answer", "")
            ground_truth = raw_ground_truth.strip() if isinstance(raw_ground_truth, str) else ""

            # Get the full image path
            if os.path.isabs(figure_path):
                img_path = figure_path
            else:
                img_path = os.path.join(image_folder, figure_path)

            if not os.path.isfile(img_path):
                print(f"⚠️ Image not found: {img_path}")
                continue

            # Find best matching question embedding
            pmc_q_embedding = find_best_matching_embedding(question, pmc_questions, pmc_q_embeddings)

            # Find image embedding using the basename of the image file
            pmc_img_embedding = find_image_embedding(img_name, pmc_image_names, pmc_img_embeddings)

            # Build RAG context
            context = build_rag_context(
                question, options, pmc_q_embedding, pmc_img_embedding,
                pubmed_dataset, pubmed_q_embeddings, pubmed_index,
                slake_dataset, slake_index
            )

            # Format options for the prompt (choices without text are left out)
            options_text = "\n".join([f"{key}: {value}" for key, value in options.items() if _has_text(value)])

            # Prepare prompt specifically for multiple-choice
            prompt = f"""You are a specialized medical AI assistant. Answer the following multiple-choice medical question based on the image provided and the retrieved context.

Question: {question}

Options:
{options_text}

Retrieved Context:
{context}

Please analyze the image and the retrieved context carefully, then select ONLY ONE option from choices A, B, C, or D that best answers the question.

Your response must begin with the letter of your chosen option (A, B, C, or D), followed by your explanation. For example: "A. This is the correct answer because..."

Only choose from the provided options. Do not create your own answer."""

            # Read and encode image
            with open(img_path, "rb") as f:
                encoded_image = base64.b64encode(f.read()).decode("utf-8")

            # Make GPT-4 Turbo API call (falls back to GPT-4o inside the helper)
            response = _request_vision_answer(prompt, encoded_image)

            # Extract prediction; the API may return no text content, treat that as empty
            prediction_text = (response.choices[0].message.content or "").strip()

            # Extract the selected option (A, B, C, or D)
            selected_option = extract_option_letter(prediction_text)

            # Check if the answer is correct
            is_correct = selected_option.upper() == ground_truth.upper() if ground_truth else False
            if is_correct:
                correct_count += 1

            # Add to results
            result = {
                "figure_path": figure_path,
                "question": question,
                "options": options,
                "context_length": len(context),
                "prediction_text": prediction_text,
                "selected_option": selected_option
            }

            if ground_truth:
                result["ground_truth"] = ground_truth
                result["is_correct"] = is_correct

            results.append(result)
            total_processed += 1

            # Print progress
            if (i + 1) % 5 == 0 or i == len(samples) - 1:
                if total_processed > 0:
                    current_accuracy = correct_count / total_processed
                    print(f"Progress: {i+1}/{len(samples)} - Current Accuracy: {current_accuracy:.4f} ({correct_count}/{total_processed})")

        except Exception as e:
            # Best effort: a failing sample is reported and skipped so the run can continue.
            print(f"❌ Error processing sample {i}: {e}")

    # Calculate final accuracy
    accuracy = correct_count / total_processed if total_processed > 0 else 0
    print(f"Final Accuracy: {accuracy:.4f} ({correct_count}/{total_processed})")

    # Save results
    output_file = "medical_multiple_choice_results.json"
    with open(output_file, "w") as f:
        json.dump({
            "results": results,
            "accuracy": accuracy,
            "correct_count": correct_count,
            "total_samples": total_processed
        }, f, indent=2)

    print(f"✅ Results saved to {output_file}")

    return results, accuracy

def main():
    """Run the multiple-choice RAG evaluation on the rows of the loaded CSV.

    Reads the module-level data prepared by the loading cell (df, the PMC / PubMed / SLAKE
    embeddings, datasets and indexes) and prints the final accuracy.
    """
    print("Starting multiple-choice medical RAG system...")

    # Image names are stored next to the PMC image embeddings
    image_names = pmc_img_data["image_names"]

    # One plain dict per DataFrame row
    records = df.to_dict('records')

    # Upper bound on the number of rows that are evaluated; adjust as needed
    sample_limit = 200

    _, accuracy = run_multiple_choice_rag(
        samples=records,
        pmc_questions=pmc_questions,
        pmc_q_embeddings=pmc_q_embeddings,
        pmc_image_names=image_names,
        pmc_img_embeddings=pmc_img_embeddings,
        pubmed_dataset=pubmed_dataset,
        pubmed_q_embeddings=pubmed_q_embeddings,
        pubmed_index=pubmed_index,
        slake_dataset=slake_dataset,
        slake_embeddings=slake_embeddings,
        slake_index=slake_index,
        num_samples=sample_limit,
    )

    print("✅ Multiple-choice medical RAG process completed!")
    print(f"Final accuracy: {accuracy:.4f}")

if __name__ == "__main__":
    main()

Starting multiple-choice medical RAG system...


Running Multiple-Choice Medical RAG:   0%|                                                                                       | 0/200 [00:00<?, ?it/s]

⚠️ Image PMC8253867_Fig2_41.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   0%|▍                                                                              | 1/200 [00:06<20:14,  6.11s/it]

⚠️ Image PMC8253867_Fig2_42.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   1%|▊                                                                              | 2/200 [00:14<24:58,  7.57s/it]

⚠️ Image PMC8253873_Fig6_45.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   2%|█▏                                                                             | 3/200 [00:21<23:01,  7.01s/it]

⚠️ Image PMC8253873_Fig6_46.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   2%|█▌                                                                             | 4/200 [00:26<21:07,  6.47s/it]

⚠️ Image PMC8253873_Fig8_49.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   2%|█▉                                                                             | 5/200 [00:31<18:40,  5.75s/it]

Progress: 5/200 - Current Accuracy: 0.4000 (2/5)
⚠️ Image PMC8253908_fig2_52.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   3%|██▎                                                                            | 6/200 [00:35<17:22,  5.37s/it]

⚠️ Image PMC8253908_fig2_54.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   4%|██▊                                                                            | 7/200 [00:40<16:13,  5.05s/it]

⚠️ Image PMC8253963_FIG3_57.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   4%|███▏                                                                           | 8/200 [00:45<16:33,  5.18s/it]

⚠️ Image PMC8253999_fig1_63.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   4%|███▌                                                                           | 9/200 [00:50<16:06,  5.06s/it]

⚠️ Image PMC8254247_Fig5_90.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   5%|███▉                                                                          | 10/200 [00:55<15:35,  4.92s/it]

Progress: 10/200 - Current Accuracy: 0.5000 (5/10)
⚠️ Image PMC8254247_Fig5_92.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   6%|████▎                                                                         | 11/200 [00:59<14:58,  4.76s/it]

⚠️ Image PMC8254671_Fig3_214.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   6%|████▋                                                                         | 12/200 [01:04<14:58,  4.78s/it]

⚠️ Image PMC8254671_Fig3_214.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   6%|█████                                                                         | 13/200 [01:08<14:44,  4.73s/it]

⚠️ Image PMC8254711_Fig2_243.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   7%|█████▍                                                                        | 14/200 [01:12<14:04,  4.54s/it]

⚠️ Image PMC8254711_Fig2_244.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   8%|█████▊                                                                        | 15/200 [01:17<14:18,  4.64s/it]

Progress: 15/200 - Current Accuracy: 0.4667 (7/15)
⚠️ Image PMC8254711_Fig2_245.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   8%|██████▏                                                                       | 16/200 [01:25<17:04,  5.57s/it]

⚠️ Image PMC8254711_Fig2_247.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   8%|██████▋                                                                       | 17/200 [01:31<17:02,  5.59s/it]

⚠️ Image PMC8254711_Fig2_248.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   9%|███████                                                                       | 18/200 [01:36<16:45,  5.52s/it]

⚠️ Image PMC8254711_Fig2_248.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  10%|███████▍                                                                      | 19/200 [01:42<17:10,  5.69s/it]

⚠️ Image PMC8254711_Fig2_251.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  10%|███████▊                                                                      | 20/200 [01:48<17:11,  5.73s/it]

Progress: 20/200 - Current Accuracy: 0.4000 (8/20)
⚠️ Image PMC8254839_Fig7_267.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  10%|████████▏                                                                     | 21/200 [01:55<18:13,  6.11s/it]

⚠️ Image PMC8254839_Fig8_275.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  11%|████████▌                                                                     | 22/200 [02:00<16:56,  5.71s/it]

⚠️ Image PMC8254839_Fig8_276.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  12%|████████▉                                                                     | 23/200 [02:05<16:51,  5.72s/it]

⚠️ Image PMC8254839_Fig8_281.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  12%|█████████▎                                                                    | 24/200 [02:13<18:04,  6.16s/it]

⚠️ Image PMC8254990_Fig2_325.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  12%|█████████▊                                                                    | 25/200 [02:22<20:55,  7.17s/it]

Progress: 25/200 - Current Accuracy: 0.4800 (12/25)
⚠️ Image PMC8254990_Fig2_325.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  13%|██████████▏                                                                   | 26/200 [02:28<19:53,  6.86s/it]

⚠️ Image PMC8254990_Fig2_326.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  14%|██████████▌                                                                   | 27/200 [02:31<16:19,  5.66s/it]

⚠️ Image PMC8255017_Fig1_348.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  14%|██████████▉                                                                   | 28/200 [02:38<16:55,  5.90s/it]

⚠️ Image PMC8255017_Fig1_348.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  14%|███████████▎                                                                  | 29/200 [02:44<17:27,  6.13s/it]

⚠️ Image PMC8255017_Fig1_349.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  15%|███████████▋                                                                  | 30/200 [02:50<17:17,  6.10s/it]

Progress: 30/200 - Current Accuracy: 0.4333 (13/30)
⚠️ Image PMC8255017_Fig1_349.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  16%|████████████                                                                  | 31/200 [02:59<19:07,  6.79s/it]

⚠️ Image PMC8255034_DEV198820F2_366.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  16%|████████████▍                                                                 | 32/200 [03:07<20:11,  7.21s/it]

⚠️ Image PMC8255034_DEV198820F1_392.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  16%|████████████▊                                                                 | 33/200 [03:12<18:39,  6.71s/it]

⚠️ Image PMC8255034_DEV198820F1_394.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  17%|█████████████▎                                                                | 34/200 [03:18<17:34,  6.35s/it]

⚠️ Image PMC8255034_DEV198820F1_401.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  18%|█████████████▋                                                                | 35/200 [03:26<18:43,  6.81s/it]

Progress: 35/200 - Current Accuracy: 0.4571 (16/35)
⚠️ Image PMC8255034_DEV198820F1_404.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  18%|██████████████                                                                | 36/200 [03:35<20:40,  7.57s/it]

⚠️ Image PMC8255034_DEV198820F6_419.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  18%|██████████████▍                                                               | 37/200 [03:42<19:52,  7.32s/it]

⚠️ Image PMC8255034_DEV198820F6_422.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  19%|██████████████▊                                                               | 38/200 [03:48<18:44,  6.94s/it]

⚠️ Image PMC8255034_DEV198820F6_426.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  20%|███████████████▏                                                              | 39/200 [03:53<16:54,  6.30s/it]

⚠️ Image PMC8255034_DEV198820F6_430.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  20%|███████████████▌                                                              | 40/200 [03:59<16:25,  6.16s/it]

Progress: 40/200 - Current Accuracy: 0.4750 (19/40)
⚠️ Image PMC8255034_DEV198820F7_431.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  20%|███████████████▉                                                              | 41/200 [04:07<17:56,  6.77s/it]

⚠️ Image PMC8255034_DEV198820F7_431.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  21%|████████████████▍                                                             | 42/200 [04:13<17:35,  6.68s/it]

⚠️ Image PMC8255034_DEV198820F7_448.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  22%|████████████████▊                                                             | 43/200 [04:18<15:40,  5.99s/it]

⚠️ Image PMC8255034_DEV198820F7_452.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  22%|█████████████████▏                                                            | 44/200 [04:23<14:55,  5.74s/it]

⚠️ Image PMC8255114_FIG1_467.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  22%|█████████████████▌                                                            | 45/200 [04:29<14:52,  5.76s/it]

Progress: 45/200 - Current Accuracy: 0.4444 (20/45)
⚠️ Image PMC8255223_fig0005_470.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  23%|█████████████████▉                                                            | 46/200 [04:33<14:03,  5.47s/it]

⚠️ Image PMC8255223_fig0005_471.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  24%|██████████████████▎                                                           | 47/200 [04:39<14:09,  5.55s/it]

⚠️ Image PMC8255279_iju512297-fig-0001_508.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  24%|██████████████████▋                                                           | 48/200 [04:43<12:55,  5.10s/it]

⚠️ Image PMC8255284_iju512293-fig-0001_514.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  24%|███████████████████                                                           | 49/200 [04:47<12:06,  4.81s/it]

⚠️ Image PMC8255286_iju512299-fig-0001_523.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  25%|███████████████████▌                                                          | 50/200 [04:52<12:11,  4.88s/it]

Progress: 50/200 - Current Accuracy: 0.4400 (22/50)
⚠️ Image PMC8255286_iju512299-fig-0001_527.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  26%|███████████████████▉                                                          | 51/200 [04:57<11:59,  4.83s/it]

⚠️ Image PMC8255286_iju512299-fig-0002_529.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  26%|████████████████████▎                                                         | 52/200 [05:02<11:35,  4.70s/it]

⚠️ Image PMC8255286_iju512299-fig-0002_531.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  26%|████████████████████▋                                                         | 53/200 [05:06<11:08,  4.55s/it]

⚠️ Image PMC8255365_F3_556.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  27%|█████████████████████                                                         | 54/200 [05:10<10:35,  4.35s/it]

⚠️ Image PMC8255365_F4_565.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  28%|█████████████████████▍                                                        | 55/200 [05:18<13:18,  5.51s/it]

Progress: 55/200 - Current Accuracy: 0.4364 (24/55)
⚠️ Image PMC8255484_F1_574.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  28%|█████████████████████▊                                                        | 56/200 [05:24<13:31,  5.63s/it]

⚠️ Image PMC8255484_F1_574.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  28%|██████████████████████▏                                                       | 57/200 [05:27<11:57,  5.02s/it]

⚠️ Image PMC8255916_f1_646.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  29%|██████████████████████▌                                                       | 58/200 [05:32<11:30,  4.87s/it]

⚠️ Image PMC8255931_F4_664.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  30%|███████████████████████                                                       | 59/200 [05:38<12:37,  5.37s/it]

⚠️ Image PMC8255931_F4_666.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  30%|███████████████████████▍                                                      | 60/200 [05:42<11:25,  4.90s/it]

Progress: 60/200 - Current Accuracy: 0.4500 (27/60)
⚠️ Image PMC8255946_RSTA20200207F6_679.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  30%|███████████████████████▊                                                      | 61/200 [05:48<11:39,  5.04s/it]

⚠️ Image PMC8255948_RSTA20200204F2_687.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  31%|████████████████████████▏                                                     | 62/200 [05:52<11:03,  4.80s/it]

⚠️ Image PMC8256407_Fig1_796.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  32%|████████████████████████▌                                                     | 63/200 [05:57<11:02,  4.84s/it]

⚠️ Image PMC8256407_Fig1_799.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  32%|████████████████████████▉                                                     | 64/200 [06:02<11:21,  5.01s/it]

⚠️ Image PMC8256407_Fig1_800.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  32%|█████████████████████████▎                                                    | 65/200 [06:07<11:22,  5.05s/it]

Progress: 65/200 - Current Accuracy: 0.4462 (29/65)
⚠️ Image PMC8256407_Fig1_801.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  33%|█████████████████████████▋                                                    | 66/200 [06:14<12:04,  5.41s/it]

⚠️ Image PMC8256546_Fig3_834.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  34%|██████████████████████████▏                                                   | 67/200 [06:19<12:19,  5.56s/it]

⚠️ Image PMC8256567_Fig1_846.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  34%|██████████████████████████▌                                                   | 68/200 [06:27<13:39,  6.20s/it]

⚠️ Image PMC8256567_Fig2_856.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  34%|██████████████████████████▉                                                   | 69/200 [06:32<12:37,  5.78s/it]

⚠️ Image PMC8256567_Fig2_857.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  35%|███████████████████████████▎                                                  | 70/200 [06:36<11:34,  5.34s/it]

Progress: 70/200 - Current Accuracy: 0.4429 (31/70)
⚠️ Image PMC8256567_Fig2_857.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  36%|███████████████████████████▋                                                  | 71/200 [06:42<11:42,  5.45s/it]

⚠️ Image PMC8256567_Fig2_859.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  36%|████████████████████████████                                                  | 72/200 [06:47<11:25,  5.36s/it]

⚠️ Image PMC8256567_Fig2_860.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  36%|████████████████████████████▍                                                 | 73/200 [06:54<12:01,  5.69s/it]

⚠️ Image PMC8256567_Fig2_863.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  37%|████████████████████████████▊                                                 | 74/200 [07:01<13:16,  6.32s/it]

⚠️ Image PMC8256994_F1_922.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  38%|█████████████████████████████▎                                                | 75/200 [07:06<12:01,  5.77s/it]

Progress: 75/200 - Current Accuracy: 0.4533 (34/75)
⚠️ Image PMC8257052_F1_943.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  38%|█████████████████████████████▋                                                | 76/200 [07:11<11:31,  5.58s/it]

⚠️ Image PMC8257052_F1_944.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  38%|██████████████████████████████                                                | 77/200 [07:22<14:54,  7.27s/it]

⚠️ Image PMC8257052_F1_946.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  39%|██████████████████████████████▍                                               | 78/200 [07:27<13:14,  6.51s/it]

⚠️ Image PMC8257078_F1_949.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  40%|██████████████████████████████▊                                               | 79/200 [07:33<12:51,  6.37s/it]

⚠️ Image PMC8257078_F1_951.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  40%|███████████████████████████████▏                                              | 80/200 [07:39<12:43,  6.37s/it]

Progress: 80/200 - Current Accuracy: 0.4750 (38/80)
⚠️ Image PMC8257462_FIG3_1016.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  40%|███████████████████████████████▌                                              | 81/200 [07:47<13:06,  6.61s/it]

⚠️ Image PMC8257462_FIG3_1016.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  41%|███████████████████████████████▉                                              | 82/200 [07:52<12:18,  6.26s/it]

⚠️ Image PMC8257462_FIG3_1017.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  42%|████████████████████████████████▎                                             | 83/200 [07:59<12:28,  6.40s/it]

⚠️ Image PMC8257462_FIG3_1017.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  42%|████████████████████████████████▊                                             | 84/200 [08:04<11:42,  6.05s/it]

⚠️ Image PMC8257540_Fig3_1066.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  42%|█████████████████████████████████▏                                            | 85/200 [08:10<11:49,  6.17s/it]

Progress: 85/200 - Current Accuracy: 0.4824 (41/85)
⚠️ Image PMC8257540_Fig3_1074.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  43%|█████████████████████████████████▌                                            | 86/200 [08:16<11:14,  5.92s/it]

⚠️ Image PMC8257586_Fig3_1077.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  44%|█████████████████████████████████▉                                            | 87/200 [08:22<11:20,  6.02s/it]

⚠️ Image PMC8257586_Fig3_1082.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  44%|██████████████████████████████████▎                                           | 88/200 [08:35<15:22,  8.24s/it]

⚠️ Image PMC8257586_Fig3_1084.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  44%|██████████████████████████████████▋                                           | 89/200 [08:40<13:22,  7.23s/it]

⚠️ Image PMC8257803_Fig2_1155.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  45%|███████████████████████████████████                                           | 90/200 [08:48<13:31,  7.37s/it]

Progress: 90/200 - Current Accuracy: 0.4889 (44/90)
⚠️ Image PMC8257847_Fig2_1191.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  46%|███████████████████████████████████▍                                          | 91/200 [08:54<12:40,  6.97s/it]

⚠️ Image PMC8257847_Fig2_1191.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  46%|███████████████████████████████████▉                                          | 92/200 [08:59<11:29,  6.39s/it]

⚠️ Image PMC8257847_Fig2_1192.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  46%|████████████████████████████████████▎                                         | 93/200 [09:04<10:42,  6.01s/it]

⚠️ Image PMC8258112_F3_1227.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  47%|████████████████████████████████████▋                                         | 94/200 [09:09<10:08,  5.74s/it]

⚠️ Image PMC8258112_F3_1228.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  48%|█████████████████████████████████████                                         | 95/200 [09:14<09:39,  5.52s/it]

Progress: 95/200 - Current Accuracy: 0.4842 (46/95)
⚠️ Image PMC8258112_F3_1231.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  48%|█████████████████████████████████████▍                                        | 96/200 [09:23<11:01,  6.36s/it]

⚠️ Image PMC8258112_F3_1232.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  48%|█████████████████████████████████████▊                                        | 97/200 [09:29<11:09,  6.50s/it]

⚠️ Image PMC8258147_F2_1242.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  49%|██████████████████████████████████████▏                                       | 98/200 [09:34<10:02,  5.91s/it]

⚠️ Image PMC8258164_F10_1252.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  50%|██████████████████████████████████████▌                                       | 99/200 [09:39<09:26,  5.61s/it]

⚠️ Image PMC8258164_F10_1253.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  50%|██████████████████████████████████████▌                                      | 100/200 [09:42<08:21,  5.02s/it]

Progress: 100/200 - Current Accuracy: 0.4800 (48/100)
⚠️ Image PMC8258354_tca14000-fig-0001_1269.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  50%|██████████████████████████████████████▉                                      | 101/200 [09:48<08:21,  5.06s/it]

⚠️ Image PMC8258355_tca13992-fig-0003_1277.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  51%|███████████████████████████████████████▎                                     | 102/200 [09:52<07:44,  4.74s/it]

⚠️ Image PMC8258355_tca13992-fig-0003_1278.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  52%|███████████████████████████████████████▋                                     | 103/200 [09:55<07:12,  4.46s/it]

⚠️ Image PMC8258366_tca14010-fig-0001_1284.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  52%|████████████████████████████████████████                                     | 104/200 [10:01<07:31,  4.70s/it]

⚠️ Image PMC8258366_tca14010-fig-0001_1286.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  52%|████████████████████████████████████████▍                                    | 105/200 [10:06<07:57,  5.02s/it]

Progress: 105/200 - Current Accuracy: 0.4952 (52/105)
⚠️ Image PMC8258383_fig4_1290.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  53%|████████████████████████████████████████▊                                    | 106/200 [10:12<08:03,  5.14s/it]

⚠️ Image PMC8258435_f3_1306.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  54%|█████████████████████████████████████████▏                                   | 107/200 [10:27<12:35,  8.13s/it]

⚠️ Image PMC8258435_f3_1308.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  54%|█████████████████████████████████████████▌                                   | 108/200 [10:32<10:55,  7.12s/it]

⚠️ Image PMC8258435_f3_1314.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  55%|█████████████████████████████████████████▉                                   | 109/200 [10:38<10:27,  6.89s/it]

⚠️ Image PMC8258493_Fig7_1322.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  55%|██████████████████████████████████████████▎                                  | 110/200 [10:45<10:11,  6.79s/it]

Progress: 110/200 - Current Accuracy: 0.4909 (54/110)
⚠️ Image PMC8258493_Fig7_1322.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  56%|██████████████████████████████████████████▋                                  | 111/200 [10:51<09:55,  6.69s/it]

⚠️ Image PMC8258493_Fig7_1323.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  56%|███████████████████████████████████████████                                  | 112/200 [10:56<08:53,  6.07s/it]

⚠️ Image PMC8258720_fig1-14574969211000546_1398.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  56%|███████████████████████████████████████████▌                                 | 113/200 [11:00<08:12,  5.66s/it]

⚠️ Image PMC8258786_fig0001_1408.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  57%|███████████████████████████████████████████▉                                 | 114/200 [11:06<07:52,  5.49s/it]

⚠️ Image PMC8259209_Fig2_1533.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  57%|████████████████████████████████████████████▎                                | 115/200 [11:11<07:48,  5.51s/it]

Progress: 115/200 - Current Accuracy: 0.4870 (56/115)
⚠️ Image PMC8259360_Fig1_1548.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  58%|████████████████████████████████████████████▋                                | 116/200 [11:16<07:30,  5.36s/it]

⚠️ Image PMC8259360_Fig1_1549.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  58%|█████████████████████████████████████████████                                | 117/200 [11:21<07:16,  5.26s/it]

⚠️ Image PMC8259360_Fig2_1551.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  59%|█████████████████████████████████████████████▍                               | 118/200 [11:27<07:27,  5.46s/it]

⚠️ Image PMC8259360_Fig2_1552.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  60%|█████████████████████████████████████████████▊                               | 119/200 [11:33<07:26,  5.51s/it]

⚠️ Image PMC8259360_Fig2_1552.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  60%|██████████████████████████████████████████████▏                              | 120/200 [11:38<07:05,  5.32s/it]

Progress: 120/200 - Current Accuracy: 0.4833 (58/120)
⚠️ Image PMC8259360_Fig2_1553.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  60%|██████████████████████████████████████████████▌                              | 121/200 [11:44<07:32,  5.73s/it]

⚠️ Image PMC8259360_Fig2_1553.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  61%|██████████████████████████████████████████████▉                              | 122/200 [11:48<06:46,  5.21s/it]

⚠️ Image PMC8259393_fig2_1565.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  62%|███████████████████████████████████████████████▎                             | 123/200 [11:54<06:46,  5.28s/it]

⚠️ Image PMC8259403_f0005_1566.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  62%|███████████████████████████████████████████████▋                             | 124/200 [11:59<06:32,  5.17s/it]

⚠️ Image PMC8259403_f0005_1566.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  62%|████████████████████████████████████████████████▏                            | 125/200 [12:04<06:26,  5.15s/it]

Progress: 125/200 - Current Accuracy: 0.4880 (61/125)
⚠️ Image PMC8259403_f0010_1569.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  63%|████████████████████████████████████████████████▌                            | 126/200 [12:08<05:52,  4.76s/it]

⚠️ Image PMC8259403_f0010_1569.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  64%|████████████████████████████████████████████████▉                            | 127/200 [12:11<05:23,  4.43s/it]

⚠️ Image PMC8259403_f0010_1570.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  64%|█████████████████████████████████████████████████▎                           | 128/200 [12:19<06:26,  5.37s/it]

⚠️ Image PMC8259403_f0010_1570.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  64%|█████████████████████████████████████████████████▋                           | 129/200 [12:24<06:15,  5.29s/it]

⚠️ Image PMC8259403_f0015_1571.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  65%|██████████████████████████████████████████████████                           | 130/200 [12:29<06:13,  5.33s/it]

Progress: 130/200 - Current Accuracy: 0.4923 (64/130)
⚠️ Image PMC8259403_f0015_1572.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  66%|██████████████████████████████████████████████████▍                          | 131/200 [12:33<05:38,  4.90s/it]

⚠️ Image PMC8259403_f0015_1572.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  66%|██████████████████████████████████████████████████▊                          | 132/200 [12:39<05:44,  5.07s/it]

⚠️ Image PMC8259403_f0020_1575.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  66%|███████████████████████████████████████████████████▏                         | 133/200 [12:44<05:43,  5.12s/it]

⚠️ Image PMC8259446_Fig3_1585.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  67%|███████████████████████████████████████████████████▌                         | 134/200 [12:49<05:34,  5.07s/it]

⚠️ Image PMC8259446_Fig3_1586.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  68%|███████████████████████████████████████████████████▉                         | 135/200 [12:54<05:36,  5.18s/it]

Progress: 135/200 - Current Accuracy: 0.4815 (65/135)
⚠️ Image PMC8259467_Figure2_1607.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  68%|████████████████████████████████████████████████████▎                        | 136/200 [12:59<05:24,  5.07s/it]

⚠️ Image PMC8259467_Figure2_1607.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  68%|████████████████████████████████████████████████████▋                        | 137/200 [13:04<05:06,  4.87s/it]

⚠️ Image PMC8259467_Figure2_1608.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  69%|█████████████████████████████████████████████████████▏                       | 138/200 [13:11<05:47,  5.60s/it]

⚠️ Image PMC8259467_Figure2_1609.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  70%|█████████████████████████████████████████████████████▌                       | 139/200 [13:18<06:05,  6.00s/it]

⚠️ Image PMC8259467_Figure4_1612.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  70%|█████████████████████████████████████████████████████▉                       | 140/200 [13:26<06:41,  6.69s/it]

Progress: 140/200 - Current Accuracy: 0.4929 (69/140)
⚠️ Image PMC8259545_Fig2_1618.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  70%|██████████████████████████████████████████████████████▎                      | 141/200 [13:33<06:30,  6.61s/it]

⚠️ Image PMC8259545_Fig2_1620.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  71%|██████████████████████████████████████████████████████▋                      | 142/200 [13:38<06:01,  6.23s/it]

⚠️ Image PMC8259545_Fig2_1620.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  72%|███████████████████████████████████████████████████████                      | 143/200 [13:44<05:47,  6.09s/it]

⚠️ Image PMC8259545_Fig2_1624.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  72%|███████████████████████████████████████████████████████▍                     | 144/200 [13:49<05:29,  5.88s/it]

⚠️ Image PMC8259545_Fig2_1625.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  72%|███████████████████████████████████████████████████████▊                     | 145/200 [13:53<04:55,  5.38s/it]

Progress: 145/200 - Current Accuracy: 0.4828 (70/145)
⚠️ Image PMC8259545_Fig5_1628.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  73%|████████████████████████████████████████████████████████▏                    | 146/200 [13:58<04:42,  5.24s/it]

⚠️ Image PMC8259545_Fig6_1633.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  74%|████████████████████████████████████████████████████████▌                    | 147/200 [14:03<04:32,  5.14s/it]

⚠️ Image PMC8259545_Fig6_1633.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  74%|████████████████████████████████████████████████████████▉                    | 148/200 [14:07<04:09,  4.80s/it]

⚠️ Image PMC8259738_F3_1641.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  74%|█████████████████████████████████████████████████████████▎                   | 149/200 [14:14<04:30,  5.31s/it]

⚠️ Image PMC8259738_F3_1642.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  75%|█████████████████████████████████████████████████████████▊                   | 150/200 [14:20<04:40,  5.61s/it]

Progress: 150/200 - Current Accuracy: 0.5000 (75/150)
⚠️ Image PMC8259738_F3_1642.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  76%|██████████████████████████████████████████████████████████▏                  | 151/200 [14:26<04:42,  5.77s/it]

⚠️ Image PMC8259738_F3_1644.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  76%|██████████████████████████████████████████████████████████▌                  | 152/200 [14:31<04:20,  5.42s/it]

⚠️ Image PMC8259738_F3_1645.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  76%|██████████████████████████████████████████████████████████▉                  | 153/200 [14:36<04:20,  5.55s/it]

⚠️ Image PMC8259738_F3_1646.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  77%|███████████████████████████████████████████████████████████▎                 | 154/200 [14:45<04:56,  6.45s/it]

⚠️ Image PMC8259738_F4_1647.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  78%|███████████████████████████████████████████████████████████▋                 | 155/200 [14:51<04:39,  6.21s/it]

Progress: 155/200 - Current Accuracy: 0.5097 (79/155)
⚠️ Image PMC8259738_F4_1648.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  78%|████████████████████████████████████████████████████████████                 | 156/200 [14:55<04:14,  5.79s/it]

⚠️ Image PMC8259738_F4_1649.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  78%|████████████████████████████████████████████████████████████▍                | 157/200 [15:00<03:53,  5.44s/it]

⚠️ Image PMC8259791_ccr34449-fig-0001_1661.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  79%|████████████████████████████████████████████████████████████▊                | 158/200 [15:06<03:48,  5.43s/it]

⚠️ Image PMC8259791_ccr34449-fig-0001_1661.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  80%|█████████████████████████████████████████████████████████████▏               | 159/200 [15:11<03:45,  5.49s/it]

⚠️ Image PMC8259927_ccr34482-fig-0001_1712.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  80%|█████████████████████████████████████████████████████████████▌               | 160/200 [15:16<03:36,  5.41s/it]

Progress: 160/200 - Current Accuracy: 0.5062 (81/160)
⚠️ Image PMC8259927_ccr34482-fig-0001_1713.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  80%|█████████████████████████████████████████████████████████████▉               | 161/200 [15:22<03:30,  5.38s/it]

⚠️ Image PMC8259930_ccr34412-fig-0004_1719.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  81%|██████████████████████████████████████████████████████████████▎              | 162/200 [15:27<03:22,  5.34s/it]

⚠️ Image PMC8260198_FIG2_1766.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  82%|██████████████████████████████████████████████████████████████▊              | 163/200 [15:32<03:10,  5.15s/it]

⚠️ Image PMC8260211_FIG2_1783.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  82%|███████████████████████████████████████████████████████████████▏             | 164/200 [15:37<03:04,  5.13s/it]

⚠️ Image PMC8260427_Fig2_1809.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  82%|███████████████████████████████████████████████████████████████▌             | 165/200 [15:42<02:59,  5.12s/it]

Progress: 165/200 - Current Accuracy: 0.4970 (82/165)
⚠️ Image PMC8260427_Fig2_1810.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  83%|███████████████████████████████████████████████████████████████▉             | 166/200 [15:50<03:21,  5.94s/it]

⚠️ Image PMC8260427_Fig2_1814.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  84%|████████████████████████████████████████████████████████████████▎            | 167/200 [15:55<03:10,  5.78s/it]

⚠️ Image PMC8260486_jmd212213-fig-0002_1847.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  84%|████████████████████████████████████████████████████████████████▋            | 168/200 [16:02<03:14,  6.08s/it]

⚠️ Image PMC8260486_jmd212213-fig-0002_1847.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  84%|█████████████████████████████████████████████████████████████████            | 169/200 [16:05<02:45,  5.34s/it]

⚠️ Image PMC8260581_Fig2_1857.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  85%|█████████████████████████████████████████████████████████████████▍           | 170/200 [16:11<02:46,  5.54s/it]

Progress: 170/200 - Current Accuracy: 0.4941 (84/170)
⚠️ Image PMC8260581_Fig2_1859.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  86%|█████████████████████████████████████████████████████████████████▊           | 171/200 [16:17<02:42,  5.59s/it]

⚠️ Image PMC8260581_Fig2_1860.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  86%|██████████████████████████████████████████████████████████████████▏          | 172/200 [16:22<02:30,  5.39s/it]

⚠️ Image PMC8260581_Fig2_1861.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  86%|██████████████████████████████████████████████████████████████████▌          | 173/200 [16:30<02:49,  6.29s/it]

⚠️ Image PMC8260593_Fig1_1866.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  87%|██████████████████████████████████████████████████████████████████▉          | 174/200 [16:36<02:34,  5.94s/it]

⚠️ Image PMC8260593_Fig1_1866.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  88%|███████████████████████████████████████████████████████████████████▍         | 175/200 [16:42<02:33,  6.13s/it]

Progress: 175/200 - Current Accuracy: 0.4971 (87/175)
⚠️ Image PMC8260716_Fig1_1910.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  88%|███████████████████████████████████████████████████████████████████▊         | 176/200 [16:54<03:09,  7.92s/it]

⚠️ Image PMC8260716_Fig1_1912.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  88%|████████████████████████████████████████████████████████████████████▏        | 177/200 [17:01<02:56,  7.66s/it]

⚠️ Image PMC8260716_Fig2_1914.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  89%|████████████████████████████████████████████████████████████████████▌        | 178/200 [17:09<02:45,  7.54s/it]

⚠️ Image PMC8260716_Fig2_1915.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  90%|████████████████████████████████████████████████████████████████████▉        | 179/200 [17:14<02:25,  6.91s/it]

⚠️ Image PMC8260716_Fig2_1918.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  90%|█████████████████████████████████████████████████████████████████████▎       | 180/200 [17:20<02:15,  6.78s/it]

Progress: 180/200 - Current Accuracy: 0.5056 (91/180)
⚠️ Image PMC8260716_Fig2_1918.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  90%|█████████████████████████████████████████████████████████████████████▋       | 181/200 [17:25<01:58,  6.21s/it]

⚠️ Image PMC8260745_fig0003_1938.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  91%|██████████████████████████████████████████████████████████████████████       | 182/200 [17:33<02:01,  6.77s/it]

⚠️ Image PMC8260745_fig0003_1940.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  92%|██████████████████████████████████████████████████████████████████████▍      | 183/200 [17:40<01:51,  6.59s/it]

⚠️ Image PMC8260745_fig0003_1940.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  92%|██████████████████████████████████████████████████████████████████████▊      | 184/200 [17:48<01:52,  7.04s/it]

⚠️ Image PMC8260745_fig0003_1941.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  92%|███████████████████████████████████████████████████████████████████████▏     | 185/200 [17:54<01:42,  6.83s/it]

Progress: 185/200 - Current Accuracy: 0.4973 (92/185)
⚠️ Image PMC8260745_fig0003_1943.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  93%|███████████████████████████████████████████████████████████████████████▌     | 186/200 [18:05<01:52,  8.01s/it]

⚠️ Image PMC8260745_fig0003_1943.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  94%|███████████████████████████████████████████████████████████████████████▉     | 187/200 [18:13<01:46,  8.19s/it]

⚠️ Image PMC8260816_jbm410509-fig-0003_1963.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  94%|████████████████████████████████████████████████████████████████████████▍    | 188/200 [18:20<01:32,  7.73s/it]

⚠️ Image PMC8260843_F1_1986.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  94%|████████████████████████████████████████████████████████████████████████▊    | 189/200 [18:26<01:18,  7.14s/it]

⚠️ Image PMC8260843_F3_2001.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  95%|█████████████████████████████████████████████████████████████████████████▏   | 190/200 [18:32<01:07,  6.76s/it]

Progress: 190/200 - Current Accuracy: 0.5105 (97/190)
⚠️ Image PMC8260843_F3_2003.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  96%|█████████████████████████████████████████████████████████████████████████▌   | 191/200 [18:36<00:54,  6.09s/it]

⚠️ Image PMC8260853_F3_2017.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  96%|█████████████████████████████████████████████████████████████████████████▉   | 192/200 [18:41<00:46,  5.82s/it]

⚠️ Image PMC8260853_F3_2018.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  96%|██████████████████████████████████████████████████████████████████████████▎  | 193/200 [18:46<00:38,  5.53s/it]

⚠️ Image PMC8260853_F3_2020.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  97%|██████████████████████████████████████████████████████████████████████████▋  | 194/200 [18:51<00:31,  5.18s/it]

⚠️ Image PMC8260853_F2_2023.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  98%|███████████████████████████████████████████████████████████████████████████  | 195/200 [18:56<00:26,  5.28s/it]

Progress: 195/200 - Current Accuracy: 0.5179 (101/195)
⚠️ Image PMC8260930_F1_2031.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  98%|███████████████████████████████████████████████████████████████████████████▍ | 196/200 [19:01<00:20,  5.03s/it]

⚠️ Image PMC8260930_F1_2033.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  98%|███████████████████████████████████████████████████████████████████████████▊ | 197/200 [19:05<00:14,  4.75s/it]

⚠️ Image PMC8260958_fig0001_2059.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  99%|████████████████████████████████████████████████████████████████████████████▏| 198/200 [19:10<00:09,  4.83s/it]

⚠️ Image PMC8260958_fig0003_2061.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG: 100%|████████████████████████████████████████████████████████████████████████████▌| 199/200 [19:15<00:04,  4.89s/it]

⚠️ Image PMC8260958_fig0002_2071.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG: 100%|█████████████████████████████████████████████████████████████████████████████| 200/200 [19:19<00:00,  5.80s/it]

Progress: 200/200 - Current Accuracy: 0.5100 (102/200)
Final Accuracy: 0.5100 (102/200)
✅ Results saved to medical_multiple_choice_results.json
✅ Multiple-choice medical RAG process completed!
Final accuracy: 0.5100





In [None]:
import os
import base64
import json
import numpy as np
from tqdm import tqdm
from openai import OpenAI
import pandas as pd
import faiss



# Base path to image folder - will be combined with Figure_path.
# The PMC_IMAGE_FOLDER environment variable overrides the location so the notebook can run
# on machines other than the author's; the original path remains the default.
image_folder = os.environ.get("PMC_IMAGE_FOLDER", "/Users/sheetalpatnaik/Desktop/GENAI/figures")

def build_rag_context(question, options, pmc_q_embedding, pmc_img_embedding, pubmed_dataset,
                     pubmed_q_embeddings, pubmed_index, slake_dataset, slake_index):
    """
    Assemble the retrieved-context text that is injected into the multiple-choice prompt.

    Two nearest-neighbour lookups are performed:
    1. pmc_q_embedding is searched in pubmed_index; hits are rendered from pubmed_dataset.
    2. pmc_img_embedding is searched in slake_index; hits are rendered from slake_dataset.

    Parameters:
    - question, options, pubmed_q_embeddings: kept for call-site compatibility; not used here.
    - pmc_q_embedding, pmc_img_embedding: query vectors (any array-like). They are converted
      to a float32 array of shape (1, dim) before searching.
    - pubmed_index, slake_index: objects exposing search(queries, k) -> (distances, indices).
      NOTE(review): presumably FAISS indexes (faiss is imported by this cell) - confirm.
    - pubmed_dataset, slake_dataset: integer-indexable collections of dict-like records.
      PubMed records need 'question' and may carry 'context', 'long_answer', 'final_decision';
      SLAKE records need 'question' and 'answer'.

    Returns:
    - str: the formatted context. A record that cannot be read is reported with print()
      and left out; it never aborts the whole context.
    """
    top_k_pubmed = 3
    top_k_slake = 3
    max_context_chars = 300

    def _as_query(embedding):
        # A single float32 query row, whatever dtype/rank the stored embedding has.
        return np.asarray(embedding, dtype=np.float32).reshape(1, -1)

    def _context_snippet(raw):
        # 'context' is not always a plain string. NOTE(review): PubMedQA-style records appear
        # to store it as {'contexts': [...], ...}; flatten so the length cap below really
        # applies instead of dumping the whole dict repr into the prompt.
        if isinstance(raw, dict):
            raw = raw.get('contexts', raw)
        if isinstance(raw, (list, tuple)):
            raw = " ".join(str(part) for part in raw)
        text = "" if raw is None else str(raw)
        if len(text) > max_context_chars:
            text = text[:max_context_chars] + "..."
        return text

    sections = []

    # 1. Match PMC question embedding with PubMed question embeddings
    _, pubmed_hits = pubmed_index.search(_as_query(pmc_q_embedding), top_k_pubmed)
    sections.append("Related medical information from literature:\n")
    for rank, idx in enumerate(pubmed_hits[0], start=1):
        # A negative index is the "no neighbour" padding value; used as-is it would
        # silently select the LAST record of the dataset.
        if int(idx) < 0:
            continue
        try:
            pubmed_item = pubmed_dataset[int(idx)]
            # Using 'long_answer' if available, otherwise 'final_decision'
            answer_text = pubmed_item.get(
                'long_answer', pubmed_item.get('final_decision', 'No answer available')
            )
            # Build the whole entry first so a failing record leaves no partial text behind.
            sections.append(
                f"Reference {rank}:\n"
                f"Question: {pubmed_item['question']}\n"
                f"Context: {_context_snippet(pubmed_item.get('context', ''))}\n"
                f"Answer: {answer_text}\n\n"
            )
        except Exception as e:
            print(f"Error accessing pubmed_dataset at index {idx}: {e}")

    # 2. Match PMC image embedding with SLAKE image embeddings
    _, slake_hits = slake_index.search(_as_query(pmc_img_embedding), top_k_slake)
    sections.append("Information from similar medical images:\n")
    for rank, idx in enumerate(slake_hits[0], start=1):
        if int(idx) < 0:
            continue
        try:
            slake_item = slake_dataset[int(idx)]
            sections.append(
                f"Similar Image {rank}:\n"
                f"Question: {slake_item['question']}\n"
                f"Answer: {slake_item['answer']}\n\n"
            )
        except Exception as e:
            print(f"Error accessing slake_dataset at index {idx}: {e}")

    return "".join(sections)

def find_best_matching_embedding(question, pmc_questions, pmc_q_embeddings):
    """
    Return the precomputed embedding of the PMC question that best matches `question`.

    A candidate qualifies when, compared case-insensitively and ignoring surrounding
    whitespace, it contains `question` or is contained in it. Among qualifying candidates
    the one sharing the most whitespace-separated tokens wins; ties keep the earliest.

    Parameters:
    - question: the question text to look up.
    - pmc_questions: iterable of question strings, index-aligned with pmc_q_embeddings.
      Non-string entries (e.g. NaN from an empty CSV cell) and empty strings are skipped.
    - pmc_q_embeddings: integer-indexable collection of embeddings.

    Returns:
    - The embedding at the best-matching index. When nothing qualifies, the first embedding
      is returned and a warning is printed, because that vector belongs to an unrelated
      question and will skew retrieval.
    """
    best_q_idx = 0  # Default to the first one if no match found
    best_match_score = float('-inf')

    # Normalise the query once instead of on every loop iteration.
    query = question.lower().strip() if isinstance(question, str) else ""
    query_tokens = set(query.split())

    if query:  # an empty query is a substring of everything and would "match" index 0
        for q_idx, candidate in enumerate(pmc_questions):
            if not isinstance(candidate, str):
                continue
            candidate_text = candidate.lower().strip()
            if not candidate_text:
                continue
            # Simple text matching heuristic
            if query in candidate_text or candidate_text in query:
                match_score = len(query_tokens & set(candidate_text.split()))
                if match_score > best_match_score:
                    best_match_score = match_score
                    best_q_idx = q_idx

    if best_match_score == float('-inf'):
        print(f"⚠️ No PMC question matched {question!r}, using fallback embedding")

    return pmc_q_embeddings[best_q_idx]

def find_image_embedding(img_name, pmc_image_names, pmc_img_embeddings):
    """
    Return the precomputed embedding for an image, looked up by name.

    The lookup is tried in decreasing order of strictness and stops at the first hit:
    1. exact string equality with a stored name,
    2. equal file basenames (a stored name may carry a directory prefix),
    3. equal basenames without the file extension.

    The recorded runs of this notebook report the fallback for every single image, which
    means the exact comparison alone never succeeded.
    NOTE(review): the stored name format is not visible here - confirm which of the
    relaxed comparisons actually applies to pmc_image_names.

    Parameters:
    - img_name: file name (or path) of the image to look up.
    - pmc_image_names: sequence of stored image names, index-aligned with
      pmc_img_embeddings (any iterable works, not only a list).
    - pmc_img_embeddings: integer-indexable collection of embeddings.

    Returns:
    - The matching embedding, or the first embedding (with a printed warning) when no
      stored name matches.
    """
    stored_names = [str(name) for name in pmc_image_names]
    stored_basenames = [os.path.basename(name) for name in stored_names]
    stored_stems = [os.path.splitext(name)[0] for name in stored_basenames]

    wanted_name = str(img_name)
    wanted_basename = os.path.basename(wanted_name)
    wanted_stem = os.path.splitext(wanted_basename)[0]

    lookups = (
        (wanted_name, stored_names),
        (wanted_basename, stored_basenames),
        (wanted_stem, stored_stems),
    )
    for key, pool in lookups:
        if key in pool:
            return pmc_img_embeddings[pool.index(key)]

    # If image not found, return the first embedding as fallback
    print(f"⚠️ Image {img_name} not found in PMC embeddings, using fallback")
    return pmc_img_embeddings[0]

def extract_option_letter(prediction_text):
    """
    Extract the chosen option letter (A, B, C, or D) from a model response.

    The response is inspected in decreasing order of reliability:
    1. a letter at the very start of the response ("B.", "C)", "(b)", "**A**", "D"),
       which is the format the prompt asks the model to use;
    2. an explicit phrase such as "Option D", "the answer is B", "choice: (C)";
    3. the first stand-alone capital A-D anywhere in the text;
    4. the first stand-alone lowercase a-d anywhere in the text.

    Letters are only accepted as whole tokens, so ordinary words that merely contain
    "a", "b", "c" or "d" (e.g. "answer", "because") are never mistaken for a choice.

    Parameters:
    - prediction_text: raw response text; None is treated as an empty string.

    Returns:
    - str: the upper-cased option letter. If none is found, the upper-cased first word
      of the response is returned, and 'A' for an empty response.
    """
    import re

    text = (prediction_text or "").strip()

    # 1. Leading letter, optionally wrapped in brackets/markdown and followed by a delimiter.
    leading = re.match(r"[\s(\[*]*([abcd])(?=[\s.:)\]*,-]|$)", text, re.IGNORECASE)
    if leading:
        return leading.group(1).upper()

    # 2. Explicit phrasing; the trailing \b rejects words like "option because".
    phrased = re.search(
        r"\b(?:option|answer|choice)\b\s*(?:is\b)?\s*:?\s*[(\[*]*([abcd])\b",
        text,
        re.IGNORECASE,
    )
    if phrased:
        return phrased.group(1).upper()

    # 3./4. Stand-alone letter token: capitals first, since a lowercase "a" is
    # usually just the article.
    for pattern in (r"\b([ABCD])\b", r"\b([abcd])\b"):
        standalone = re.search(pattern, text)
        if standalone:
            return standalone.group(1).upper()

    # If no option is found, return the first word as a fallback
    words = text.split()
    if words:
        return words[0].upper()

    # Final fallback: return 'A'
    return 'A'

def _is_missing(value):
    """Return True for None and for float NaN (how an empty CSV cell typically arrives)."""
    return value is None or (isinstance(value, float) and value != value)


def _format_options_for_prompt(options):
    """Render the options dict as 'A: text' lines, skipping missing/empty choices."""
    import re

    lines = []
    for key, value in options.items():
        if _is_missing(value):
            continue
        text = str(value).strip()
        # The dataset already prefixes choices with their label (" A: Right Coronary Artery ");
        # drop it so the prompt does not read "A:  A: Right Coronary Artery".
        text = re.sub(rf"^{re.escape(key)}\s*:\s*", "", text, flags=re.IGNORECASE)
        if text:
            lines.append(f"{key}: {text}")
    return "\n".join(lines)


def _request_prediction(prompt, encoded_image, primary_model="gpt-4-turbo", fallback_model="gpt-4o"):
    """Send prompt + base64 JPEG to the primary model, retrying once on the fallback model."""
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
            ]
        }
    ]

    def _call(model_name):
        # `client` is the module-level OpenAI client; it is not created in this cell.
        return client.chat.completions.create(
            model=model_name,
            messages=messages,
            temperature=0,
            max_tokens=500
        )

    try:
        return _call(primary_model)
    except Exception as primary_error:
        print(f"Error with {primary_model}: {primary_error}")
        try:
            return _call(fallback_model)
        except Exception as fallback_error:
            raise Exception(
                f"Both models failed. {primary_model} error: {primary_error}, "
                f"{fallback_model} error: {fallback_error}"
            ) from fallback_error


def run_multiple_choice_rag(samples, pmc_questions, pmc_q_embeddings, pmc_image_names, pmc_img_embeddings,
                   pubmed_dataset, pubmed_q_embeddings, pubmed_index,
                   slake_dataset, slake_embeddings, slake_index, num_samples=20):
    """
    Run the multiple-choice medical RAG system

    For every sample the function resolves the image file, looks up the precomputed
    question/image embeddings, builds the retrieved context, asks the vision model for an
    answer, extracts the chosen letter and scores it against the ground truth.

    Parameters:
    - samples: List of dictionaries from DataFrame with the columns "Figure_path",
      "Question", "Choice A".."Choice D" and "Answer"
    - pmc_questions, pmc_q_embeddings: question texts and their index-aligned embeddings
    - pmc_image_names, pmc_img_embeddings: image names and their index-aligned embeddings
    - pubmed_dataset, pubmed_q_embeddings, pubmed_index: PubMed retrieval corpus
    - slake_dataset, slake_index: SLAKE retrieval corpus
    - slake_embeddings: accepted for call-site compatibility; not used here
    - num_samples: maximum number of samples to process (falsy = all)

    Returns:
    - (results, accuracy): the per-sample result dicts and correct_count / total_processed
      (0 when nothing was processed).

    Side effects:
    - Prints progress and errors; writes "medical_multiple_choice_results.json" to the
      current working directory.
    - Relies on the module-level names `client` and `image_folder`.

    A sample whose image file is missing, or whose processing raises, is reported and
    skipped; it is not counted in total_processed.
    """
    # Limit to specified number of samples
    if num_samples and len(samples) > num_samples:
        samples = samples[:num_samples]

    results = []
    correct_count = 0
    total_processed = 0

    for i, sample in tqdm(enumerate(samples), total=len(samples), desc="Running Multiple-Choice Medical RAG"):
        try:
            # Extract data from sample using correct column names
            figure_path = sample["Figure_path"]
            img_name = os.path.basename(figure_path)  # Extract just the filename for embedding lookup
            question = sample["Question"]

            # Extract options using correct column names
            options = {letter: sample.get(f'Choice {letter}', '') for letter in 'ABCD'}

            # Get the ground truth answer. A NaN cell used to crash on .upper() and silently
            # drop the sample; surrounding whitespace used to turn a correct answer wrong.
            raw_ground_truth = sample.get("Answer", "")
            ground_truth = "" if _is_missing(raw_ground_truth) else str(raw_ground_truth).strip()

            # Get the full image path
            if os.path.isabs(figure_path):
                img_path = figure_path
            else:
                img_path = os.path.join(image_folder, figure_path)

            if not os.path.isfile(img_path):
                print(f"⚠️ Image not found: {img_path}")
                continue

            # Find best matching question embedding
            pmc_q_embedding = find_best_matching_embedding(question, pmc_questions, pmc_q_embeddings)

            # Find image embedding using the basename of the image file
            pmc_img_embedding = find_image_embedding(img_name, pmc_image_names, pmc_img_embeddings)

            # Build RAG context
            context = build_rag_context(
                question, options, pmc_q_embedding, pmc_img_embedding,
                pubmed_dataset, pubmed_q_embeddings, pubmed_index,
                slake_dataset, slake_index
            )

            # Format options for the prompt
            options_text = _format_options_for_prompt(options)

            # Prepare prompt specifically for multiple-choice
            prompt = f"""You are a specialized medical AI assistant. Answer the following multiple-choice medical question based on the image provided and the retrieved context.

Question: {question}

Options:
{options_text}

Retrieved Context:
{context}

Please analyze the image and the retrieved context carefully, then select ONLY ONE option from choices A, B, C, or D that best answers the question.

Your response must begin with the letter of your chosen option (A, B, C, or D), followed by your explanation. For example: "A. This is the correct answer because..."

Only choose from the provided options. Do not create your own answer."""

            # Read and encode image
            with open(img_path, "rb") as f:
                encoded_image = base64.b64encode(f.read()).decode("utf-8")

            # Ask GPT-4 Turbo, falling back to GPT-4o if the first request fails
            response = _request_prediction(prompt, encoded_image)

            # Extract prediction (the API may return None as content)
            prediction_text = (response.choices[0].message.content or "").strip()

            # Extract the selected option (A, B, C, or D). An empty response is recorded as
            # "no selection" instead of being scored as a guess of 'A'.
            selected_option = extract_option_letter(prediction_text) if prediction_text else ""

            # Check if the answer is correct
            is_correct = bool(ground_truth) and selected_option.upper() == ground_truth.upper()
            if is_correct:
                correct_count += 1

            # Add to results
            result = {
                "figure_path": figure_path,
                "question": question,
                "options": options,
                "context_length": len(context),
                "prediction_text": prediction_text,
                "selected_option": selected_option
            }

            if ground_truth:
                result["ground_truth"] = ground_truth
                result["is_correct"] = is_correct

            results.append(result)
            total_processed += 1

            # Print progress
            if (i + 1) % 5 == 0 or i == len(samples) - 1:
                if total_processed > 0:
                    current_accuracy = correct_count / total_processed
                    print(f"Progress: {i+1}/{len(samples)} - Current Accuracy: {current_accuracy:.4f} ({correct_count}/{total_processed})")

        except Exception as e:
            print(f"❌ Error processing sample {i}: {e}")

    # Calculate final accuracy
    accuracy = correct_count / total_processed if total_processed > 0 else 0
    print(f"Final Accuracy: {accuracy:.4f} ({correct_count}/{total_processed})")

    # Save results
    output_file = "medical_multiple_choice_results.json"
    with open(output_file, "w") as f:
        json.dump({
            "results": results,
            "accuracy": accuracy,
            "correct_count": correct_count,
            "total_samples": total_processed
        }, f, indent=2)

    print(f"✅ Results saved to {output_file}")

    return results, accuracy

def main():
    """
    Entry point of the cell: evaluate the multiple-choice RAG pipeline and print the score.

    Reads its inputs from names that must already exist in the kernel (df, pmc_img_data,
    pmc_questions, the embedding arrays, datasets and indexes) and forwards them to
    run_multiple_choice_rag.
    """
    print("Starting multiple-choice medical RAG system...")

    # Names of the PMC images, taken from the loaded image-embedding data
    image_names = pmc_img_data["image_names"]

    # One dict per DataFrame row
    records = df.to_dict('records')

    # Upper bound on the number of evaluated rows - adjust as needed
    sample_limit = 300

    _, final_accuracy = run_multiple_choice_rag(
        records,
        pmc_questions,
        pmc_q_embeddings,
        image_names,
        pmc_img_embeddings,
        pubmed_dataset,
        pubmed_q_embeddings,
        pubmed_index,
        slake_dataset,
        slake_embeddings,
        slake_index,
        num_samples=sample_limit,
    )

    print("✅ Multiple-choice medical RAG process completed!")
    print(f"Final accuracy: {final_accuracy:.4f}")

# Run the evaluation when this code is executed as the main module
# (the recorded cell output shows main() running when the cell is executed).
if __name__ == "__main__":
    main()

Starting multiple-choice medical RAG system...


Running Multiple-Choice Medical RAG:   0%|                                                                                       | 0/300 [00:00<?, ?it/s]

⚠️ Image PMC8253867_Fig2_41.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   0%|▎                                                                              | 1/300 [00:06<33:52,  6.80s/it]

⚠️ Image PMC8253867_Fig2_42.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   1%|▌                                                                              | 2/300 [00:12<31:04,  6.26s/it]

⚠️ Image PMC8253873_Fig6_45.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   1%|▊                                                                              | 3/300 [00:19<33:21,  6.74s/it]

⚠️ Image PMC8253873_Fig6_46.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   1%|█                                                                              | 4/300 [00:27<33:52,  6.87s/it]

⚠️ Image PMC8253873_Fig8_49.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   2%|█▎                                                                             | 5/300 [00:33<33:23,  6.79s/it]

Progress: 5/300 - Current Accuracy: 0.4000 (2/5)
⚠️ Image PMC8253908_fig2_52.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   2%|█▌                                                                             | 6/300 [00:42<35:57,  7.34s/it]

⚠️ Image PMC8253908_fig2_54.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   2%|█▊                                                                             | 7/300 [00:47<33:12,  6.80s/it]

⚠️ Image PMC8253963_FIG3_57.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   3%|██                                                                             | 8/300 [00:56<36:32,  7.51s/it]

⚠️ Image PMC8253999_fig1_63.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   3%|██▎                                                                            | 9/300 [01:05<37:30,  7.73s/it]

⚠️ Image PMC8254247_Fig5_90.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   3%|██▌                                                                           | 10/300 [01:10<33:35,  6.95s/it]

Progress: 10/300 - Current Accuracy: 0.5000 (5/10)
⚠️ Image PMC8254247_Fig5_92.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   4%|██▊                                                                           | 11/300 [01:16<32:36,  6.77s/it]

⚠️ Image PMC8254671_Fig3_214.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   4%|███                                                                           | 12/300 [01:21<30:13,  6.30s/it]

⚠️ Image PMC8254671_Fig3_214.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   4%|███▍                                                                          | 13/300 [01:31<34:30,  7.21s/it]

⚠️ Image PMC8254711_Fig2_243.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   5%|███▋                                                                          | 14/300 [01:37<33:34,  7.04s/it]

⚠️ Image PMC8254711_Fig2_244.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   5%|███▉                                                                          | 15/300 [01:46<36:04,  7.59s/it]

Progress: 15/300 - Current Accuracy: 0.4667 (7/15)
⚠️ Image PMC8254711_Fig2_245.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   5%|████▏                                                                         | 16/300 [01:54<36:10,  7.64s/it]

⚠️ Image PMC8254711_Fig2_247.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   6%|████▍                                                                         | 17/300 [01:59<31:51,  6.75s/it]

⚠️ Image PMC8254711_Fig2_248.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   6%|████▋                                                                         | 18/300 [02:04<29:38,  6.31s/it]

⚠️ Image PMC8254711_Fig2_248.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   6%|████▉                                                                         | 19/300 [02:11<30:33,  6.53s/it]

⚠️ Image PMC8254711_Fig2_251.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   7%|█████▏                                                                        | 20/300 [02:19<32:59,  7.07s/it]

Progress: 20/300 - Current Accuracy: 0.4000 (8/20)
⚠️ Image PMC8254839_Fig7_267.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   7%|█████▍                                                                        | 21/300 [02:25<31:12,  6.71s/it]

⚠️ Image PMC8254839_Fig8_275.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   7%|█████▋                                                                        | 22/300 [02:32<31:09,  6.73s/it]

⚠️ Image PMC8254839_Fig8_276.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   8%|█████▉                                                                        | 23/300 [02:37<29:18,  6.35s/it]

⚠️ Image PMC8254839_Fig8_281.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   8%|██████▏                                                                       | 24/300 [02:43<28:35,  6.21s/it]

⚠️ Image PMC8254990_Fig2_325.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   8%|██████▌                                                                       | 25/300 [02:51<30:19,  6.62s/it]

Progress: 25/300 - Current Accuracy: 0.4800 (12/25)
⚠️ Image PMC8254990_Fig2_325.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   9%|██████▊                                                                       | 26/300 [02:57<29:44,  6.51s/it]

⚠️ Image PMC8254990_Fig2_326.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   9%|███████                                                                       | 27/300 [03:02<26:47,  5.89s/it]

⚠️ Image PMC8255017_Fig1_348.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:   9%|███████▎                                                                      | 28/300 [03:10<29:37,  6.54s/it]

⚠️ Image PMC8255017_Fig1_348.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  10%|███████▌                                                                      | 29/300 [03:17<30:32,  6.76s/it]

⚠️ Image PMC8255017_Fig1_349.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  10%|███████▊                                                                      | 30/300 [03:23<30:00,  6.67s/it]

Progress: 30/300 - Current Accuracy: 0.4333 (13/30)
⚠️ Image PMC8255017_Fig1_349.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  10%|████████                                                                      | 31/300 [03:30<30:17,  6.75s/it]

⚠️ Image PMC8255034_DEV198820F2_366.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  11%|████████▎                                                                     | 32/300 [03:38<31:54,  7.15s/it]

⚠️ Image PMC8255034_DEV198820F1_392.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  11%|████████▌                                                                     | 33/300 [03:48<34:44,  7.81s/it]

⚠️ Image PMC8255034_DEV198820F1_394.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  11%|████████▊                                                                     | 34/300 [03:51<29:00,  6.54s/it]

⚠️ Image PMC8255034_DEV198820F1_401.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  12%|█████████                                                                     | 35/300 [03:57<28:18,  6.41s/it]

Progress: 35/300 - Current Accuracy: 0.4286 (15/35)
⚠️ Image PMC8255034_DEV198820F1_404.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  12%|█████████▎                                                                    | 36/300 [04:05<29:55,  6.80s/it]

⚠️ Image PMC8255034_DEV198820F6_419.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  12%|█████████▌                                                                    | 37/300 [04:11<28:34,  6.52s/it]

⚠️ Image PMC8255034_DEV198820F6_422.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  13%|█████████▉                                                                    | 38/300 [04:15<25:38,  5.87s/it]

⚠️ Image PMC8255034_DEV198820F6_426.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  13%|██████████▏                                                                   | 39/300 [04:20<23:45,  5.46s/it]

⚠️ Image PMC8255034_DEV198820F6_430.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  13%|██████████▍                                                                   | 40/300 [04:25<22:43,  5.24s/it]

Progress: 40/300 - Current Accuracy: 0.4500 (18/40)
⚠️ Image PMC8255034_DEV198820F7_431.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  14%|██████████▋                                                                   | 41/300 [04:30<23:27,  5.43s/it]

⚠️ Image PMC8255034_DEV198820F7_431.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  14%|██████████▉                                                                   | 42/300 [04:39<27:32,  6.40s/it]

⚠️ Image PMC8255034_DEV198820F7_448.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  14%|███████████▏                                                                  | 43/300 [04:46<28:09,  6.57s/it]

⚠️ Image PMC8255034_DEV198820F7_452.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  15%|███████████▍                                                                  | 44/300 [04:55<31:25,  7.36s/it]

⚠️ Image PMC8255114_FIG1_467.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  15%|███████████▋                                                                  | 45/300 [05:03<31:44,  7.47s/it]

Progress: 45/300 - Current Accuracy: 0.4222 (19/45)
⚠️ Image PMC8255223_fig0005_470.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  15%|███████████▉                                                                  | 46/300 [05:11<32:26,  7.66s/it]

⚠️ Image PMC8255223_fig0005_471.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  16%|████████████▏                                                                 | 47/300 [05:17<29:32,  7.00s/it]

⚠️ Image PMC8255279_iju512297-fig-0001_508.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  16%|████████████▍                                                                 | 48/300 [05:21<26:05,  6.21s/it]

⚠️ Image PMC8255284_iju512293-fig-0001_514.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  16%|████████████▋                                                                 | 49/300 [05:27<25:56,  6.20s/it]

⚠️ Image PMC8255286_iju512299-fig-0001_523.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  17%|█████████████                                                                 | 50/300 [05:33<25:21,  6.09s/it]

Progress: 50/300 - Current Accuracy: 0.4200 (21/50)
⚠️ Image PMC8255286_iju512299-fig-0001_527.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  17%|█████████████▎                                                                | 51/300 [05:39<24:57,  6.02s/it]

⚠️ Image PMC8255286_iju512299-fig-0002_529.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  17%|█████████████▌                                                                | 52/300 [05:45<25:09,  6.09s/it]

⚠️ Image PMC8255286_iju512299-fig-0002_531.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  18%|█████████████▊                                                                | 53/300 [05:52<26:11,  6.36s/it]

⚠️ Image PMC8255365_F3_556.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  18%|██████████████                                                                | 54/300 [05:58<25:20,  6.18s/it]

⚠️ Image PMC8255365_F4_565.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  18%|██████████████▎                                                               | 55/300 [06:09<31:16,  7.66s/it]

Progress: 55/300 - Current Accuracy: 0.4182 (23/55)
⚠️ Image PMC8255484_F1_574.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  19%|██████████████▌                                                               | 56/300 [06:15<28:39,  7.05s/it]

⚠️ Image PMC8255484_F1_574.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  19%|██████████████▊                                                               | 57/300 [06:19<25:05,  6.19s/it]

⚠️ Image PMC8255916_f1_646.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  19%|███████████████                                                               | 58/300 [06:24<24:18,  6.03s/it]

⚠️ Image PMC8255931_F4_664.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  20%|███████████████▎                                                              | 59/300 [06:30<23:15,  5.79s/it]

⚠️ Image PMC8255931_F4_666.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  20%|███████████████▌                                                              | 60/300 [06:34<21:36,  5.40s/it]

Progress: 60/300 - Current Accuracy: 0.4500 (27/60)
⚠️ Image PMC8255946_RSTA20200207F6_679.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  20%|███████████████▊                                                              | 61/300 [06:40<21:49,  5.48s/it]

⚠️ Image PMC8255948_RSTA20200204F2_687.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  21%|████████████████                                                              | 62/300 [06:44<20:22,  5.14s/it]

⚠️ Image PMC8256407_Fig1_796.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  21%|████████████████▍                                                             | 63/300 [06:49<20:14,  5.12s/it]

⚠️ Image PMC8256407_Fig1_799.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  21%|████████████████▋                                                             | 64/300 [06:54<19:18,  4.91s/it]

⚠️ Image PMC8256407_Fig1_800.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  22%|████████████████▉                                                             | 65/300 [06:58<18:12,  4.65s/it]

Progress: 65/300 - Current Accuracy: 0.4462 (29/65)
⚠️ Image PMC8256407_Fig1_801.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  22%|█████████████████▏                                                            | 66/300 [07:03<18:55,  4.85s/it]

⚠️ Image PMC8256546_Fig3_834.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  22%|█████████████████▍                                                            | 67/300 [07:09<19:51,  5.11s/it]

⚠️ Image PMC8256567_Fig1_846.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  23%|█████████████████▋                                                            | 68/300 [07:19<25:16,  6.54s/it]

⚠️ Image PMC8256567_Fig2_856.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  23%|█████████████████▉                                                            | 69/300 [07:24<23:39,  6.15s/it]

⚠️ Image PMC8256567_Fig2_857.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  23%|██████████████████▏                                                           | 70/300 [07:30<24:02,  6.27s/it]

Progress: 70/300 - Current Accuracy: 0.4286 (30/70)
⚠️ Image PMC8256567_Fig2_857.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  24%|██████████████████▍                                                           | 71/300 [07:37<24:07,  6.32s/it]

⚠️ Image PMC8256567_Fig2_859.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  24%|██████████████████▋                                                           | 72/300 [07:43<23:26,  6.17s/it]

⚠️ Image PMC8256567_Fig2_860.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  24%|██████████████████▉                                                           | 73/300 [07:48<22:34,  5.97s/it]

⚠️ Image PMC8256567_Fig2_863.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  25%|███████████████████▏                                                          | 74/300 [07:55<23:49,  6.33s/it]

⚠️ Image PMC8256994_F1_922.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  25%|███████████████████▌                                                          | 75/300 [08:00<22:29,  6.00s/it]

Progress: 75/300 - Current Accuracy: 0.4267 (32/75)
⚠️ Image PMC8257052_F1_943.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  25%|███████████████████▊                                                          | 76/300 [08:06<21:24,  5.74s/it]

⚠️ Image PMC8257052_F1_944.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  26%|████████████████████                                                          | 77/300 [08:10<20:20,  5.47s/it]

⚠️ Image PMC8257052_F1_946.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  26%|████████████████████▎                                                         | 78/300 [08:16<20:41,  5.59s/it]

⚠️ Image PMC8257078_F1_949.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  26%|████████████████████▌                                                         | 79/300 [08:22<20:15,  5.50s/it]

⚠️ Image PMC8257078_F1_951.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  27%|████████████████████▊                                                         | 80/300 [08:29<21:56,  5.98s/it]

Progress: 80/300 - Current Accuracy: 0.4500 (36/80)
⚠️ Image PMC8257462_FIG3_1016.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  27%|█████████████████████                                                         | 81/300 [08:36<22:44,  6.23s/it]

⚠️ Image PMC8257462_FIG3_1016.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  27%|█████████████████████▎                                                        | 82/300 [08:42<23:17,  6.41s/it]

⚠️ Image PMC8257462_FIG3_1017.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  28%|█████████████████████▌                                                        | 83/300 [08:48<22:00,  6.09s/it]

⚠️ Image PMC8257462_FIG3_1017.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  28%|█████████████████████▊                                                        | 84/300 [08:53<21:00,  5.84s/it]

⚠️ Image PMC8257540_Fig3_1066.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  28%|██████████████████████                                                        | 85/300 [08:58<19:59,  5.58s/it]

Progress: 85/300 - Current Accuracy: 0.4588 (39/85)
⚠️ Image PMC8257540_Fig3_1074.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  29%|██████████████████████▎                                                       | 86/300 [09:02<17:50,  5.00s/it]

⚠️ Image PMC8257586_Fig3_1077.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  29%|██████████████████████▌                                                       | 87/300 [09:07<17:57,  5.06s/it]

⚠️ Image PMC8257586_Fig3_1082.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  29%|██████████████████████▉                                                       | 88/300 [09:11<17:07,  4.85s/it]

⚠️ Image PMC8257586_Fig3_1084.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  30%|███████████████████████▏                                                      | 89/300 [09:16<17:24,  4.95s/it]

⚠️ Image PMC8257803_Fig2_1155.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  30%|███████████████████████▍                                                      | 90/300 [09:22<18:29,  5.29s/it]

Progress: 90/300 - Current Accuracy: 0.4667 (42/90)
⚠️ Image PMC8257847_Fig2_1191.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  30%|███████████████████████▋                                                      | 91/300 [09:28<18:50,  5.41s/it]

⚠️ Image PMC8257847_Fig2_1191.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  31%|███████████████████████▉                                                      | 92/300 [09:32<17:17,  4.99s/it]

⚠️ Image PMC8257847_Fig2_1192.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  31%|████████████████████████▏                                                     | 93/300 [09:35<15:31,  4.50s/it]

⚠️ Image PMC8258112_F3_1227.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  31%|████████████████████████▍                                                     | 94/300 [09:40<15:42,  4.57s/it]

⚠️ Image PMC8258112_F3_1228.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  32%|████████████████████████▋                                                     | 95/300 [09:44<15:12,  4.45s/it]

Progress: 95/300 - Current Accuracy: 0.4737 (45/95)
⚠️ Image PMC8258112_F3_1231.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  32%|████████████████████████▉                                                     | 96/300 [09:49<15:48,  4.65s/it]

⚠️ Image PMC8258112_F3_1232.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  32%|█████████████████████████▏                                                    | 97/300 [09:57<18:10,  5.37s/it]

⚠️ Image PMC8258147_F2_1242.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  33%|█████████████████████████▍                                                    | 98/300 [10:01<17:22,  5.16s/it]

⚠️ Image PMC8258164_F10_1252.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  33%|█████████████████████████▋                                                    | 99/300 [10:06<16:46,  5.01s/it]

⚠️ Image PMC8258164_F10_1253.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  33%|█████████████████████████▋                                                   | 100/300 [10:12<17:56,  5.38s/it]

Progress: 100/300 - Current Accuracy: 0.4700 (47/100)
⚠️ Image PMC8258354_tca14000-fig-0001_1269.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  34%|█████████████████████████▉                                                   | 101/300 [10:17<17:35,  5.30s/it]

⚠️ Image PMC8258355_tca13992-fig-0003_1277.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  34%|██████████████████████████▏                                                  | 102/300 [10:21<16:16,  4.93s/it]

⚠️ Image PMC8258355_tca13992-fig-0003_1278.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  34%|██████████████████████████▍                                                  | 103/300 [10:26<16:18,  4.97s/it]

⚠️ Image PMC8258366_tca14010-fig-0001_1284.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  35%|██████████████████████████▋                                                  | 104/300 [10:30<15:12,  4.65s/it]

⚠️ Image PMC8258366_tca14010-fig-0001_1286.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  35%|██████████████████████████▉                                                  | 105/300 [10:35<14:56,  4.60s/it]

Progress: 105/300 - Current Accuracy: 0.4857 (51/105)
⚠️ Image PMC8258383_fig4_1290.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  35%|███████████████████████████▏                                                 | 106/300 [10:39<14:26,  4.47s/it]

⚠️ Image PMC8258435_f3_1306.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  36%|███████████████████████████▍                                                 | 107/300 [10:43<14:14,  4.43s/it]

⚠️ Image PMC8258435_f3_1308.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  36%|███████████████████████████▋                                                 | 108/300 [10:48<14:08,  4.42s/it]

⚠️ Image PMC8258435_f3_1314.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  36%|███████████████████████████▉                                                 | 109/300 [10:54<15:37,  4.91s/it]

⚠️ Image PMC8258493_Fig7_1322.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  37%|████████████████████████████▏                                                | 110/300 [11:01<17:41,  5.59s/it]

Progress: 110/300 - Current Accuracy: 0.4818 (53/110)
⚠️ Image PMC8258493_Fig7_1322.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  37%|████████████████████████████▍                                                | 111/300 [11:07<18:16,  5.80s/it]

⚠️ Image PMC8258493_Fig7_1323.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  37%|████████████████████████████▋                                                | 112/300 [11:12<16:54,  5.40s/it]

⚠️ Image PMC8258720_fig1-14574969211000546_1398.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  38%|█████████████████████████████                                                | 113/300 [11:15<15:24,  4.94s/it]

⚠️ Image PMC8258786_fig0001_1408.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  38%|█████████████████████████████▎                                               | 114/300 [11:19<14:03,  4.54s/it]

⚠️ Image PMC8259209_Fig2_1533.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  38%|█████████████████████████████▌                                               | 115/300 [11:27<17:21,  5.63s/it]

Progress: 115/300 - Current Accuracy: 0.4870 (56/115)
⚠️ Image PMC8259360_Fig1_1548.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  39%|█████████████████████████████▊                                               | 116/300 [11:32<16:31,  5.39s/it]

⚠️ Image PMC8259360_Fig1_1549.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  39%|██████████████████████████████                                               | 117/300 [11:37<16:28,  5.40s/it]

⚠️ Image PMC8259360_Fig2_1551.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  39%|██████████████████████████████▎                                              | 118/300 [11:43<16:01,  5.28s/it]

⚠️ Image PMC8259360_Fig2_1552.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  40%|██████████████████████████████▌                                              | 119/300 [11:48<15:42,  5.20s/it]

⚠️ Image PMC8259360_Fig2_1552.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  40%|██████████████████████████████▊                                              | 120/300 [11:53<15:32,  5.18s/it]

Progress: 120/300 - Current Accuracy: 0.4833 (58/120)
⚠️ Image PMC8259360_Fig2_1553.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  40%|███████████████████████████████                                              | 121/300 [11:57<14:34,  4.88s/it]

⚠️ Image PMC8259360_Fig2_1553.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  41%|███████████████████████████████▎                                             | 122/300 [12:01<14:05,  4.75s/it]

⚠️ Image PMC8259393_fig2_1565.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  41%|███████████████████████████████▌                                             | 123/300 [12:05<13:25,  4.55s/it]

⚠️ Image PMC8259403_f0005_1566.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  41%|███████████████████████████████▊                                             | 124/300 [12:11<14:24,  4.91s/it]

⚠️ Image PMC8259403_f0005_1566.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  42%|████████████████████████████████                                             | 125/300 [12:16<14:01,  4.81s/it]

Progress: 125/300 - Current Accuracy: 0.4880 (61/125)
⚠️ Image PMC8259403_f0010_1569.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  42%|████████████████████████████████▎                                            | 126/300 [12:21<14:09,  4.88s/it]

⚠️ Image PMC8259403_f0010_1569.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  42%|████████████████████████████████▌                                            | 127/300 [12:26<14:15,  4.94s/it]

⚠️ Image PMC8259403_f0010_1570.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  43%|████████████████████████████████▊                                            | 128/300 [12:32<15:19,  5.35s/it]

⚠️ Image PMC8259403_f0010_1570.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  43%|█████████████████████████████████                                            | 129/300 [12:38<15:37,  5.48s/it]

⚠️ Image PMC8259403_f0015_1571.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  43%|█████████████████████████████████▎                                           | 130/300 [12:43<15:02,  5.31s/it]

Progress: 130/300 - Current Accuracy: 0.4923 (64/130)
⚠️ Image PMC8259403_f0015_1572.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  44%|█████████████████████████████████▌                                           | 131/300 [12:49<15:26,  5.48s/it]

⚠️ Image PMC8259403_f0015_1572.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  44%|█████████████████████████████████▉                                           | 132/300 [12:56<17:04,  6.10s/it]

⚠️ Image PMC8259403_f0020_1575.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  44%|██████████████████████████████████▏                                          | 133/300 [13:02<17:05,  6.14s/it]

⚠️ Image PMC8259446_Fig3_1585.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  45%|██████████████████████████████████▍                                          | 134/300 [13:07<15:28,  5.59s/it]

⚠️ Image PMC8259446_Fig3_1586.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  45%|██████████████████████████████████▋                                          | 135/300 [13:12<14:54,  5.42s/it]

Progress: 135/300 - Current Accuracy: 0.4815 (65/135)
⚠️ Image PMC8259467_Figure2_1607.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  45%|██████████████████████████████████▉                                          | 136/300 [13:19<15:54,  5.82s/it]

⚠️ Image PMC8259467_Figure2_1607.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  46%|███████████████████████████████████▏                                         | 137/300 [13:22<14:14,  5.24s/it]

⚠️ Image PMC8259467_Figure2_1608.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  46%|███████████████████████████████████▍                                         | 138/300 [13:25<12:12,  4.52s/it]

⚠️ Image PMC8259467_Figure2_1609.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  46%|███████████████████████████████████▋                                         | 139/300 [13:31<13:18,  4.96s/it]

⚠️ Image PMC8259467_Figure4_1612.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  47%|███████████████████████████████████▉                                         | 140/300 [13:37<13:42,  5.14s/it]

Progress: 140/300 - Current Accuracy: 0.4857 (68/140)
⚠️ Image PMC8259545_Fig2_1618.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  47%|████████████████████████████████████▏                                        | 141/300 [13:42<13:19,  5.03s/it]

⚠️ Image PMC8259545_Fig2_1620.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  47%|████████████████████████████████████▍                                        | 142/300 [13:48<14:12,  5.39s/it]

⚠️ Image PMC8259545_Fig2_1620.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  48%|████████████████████████████████████▋                                        | 143/300 [13:52<13:23,  5.12s/it]

⚠️ Image PMC8259545_Fig2_1624.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  48%|████████████████████████████████████▉                                        | 144/300 [13:57<13:14,  5.10s/it]

⚠️ Image PMC8259545_Fig2_1625.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  48%|█████████████████████████████████████▏                                       | 145/300 [14:01<11:56,  4.62s/it]

Progress: 145/300 - Current Accuracy: 0.4690 (68/145)
⚠️ Image PMC8259545_Fig5_1628.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  49%|█████████████████████████████████████▍                                       | 146/300 [14:08<13:48,  5.38s/it]

⚠️ Image PMC8259545_Fig6_1633.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  49%|█████████████████████████████████████▋                                       | 147/300 [14:14<14:27,  5.67s/it]

⚠️ Image PMC8259545_Fig6_1633.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  49%|█████████████████████████████████████▉                                       | 148/300 [14:18<13:10,  5.20s/it]

⚠️ Image PMC8259738_F3_1641.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  50%|██████████████████████████████████████▏                                      | 149/300 [14:25<14:18,  5.69s/it]

⚠️ Image PMC8259738_F3_1642.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  50%|██████████████████████████████████████▌                                      | 150/300 [14:29<12:35,  5.03s/it]

Progress: 150/300 - Current Accuracy: 0.4800 (72/150)
⚠️ Image PMC8259738_F3_1642.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  50%|██████████████████████████████████████▊                                      | 151/300 [14:34<12:15,  4.94s/it]

⚠️ Image PMC8259738_F3_1644.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  51%|███████████████████████████████████████                                      | 152/300 [14:37<11:12,  4.54s/it]

⚠️ Image PMC8259738_F3_1645.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  51%|███████████████████████████████████████▎                                     | 153/300 [14:44<12:57,  5.29s/it]

⚠️ Image PMC8259738_F3_1646.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  51%|███████████████████████████████████████▌                                     | 154/300 [14:50<13:25,  5.52s/it]

⚠️ Image PMC8259738_F4_1647.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  52%|███████████████████████████████████████▊                                     | 155/300 [14:57<14:14,  5.89s/it]

Progress: 155/300 - Current Accuracy: 0.4903 (76/155)
⚠️ Image PMC8259738_F4_1648.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  52%|████████████████████████████████████████                                     | 156/300 [15:01<12:41,  5.29s/it]

⚠️ Image PMC8259738_F4_1649.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  52%|████████████████████████████████████████▎                                    | 157/300 [15:05<11:47,  4.95s/it]

⚠️ Image PMC8259791_ccr34449-fig-0001_1661.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  53%|████████████████████████████████████████▌                                    | 158/300 [15:10<11:24,  4.82s/it]

⚠️ Image PMC8259791_ccr34449-fig-0001_1661.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  53%|████████████████████████████████████████▊                                    | 159/300 [15:14<11:16,  4.80s/it]

⚠️ Image PMC8259927_ccr34482-fig-0001_1712.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  53%|█████████████████████████████████████████                                    | 160/300 [15:20<11:41,  5.01s/it]

Progress: 160/300 - Current Accuracy: 0.4938 (79/160)
⚠️ Image PMC8259927_ccr34482-fig-0001_1713.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  54%|█████████████████████████████████████████▎                                   | 161/300 [15:25<11:50,  5.11s/it]

⚠️ Image PMC8259930_ccr34412-fig-0004_1719.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  54%|█████████████████████████████████████████▌                                   | 162/300 [15:31<12:07,  5.27s/it]

⚠️ Image PMC8260198_FIG2_1766.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  54%|█████████████████████████████████████████▊                                   | 163/300 [15:38<13:19,  5.83s/it]

⚠️ Image PMC8260211_FIG2_1783.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  55%|██████████████████████████████████████████                                   | 164/300 [15:43<12:56,  5.71s/it]

⚠️ Image PMC8260427_Fig2_1809.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  55%|██████████████████████████████████████████▎                                  | 165/300 [15:49<12:35,  5.60s/it]

Progress: 165/300 - Current Accuracy: 0.4848 (80/165)
⚠️ Image PMC8260427_Fig2_1810.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  55%|██████████████████████████████████████████▌                                  | 166/300 [15:55<13:08,  5.88s/it]

⚠️ Image PMC8260427_Fig2_1814.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  56%|██████████████████████████████████████████▊                                  | 167/300 [16:00<12:21,  5.57s/it]

⚠️ Image PMC8260486_jmd212213-fig-0002_1847.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  56%|███████████████████████████████████████████                                  | 168/300 [16:06<12:24,  5.64s/it]

⚠️ Image PMC8260486_jmd212213-fig-0002_1847.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  56%|███████████████████████████████████████████▍                                 | 169/300 [16:09<10:57,  5.02s/it]

⚠️ Image PMC8260581_Fig2_1857.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  57%|███████████████████████████████████████████▋                                 | 170/300 [16:14<10:38,  4.91s/it]

Progress: 170/300 - Current Accuracy: 0.4824 (82/170)
⚠️ Image PMC8260581_Fig2_1859.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  57%|███████████████████████████████████████████▉                                 | 171/300 [16:18<10:00,  4.65s/it]

⚠️ Image PMC8260581_Fig2_1860.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  57%|████████████████████████████████████████████▏                                | 172/300 [16:23<10:00,  4.70s/it]

⚠️ Image PMC8260581_Fig2_1861.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  58%|████████████████████████████████████████████▍                                | 173/300 [16:28<10:02,  4.74s/it]

⚠️ Image PMC8260593_Fig1_1866.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  58%|████████████████████████████████████████████▋                                | 174/300 [16:34<10:50,  5.16s/it]

⚠️ Image PMC8260593_Fig1_1866.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  58%|████████████████████████████████████████████▉                                | 175/300 [16:40<11:25,  5.49s/it]

Progress: 175/300 - Current Accuracy: 0.4857 (85/175)
⚠️ Image PMC8260716_Fig1_1910.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  59%|█████████████████████████████████████████████▏                               | 176/300 [16:47<12:02,  5.83s/it]

⚠️ Image PMC8260716_Fig1_1912.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  59%|█████████████████████████████████████████████▍                               | 177/300 [16:52<11:26,  5.58s/it]

⚠️ Image PMC8260716_Fig2_1914.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  59%|█████████████████████████████████████████████▋                               | 178/300 [16:58<11:45,  5.78s/it]

⚠️ Image PMC8260716_Fig2_1915.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  60%|█████████████████████████████████████████████▉                               | 179/300 [17:03<11:05,  5.50s/it]

⚠️ Image PMC8260716_Fig2_1918.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  60%|██████████████████████████████████████████████▏                              | 180/300 [17:08<10:32,  5.27s/it]

Progress: 180/300 - Current Accuracy: 0.4944 (89/180)
⚠️ Image PMC8260716_Fig2_1918.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  60%|██████████████████████████████████████████████▍                              | 181/300 [17:14<11:05,  5.59s/it]

⚠️ Image PMC8260745_fig0003_1938.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  61%|██████████████████████████████████████████████▋                              | 182/300 [17:21<11:51,  6.03s/it]

⚠️ Image PMC8260745_fig0003_1940.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  61%|██████████████████████████████████████████████▉                              | 183/300 [17:26<11:01,  5.66s/it]

⚠️ Image PMC8260745_fig0003_1940.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  61%|███████████████████████████████████████████████▏                             | 184/300 [17:31<10:56,  5.66s/it]

⚠️ Image PMC8260745_fig0003_1941.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  62%|███████████████████████████████████████████████▍                             | 185/300 [17:36<10:00,  5.22s/it]

Progress: 185/300 - Current Accuracy: 0.4973 (92/185)
⚠️ Image PMC8260745_fig0003_1943.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  62%|███████████████████████████████████████████████▋                             | 186/300 [17:41<09:55,  5.22s/it]

⚠️ Image PMC8260745_fig0003_1943.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  62%|███████████████████████████████████████████████▉                             | 187/300 [17:46<09:57,  5.28s/it]

⚠️ Image PMC8260816_jbm410509-fig-0003_1963.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  63%|████████████████████████████████████████████████▎                            | 188/300 [17:51<09:42,  5.20s/it]

⚠️ Image PMC8260843_F1_1986.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  63%|████████████████████████████████████████████████▌                            | 189/300 [17:55<09:00,  4.87s/it]

⚠️ Image PMC8260843_F3_2001.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  63%|████████████████████████████████████████████████▊                            | 190/300 [18:03<10:18,  5.62s/it]

Progress: 190/300 - Current Accuracy: 0.5000 (95/190)
⚠️ Image PMC8260843_F3_2003.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  64%|█████████████████████████████████████████████████                            | 191/300 [18:08<10:10,  5.60s/it]

⚠️ Image PMC8260853_F3_2017.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  64%|█████████████████████████████████████████████████▎                           | 192/300 [18:13<09:31,  5.30s/it]

⚠️ Image PMC8260853_F3_2018.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  64%|█████████████████████████████████████████████████▌                           | 193/300 [18:17<08:43,  4.89s/it]

⚠️ Image PMC8260853_F3_2020.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  65%|█████████████████████████████████████████████████▊                           | 194/300 [18:24<09:42,  5.49s/it]

⚠️ Image PMC8260853_F2_2023.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  65%|██████████████████████████████████████████████████                           | 195/300 [18:30<09:43,  5.56s/it]

Progress: 195/300 - Current Accuracy: 0.5026 (98/195)
⚠️ Image PMC8260930_F1_2031.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  65%|██████████████████████████████████████████████████▎                          | 196/300 [18:34<08:56,  5.16s/it]

⚠️ Image PMC8260930_F1_2033.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  66%|██████████████████████████████████████████████████▌                          | 197/300 [18:39<09:02,  5.27s/it]

⚠️ Image PMC8260958_fig0001_2059.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  66%|██████████████████████████████████████████████████▊                          | 198/300 [18:45<09:18,  5.47s/it]

⚠️ Image PMC8260958_fig0003_2061.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  66%|███████████████████████████████████████████████████                          | 199/300 [18:49<08:28,  5.03s/it]

⚠️ Image PMC8260958_fig0002_2071.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  67%|███████████████████████████████████████████████████▎                         | 200/300 [18:54<08:07,  4.87s/it]

Progress: 200/300 - Current Accuracy: 0.4950 (99/200)
⚠️ Image PMC8261170_fig3_2107.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  67%|███████████████████████████████████████████████████▌                         | 201/300 [18:58<07:50,  4.75s/it]

⚠️ Image PMC8261170_fig3_2108.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  67%|███████████████████████████████████████████████████▊                         | 202/300 [19:02<07:21,  4.51s/it]

⚠️ Image PMC8261486_advs2551-fig-0001_2181.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  68%|████████████████████████████████████████████████████                         | 203/300 [19:07<07:28,  4.63s/it]

⚠️ Image PMC8261639_biomimetics-06-00036-f015_2259.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  68%|████████████████████████████████████████████████████▎                        | 204/300 [19:13<08:10,  5.11s/it]

⚠️ Image PMC8262043_Fig1_2353.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  68%|████████████████████████████████████████████████████▌                        | 205/300 [19:19<08:13,  5.20s/it]

Progress: 205/300 - Current Accuracy: 0.5024 (103/205)
⚠️ Image PMC8262043_Fig1_2355.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  69%|████████████████████████████████████████████████████▊                        | 206/300 [19:25<08:31,  5.44s/it]

⚠️ Image PMC8262043_Fig1_2356.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  69%|█████████████████████████████████████████████████████▏                       | 207/300 [19:30<08:18,  5.36s/it]

⚠️ Image PMC8262043_Fig2_2358.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  69%|█████████████████████████████████████████████████████▍                       | 208/300 [19:36<08:42,  5.68s/it]

⚠️ Image PMC8262043_Fig2_2359.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  70%|█████████████████████████████████████████████████████▋                       | 209/300 [19:40<07:42,  5.08s/it]

⚠️ Image PMC8262043_Fig3_2362.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  70%|█████████████████████████████████████████████████████▉                       | 210/300 [19:44<07:06,  4.73s/it]

Progress: 210/300 - Current Accuracy: 0.5095 (107/210)
⚠️ Image PMC8262043_Fig3_2362.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  70%|██████████████████████████████████████████████████████▏                      | 211/300 [19:51<08:02,  5.42s/it]

⚠️ Image PMC8262110_FIG2_2374.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  71%|██████████████████████████████████████████████████████▍                      | 212/300 [19:55<07:32,  5.14s/it]

⚠️ Image PMC8262110_FIG2_2374.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  71%|██████████████████████████████████████████████████████▋                      | 213/300 [20:00<07:06,  4.91s/it]

⚠️ Image PMC8262143_F2_2378.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  71%|██████████████████████████████████████████████████████▉                      | 214/300 [20:05<07:10,  5.01s/it]

⚠️ Image PMC8262156_F2_2381.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  72%|███████████████████████████████████████████████████████▏                     | 215/300 [20:10<06:58,  4.93s/it]

Progress: 215/300 - Current Accuracy: 0.5163 (111/215)
⚠️ Image PMC8262156_F2_2382.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  72%|███████████████████████████████████████████████████████▍                     | 216/300 [20:13<06:21,  4.54s/it]

⚠️ Image PMC8262156_F2_2383.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  72%|███████████████████████████████████████████████████████▋                     | 217/300 [20:18<06:21,  4.59s/it]

⚠️ Image PMC8262157_F1_2384.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  73%|███████████████████████████████████████████████████████▉                     | 218/300 [20:22<06:07,  4.48s/it]

⚠️ Image PMC8262157_F1_2385.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  73%|████████████████████████████████████████████████████████▏                    | 219/300 [20:27<06:08,  4.54s/it]

⚠️ Image PMC8262157_F1_2385.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  73%|████████████████████████████████████████████████████████▍                    | 220/300 [20:30<05:35,  4.19s/it]

Progress: 220/300 - Current Accuracy: 0.5227 (115/220)
⚠️ Image PMC8262157_F1_2386.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  74%|████████████████████████████████████████████████████████▋                    | 221/300 [20:35<05:31,  4.20s/it]

⚠️ Image PMC8262163_F3_2404.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  74%|████████████████████████████████████████████████████████▉                    | 222/300 [20:40<05:49,  4.48s/it]

⚠️ Image PMC8262163_F3_2406.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  74%|█████████████████████████████████████████████████████████▏                   | 223/300 [20:44<05:38,  4.39s/it]

⚠️ Image PMC8262179_fig5a_2422.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  75%|█████████████████████████████████████████████████████████▍                   | 224/300 [20:49<05:50,  4.61s/it]

⚠️ Image PMC8262179_fig5a_2427.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  75%|█████████████████████████████████████████████████████████▊                   | 225/300 [20:55<06:07,  4.91s/it]

Progress: 225/300 - Current Accuracy: 0.5200 (117/225)
⚠️ Image PMC8262179_fig5c_2439.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  75%|██████████████████████████████████████████████████████████                   | 226/300 [21:01<06:31,  5.28s/it]

⚠️ Image PMC8262179_fig7a_2445.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  76%|██████████████████████████████████████████████████████████▎                  | 227/300 [21:09<07:20,  6.03s/it]

⚠️ Image PMC8262179_fig7a_2447.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  76%|██████████████████████████████████████████████████████████▌                  | 228/300 [21:13<06:43,  5.60s/it]

⚠️ Image PMC8262179_fig7a_2449.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  76%|██████████████████████████████████████████████████████████▊                  | 229/300 [21:19<06:36,  5.58s/it]

⚠️ Image PMC8262179_fig7a_2449.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  77%|███████████████████████████████████████████████████████████                  | 230/300 [21:24<06:26,  5.52s/it]

Progress: 230/300 - Current Accuracy: 0.5217 (120/230)
⚠️ Image PMC8262179_fig7b_2452.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  77%|███████████████████████████████████████████████████████████▎                 | 231/300 [21:29<06:07,  5.32s/it]

⚠️ Image PMC8262179_fig7b_2452.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  77%|███████████████████████████████████████████████████████████▌                 | 232/300 [21:34<05:49,  5.14s/it]

⚠️ Image PMC8262179_fig7b_2461.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  78%|███████████████████████████████████████████████████████████▊                 | 233/300 [21:39<05:54,  5.29s/it]

⚠️ Image PMC8262179_fig17b_2463.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  78%|████████████████████████████████████████████████████████████                 | 234/300 [21:46<06:20,  5.76s/it]

⚠️ Image PMC8262193_F1_2494.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  78%|████████████████████████████████████████████████████████████▎                | 235/300 [21:52<06:07,  5.66s/it]

Progress: 235/300 - Current Accuracy: 0.5277 (124/235)
⚠️ Image PMC8262193_F1_2495.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  79%|████████████████████████████████████████████████████████████▌                | 236/300 [21:57<06:04,  5.69s/it]

⚠️ Image PMC8262221_F2_2511.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  79%|████████████████████████████████████████████████████████████▊                | 237/300 [22:02<05:39,  5.39s/it]

⚠️ Image PMC8262240_F2_2521.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  79%|█████████████████████████████████████████████████████████████                | 238/300 [22:07<05:21,  5.18s/it]

⚠️ Image PMC8262240_F2_2522.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  80%|█████████████████████████████████████████████████████████████▎               | 239/300 [22:11<05:01,  4.94s/it]

⚠️ Image PMC8262240_F2_2523.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  80%|█████████████████████████████████████████████████████████████▌               | 240/300 [22:16<04:55,  4.93s/it]

Progress: 240/300 - Current Accuracy: 0.5250 (126/240)
⚠️ Image PMC8262340_F3_2639.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  80%|█████████████████████████████████████████████████████████████▊               | 241/300 [22:21<04:44,  4.82s/it]

⚠️ Image PMC8262340_F3_2641.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  81%|██████████████████████████████████████████████████████████████               | 242/300 [22:24<04:12,  4.36s/it]

⚠️ Image PMC8262504_F3_2681.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  81%|██████████████████████████████████████████████████████████████▎              | 243/300 [22:31<04:50,  5.10s/it]

⚠️ Image PMC8262504_F3_2685.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  81%|██████████████████████████████████████████████████████████████▋              | 244/300 [22:35<04:37,  4.96s/it]

⚠️ Image PMC8262792_pone.0253599.g006_2768.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  82%|██████████████████████████████████████████████████████████████▉              | 245/300 [22:40<04:28,  4.88s/it]

Progress: 245/300 - Current Accuracy: 0.5265 (129/245)
⚠️ Image PMC8262843_f2_2769.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  82%|███████████████████████████████████████████████████████████████▏             | 246/300 [22:45<04:27,  4.96s/it]

⚠️ Image PMC8262843_f2_2769.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  82%|███████████████████████████████████████████████████████████████▍             | 247/300 [22:51<04:34,  5.18s/it]

⚠️ Image PMC8262843_f2_2771.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  83%|███████████████████████████████████████████████████████████████▋             | 248/300 [22:56<04:23,  5.07s/it]

⚠️ Image PMC8262843_f2_2772.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  83%|███████████████████████████████████████████████████████████████▉             | 249/300 [23:00<04:03,  4.78s/it]

⚠️ Image PMC8262843_f2_2775.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  83%|████████████████████████████████████████████████████████████████▏            | 250/300 [23:07<04:34,  5.50s/it]

Progress: 250/300 - Current Accuracy: 0.5280 (132/250)
⚠️ Image PMC8262843_f2_2778.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  84%|████████████████████████████████████████████████████████████████▍            | 251/300 [23:12<04:25,  5.41s/it]

⚠️ Image PMC8262949_fig1_2816.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  84%|████████████████████████████████████████████████████████████████▋            | 252/300 [23:18<04:21,  5.46s/it]

⚠️ Image PMC8262949_fig5_2825.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  84%|████████████████████████████████████████████████████████████████▉            | 253/300 [23:22<04:04,  5.21s/it]

⚠️ Image PMC8263054_F1_2883.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  85%|█████████████████████████████████████████████████████████████████▏           | 254/300 [23:26<03:38,  4.75s/it]

⚠️ Image PMC8263054_F1_2884.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  85%|█████████████████████████████████████████████████████████████████▍           | 255/300 [23:31<03:36,  4.82s/it]

Progress: 255/300 - Current Accuracy: 0.5333 (136/255)
⚠️ Image PMC8263054_F1_2887.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  85%|█████████████████████████████████████████████████████████████████▋           | 256/300 [23:34<03:13,  4.39s/it]

⚠️ Image PMC8263054_F1_2889.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  86%|█████████████████████████████████████████████████████████████████▉           | 257/300 [23:41<03:36,  5.04s/it]

⚠️ Image PMC8263055_awaa420-F3_2902.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  86%|██████████████████████████████████████████████████████████████████▏          | 258/300 [23:47<03:43,  5.33s/it]

⚠️ Image PMC8263230_fig4_2952.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  86%|██████████████████████████████████████████████████████████████████▍          | 259/300 [23:53<03:41,  5.41s/it]

⚠️ Image PMC8263230_fig4_2953.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  87%|██████████████████████████████████████████████████████████████████▋          | 260/300 [23:58<03:34,  5.35s/it]

Progress: 260/300 - Current Accuracy: 0.5346 (139/260)
⚠️ Image PMC8263397_Fig1_3001.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  87%|██████████████████████████████████████████████████████████████████▉          | 261/300 [24:04<03:42,  5.70s/it]

⚠️ Image PMC8263417_Fig6_3022.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  87%|███████████████████████████████████████████████████████████████████▏         | 262/300 [24:10<03:41,  5.82s/it]

⚠️ Image PMC8263453_Fig1_3073.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  88%|███████████████████████████████████████████████████████████████████▌         | 263/300 [24:17<03:45,  6.09s/it]

⚠️ Image PMC8263539_Fig1_3120.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  88%|███████████████████████████████████████████████████████████████████▊         | 264/300 [24:22<03:26,  5.74s/it]

⚠️ Image PMC8263539_Fig1_3123.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  88%|████████████████████████████████████████████████████████████████████         | 265/300 [24:26<03:03,  5.25s/it]

Progress: 265/300 - Current Accuracy: 0.5358 (142/265)
⚠️ Image PMC8263539_Fig1_3123.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  89%|████████████████████████████████████████████████████████████████████▎        | 266/300 [24:32<03:01,  5.33s/it]

⚠️ Image PMC8263539_Fig2_3138.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  89%|████████████████████████████████████████████████████████████████████▌        | 267/300 [24:36<02:44,  4.99s/it]

⚠️ Image PMC8263539_Fig2_3140.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  89%|████████████████████████████████████████████████████████████████████▊        | 268/300 [24:40<02:33,  4.79s/it]

⚠️ Image PMC8263539_Fig2_3141.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  90%|█████████████████████████████████████████████████████████████████████        | 269/300 [24:45<02:29,  4.82s/it]

⚠️ Image PMC8263575_Fig3_3159.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  90%|█████████████████████████████████████████████████████████████████████▎       | 270/300 [24:49<02:14,  4.49s/it]

Progress: 270/300 - Current Accuracy: 0.5407 (146/270)
⚠️ Image PMC8263575_Fig3_3160.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  90%|█████████████████████████████████████████████████████████████████████▌       | 271/300 [24:53<02:09,  4.47s/it]

⚠️ Image PMC8263575_Fig3_3161.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  91%|█████████████████████████████████████████████████████████████████████▊       | 272/300 [24:57<01:56,  4.16s/it]

⚠️ Image PMC8263575_Fig5_3175.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  91%|██████████████████████████████████████████████████████████████████████       | 273/300 [25:01<01:50,  4.09s/it]

⚠️ Image PMC8263575_Fig5_3176.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  91%|██████████████████████████████████████████████████████████████████████▎      | 274/300 [25:07<02:05,  4.81s/it]

⚠️ Image PMC8263575_Fig5_3177.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  92%|██████████████████████████████████████████████████████████████████████▌      | 275/300 [25:12<02:01,  4.84s/it]

Progress: 275/300 - Current Accuracy: 0.5455 (150/275)
⚠️ Image PMC8263575_Fig5_3179.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  92%|██████████████████████████████████████████████████████████████████████▊      | 276/300 [25:16<01:53,  4.72s/it]

⚠️ Image PMC8263753_Fig3_3257.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  92%|███████████████████████████████████████████████████████████████████████      | 277/300 [25:22<01:57,  5.11s/it]

⚠️ Image PMC8263832_Fig2_3276.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  93%|███████████████████████████████████████████████████████████████████████▎     | 278/300 [25:27<01:49,  4.99s/it]

⚠️ Image PMC8263845_Fig6_3289.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  93%|███████████████████████████████████████████████████████████████████████▌     | 279/300 [25:31<01:37,  4.63s/it]

⚠️ Image PMC8263845_Fig6_3289.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  93%|███████████████████████████████████████████████████████████████████████▊     | 280/300 [25:37<01:40,  5.02s/it]

Progress: 280/300 - Current Accuracy: 0.5464 (153/280)
⚠️ Image PMC8263845_Fig5_3291.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  94%|████████████████████████████████████████████████████████████████████████     | 281/300 [25:44<01:49,  5.77s/it]

⚠️ Image PMC8263845_Fig11_3298.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  94%|████████████████████████████████████████████████████████████████████████▍    | 282/300 [25:52<01:55,  6.43s/it]

⚠️ Image PMC8263845_Fig16_3305.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  94%|████████████████████████████████████████████████████████████████████████▋    | 283/300 [25:56<01:37,  5.73s/it]

⚠️ Image PMC8263845_Fig18_3314.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  95%|████████████████████████████████████████████████████████████████████████▉    | 284/300 [26:02<01:30,  5.64s/it]

⚠️ Image PMC8263845_Fig22_3319.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  95%|█████████████████████████████████████████████████████████████████████████▏   | 285/300 [26:10<01:34,  6.29s/it]

Progress: 285/300 - Current Accuracy: 0.5439 (155/285)
⚠️ Image PMC8263845_Fig21_3320.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  95%|█████████████████████████████████████████████████████████████████████████▍   | 286/300 [26:16<01:26,  6.20s/it]

⚠️ Image PMC8263845_Fig21_3320.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  96%|█████████████████████████████████████████████████████████████████████████▋   | 287/300 [26:20<01:13,  5.63s/it]

⚠️ Image PMC8263845_Fig21_3321.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  96%|█████████████████████████████████████████████████████████████████████████▉   | 288/300 [26:28<01:14,  6.24s/it]

⚠️ Image PMC8263845_Fig26_3331.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  96%|██████████████████████████████████████████████████████████████████████████▏  | 289/300 [26:34<01:08,  6.19s/it]

⚠️ Image PMC8263845_Fig31_3337.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  97%|██████████████████████████████████████████████████████████████████████████▍  | 290/300 [26:39<00:58,  5.80s/it]

Progress: 290/300 - Current Accuracy: 0.5448 (158/290)
⚠️ Image PMC8263847_Fig4_3346.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  97%|██████████████████████████████████████████████████████████████████████████▋  | 291/300 [26:43<00:48,  5.44s/it]

⚠️ Image PMC8263847_Fig4_3348.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  97%|██████████████████████████████████████████████████████████████████████████▉  | 292/300 [26:47<00:40,  5.02s/it]

⚠️ Image PMC8263847_Fig1_3353.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  98%|███████████████████████████████████████████████████████████████████████████▏ | 293/300 [26:55<00:40,  5.72s/it]

⚠️ Image PMC8263847_Fig1_3354.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  98%|███████████████████████████████████████████████████████████████████████████▍ | 294/300 [26:59<00:32,  5.35s/it]

⚠️ Image PMC8263847_Fig1_3356.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  98%|███████████████████████████████████████████████████████████████████████████▋ | 295/300 [27:03<00:24,  4.99s/it]

Progress: 295/300 - Current Accuracy: 0.5525 (163/295)
⚠️ Image PMC8263847_Fig2_3358.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  99%|███████████████████████████████████████████████████████████████████████████▉ | 296/300 [27:09<00:20,  5.08s/it]

⚠️ Image PMC8263847_Fig7_3363.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  99%|████████████████████████████████████████████████████████████████████████████▏| 297/300 [27:21<00:22,  7.42s/it]

⚠️ Image PMC8263847_Fig7_3365.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG:  99%|████████████████████████████████████████████████████████████████████████████▍| 298/300 [27:26<00:13,  6.70s/it]

⚠️ Image PMC8263847_Fig7_3366.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG: 100%|████████████████████████████████████████████████████████████████████████████▋| 299/300 [27:33<00:06,  6.60s/it]

⚠️ Image PMC8263847_Fig10_3369.jpg not found in PMC embeddings, using fallback


Running Multiple-Choice Medical RAG: 100%|█████████████████████████████████████████████████████████████████████████████| 300/300 [27:37<00:00,  5.52s/it]

Progress: 300/300 - Current Accuracy: 0.5567 (167/300)
Final Accuracy: 0.5567 (167/300)
✅ Results saved to medical_multiple_choice_results.json
✅ Multiple-choice medical RAG process completed!
Final accuracy: 0.5567





# **LLaVA**

In [None]:
# Install required libraries
!pip install torch torchvision transformers peft accelerate bitsandbytes datasets evaluate
!pip install sentencepiece
!pip install -U huggingface_hub

Collecting peft
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading peft-0.15.2-py3-none-any.whl (411 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m411.1/411.1 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.6.0-py3-none-any.whl (354 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m354.7/354.7 kB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl (76.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m49.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━

## 1. Data Preparation

In [None]:
import pandas as pd
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Folder that holds the figure files referenced by the 'Image_Name' column
image_dir = './images_2/figures'  # Replace with your actual path

# Report the size of each split and the schema of the training frame
train_rows = len(train_df)
print(f"Train set size: {train_rows}")
test_rows = len(test_df)
print(f"Test set size: {test_rows}")
train_columns = list(train_df.columns)
print(f"Column names: {train_columns}")

# Spot-check: resolve the first training image and confirm it is on disk
first_image_name = train_df['Image_Name'].iloc[0]
sample_image_path = os.path.join(image_dir, first_image_name)
print(f"Sample image path: {sample_image_path}")
sample_image_found = os.path.exists(sample_image_path)
print(f"Image exists: {sample_image_found}")

Train set size: 31329
Test set size: 33430
Column names: ['Figure_path', 'Question', 'Answer', 'Choice A', 'Choice B', 'Choice C', 'Choice D', 'Answer_label', 'Image_Name']
Sample image path: ./images_2/figures/PMC1064097_F4_1520.jpg
Image exists: True


## 2. Load the LLaVA Model and Processor

In [None]:
from transformers import BitsAndBytesConfig, LlavaForConditionalGeneration, LlavaProcessor
import torch

# Hub identifier of the checkpoint shared by the processor and the model.
model_id = "llava-hf/llava-1.5-7b-hf"

# First, load the processor (tokenizer + image preprocessing).
processor = LlavaProcessor.from_pretrained(model_id)

# Check what image token the model uses
special_tokens = processor.tokenizer.special_tokens_map
print(f"Special tokens: {special_tokens}")

# Load the model with 8-bit quantization for memory efficiency.
# The bare `load_in_8bit=True` keyword is deprecated (transformers warns about
# it at load time); the supported form is a BitsAndBytesConfig passed through
# `quantization_config`.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
    device_map="auto"
)

# Report the id and string form of the image placeholder token, if the model
# config defines one, so it can be compared with the processor's special tokens.
if hasattr(model.config, "image_token_index"):
    print(f"Model image token index: {model.config.image_token_index}")
    # Convert token ID to actual token string
    token = processor.tokenizer.convert_ids_to_tokens(model.config.image_token_index)
    print(f"Image token: {token}")
else:
    print("Model does not have a defined image_token_index")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Special tokens: {'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '<pad>', 'image_token': '<image>'}


Loading checkpoint shards: 100%|██████████| 3/3 [00:05<00:00,  1.90s/it]

Model image token index: 32000
Image token: <image>





## 3. Create Custom Dataset Class

In [None]:
class MedVQADataset(Dataset):
    """Multiple-choice medical VQA rows turned into processor inputs.

    Each item pairs one image with a prompt built from the row's question and
    its four choices, plus the tokenized answer letter under ``"labels"``.

    Args:
        dataframe: Table with 'Image_Name', 'Question', 'Choice A'..'Choice D'
            and 'Answer_label' columns (accessed positionally via ``.iloc``).
        image_dir: Directory that the 'Image_Name' values are relative to.
        processor: Callable accepting ``images=``, ``text=`` and
            ``return_tensors=`` and exposing a ``.tokenizer`` attribute.
    """

    def __init__(self, dataframe, image_dir, processor):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.processor = processor
        # Image placeholder placed at the start of every prompt.
        self.image_token = "<image>"

    def __len__(self):
        """Number of rows in the wrapped dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Build the processor inputs and answer labels for row ``idx``.

        Returns:
            dict: the processor's outputs with their leading batch dimension
            squeezed away, plus ``"labels"`` holding the token ids of
            ``" <Answer_label>"`` with the first token removed.
        """
        record = self.dataframe.iloc[idx]

        # Image is always converted to 3-channel RGB before processing.
        rgb_image = Image.open(os.path.join(self.image_dir, record['Image_Name'])).convert('RGB')

        # Re-letter the four options. If a choice contains a colon, everything
        # up to and including the first one is dropped (the text after it is
        # kept verbatim, whitespace included).
        option_lines = []
        for letter in ("A", "B", "C", "D"):
            option_text = record[f"Choice {letter}"]
            if ':' in option_text:
                option_text = option_text.split(':', 1)[1]
            option_lines.append(f"{letter}: {option_text}")
        choices = "\n".join(option_lines)

        prompt = (
            f"{self.image_token}\nQuestion: {record['Question']}\n\n"
            f"Options:\n{choices}\n\n"
            "Please select the correct answer (A, B, C, or D):"
        )

        encoded = self.processor(images=rgb_image, text=prompt, return_tensors="pt")

        # Strip the batch axis the processor adds for a single example.
        sample = {name: tensor.squeeze(0) for name, tensor in encoded.items()}

        # Tokenize the answer letter with a leading space and discard the
        # first returned id (presumably the BOS token - confirm).
        answer_ids = self.processor.tokenizer(f" {record['Answer_label']}", return_tensors="pt").input_ids
        sample["labels"] = answer_ids[:, 1:].squeeze(0)

        return sample

# Smoke test: wrap the first five training rows in MedVQADataset and inspect
# one processed sample.
# NOTE(review): this rebinds the notebook-global `test_dataset` to five
# *training* rows; confirm the real test-split dataset is (re)built before the
# Trainer cells use `test_dataset` as the evaluation set.
test_dataset = MedVQADataset(train_df.head(5), image_dir, processor)
print(f"Dataset size: {len(test_dataset)}")

# Pull a single item to look at what the dataset returns.
sample = test_dataset[0]
print(f"Sample keys: {sample.keys()}")

# Look up the vocabulary id of the "<image>" placeholder.
image_token_id = processor.tokenizer.convert_tokens_to_ids("<image>")
print(f"Image token ID: {image_token_id}")

# Token strings of the sample, kept around for interactive inspection.
input_ids = sample['input_ids']
tokens = processor.tokenizer.convert_ids_to_tokens(input_ids)

# How many positions of the prompt carry the image placeholder id.
image_token_count = int((input_ids == image_token_id).sum())
print(f"Number of image tokens in input: {image_token_count}")

# Shape of the preprocessed image tensor, when the processor returned one.
if 'pixel_values' in sample.keys():
    print(f"Pixel values shape: {sample['pixel_values'].shape}")

Dataset size: 5
Sample keys: dict_keys(['input_ids', 'attention_mask', 'pixel_values', 'labels'])
Image token ID: 32000
Number of image tokens in input: 576
Pixel values shape: torch.Size([3, 336, 336])


## 4. Set Up LoRA for Fine-tuning

In [None]:
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training

# Prepare the quantized model for LoRA fine-tuning.
# NOTE: this cell rebinds `model` twice (here and in get_peft_model below), so
# it is not idempotent - re-running it without reloading the base model would
# operate on the already-wrapped model.
model = prepare_model_for_kbit_training(model)

# Define LoRA configuration
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    inference_mode=False,
    r=8,  # Rank of the LoRA update matrices
    lora_alpha=32,
    lora_dropout=0.1,
    # Adapters are attached to every module whose name matches one of these
    # suffixes.
    # NOTE(review): the intent was the language model's attention projections,
    # but the q/k/v names presumably also match the vision tower's attention
    # layers; the 9,568,256 trainable parameters reported below look
    # consistent with that - confirm, and restrict the match if only the
    # language model should be adapted.
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"]
)

# Apply LoRA adapters to the model
model = get_peft_model(model, peft_config)

# Print summary of trainable parameters
print("Trainable parameters summary:")
model.print_trainable_parameters()

Trainable parameters summary:
trainable params: 9,568,256 || all params: 7,072,995,328 || trainable%: 0.1353


## 5. Create DataLoader and Training Configuration

In [None]:
from dataclasses import dataclass
from typing import Dict, List, Any
import torch

@dataclass
class ImprovedLlavaDataCollator:
    """Collate MedVQADataset-style items into one right-padded batch.

    Each item is a dict of 1-D ``input_ids`` / ``attention_mask`` tensors,
    optionally with ``pixel_values`` and ``labels``. Variable-length fields are
    padded on the right to the longest entry in the batch; ``pixel_values`` are
    stacked unchanged.

    Calling ``ImprovedLlavaDataCollator()`` with no arguments behaves as
    before: the pad id is read from the notebook-level ``processor`` when the
    collator is called. Passing ``pad_token_id`` removes that hidden dependency.
    """

    # Id used to pad ``input_ids``. None means "look it up from the
    # notebook-global `processor.tokenizer` at call time".
    pad_token_id: "int | None" = None
    # Value used to pad ``labels``; -100 is ignored in the loss calculation.
    label_pad_token_id: int = -100

    def __call__(self, batch):
        """Return a dict of batched tensors for ``batch`` (a list of item dicts).

        Keys: ``input_ids`` and ``attention_mask`` always; ``pixel_values`` and
        ``labels`` only when the first item carries them.
        """
        collated_batch = {}

        # Images are already a fixed size, so they stack directly.
        if "pixel_values" in batch[0]:
            collated_batch["pixel_values"] = torch.stack([item["pixel_values"] for item in batch])

        pad_id = self.pad_token_id
        if pad_id is None:
            pad_id = processor.tokenizer.pad_token_id

        pad_sequence = torch.nn.utils.rnn.pad_sequence

        # Right-pad token ids with the pad id and the mask with zeros so that
        # padded positions are not attended to.
        collated_batch["input_ids"] = pad_sequence(
            [item["input_ids"] for item in batch],
            batch_first=True,
            padding_value=pad_id,
        )
        collated_batch["attention_mask"] = pad_sequence(
            [item["attention_mask"] for item in batch],
            batch_first=True,
            padding_value=0,
        )

        # Labels are padded independently of input_ids (to the longest label
        # sequence in the batch), exactly as the items provide them.
        if "labels" in batch[0]:
            collated_batch["labels"] = pad_sequence(
                [item["labels"] for item in batch],
                batch_first=True,
                padding_value=self.label_pad_token_id,
            )

        return collated_batch

# Imported here so this cell does not rely on a name leaked from another cell.
from transformers import TrainingArguments

# Create the improved data collator
data_collator = ImprovedLlavaDataCollator()

# Small per-device batch to keep memory usage down.
batch_size = 2

# Training configuration consumed by the Trainer built in the next section.
training_args = TrainingArguments(
    output_dir="./results/llava-med-lora",
    num_train_epochs=3,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    warmup_steps=200,
    learning_rate=2e-4,
    fp16=True,
    logging_dir="./logs",
    logging_steps=20,
    save_strategy="steps",
    save_steps=200,
    eval_strategy="steps",
    eval_steps=200,
    load_best_model_at_end=True,
    report_to="none",
    # Accumulate gradients over 8 steps so the effective batch is
    # batch_size * 8 per device without holding a larger batch in memory.
    gradient_accumulation_steps=8,
    # The PEFT wrapper hides the base model's forward signature, so Trainer
    # cannot infer the label column by itself (it warns and falls back to an
    # empty list). Name it explicitly.
    label_names=["labels"],
)

# The Trainer itself is constructed in "6. Set Up the Trainer and Compute
# Metrics", after `compute_metrics` has been defined; building it here would
# reference names that do not exist yet on a fresh top-to-bottom run.
print("Training arguments configured with batch size:", batch_size)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Updated trainer initialized with batch size: 2
Starting training with improved collator...


## 6. Set Up the Trainer and Compute Metrics

In [None]:
from transformers import Trainer
import numpy as np

# Define a compute_metrics function for evaluation
def compute_metrics(eval_pred):
    """Token-level accuracy over the label positions that are not -100.

    Args:
        eval_pred: Pair ``(logits, labels)`` where ``logits`` has a trailing
            class/vocabulary axis and ``labels`` is a 2-D integer array that
            uses -100 for positions to ignore.

    Returns:
        dict: ``{"accuracy": value}`` - correct / counted positions, or 0 when
        every label is -100.

    The prediction at column ``j`` is compared with the label at column ``j``,
    for the columns that ``labels`` has.
    NOTE(review): causal-LM losses normally compare the logits at position
    ``j`` with the label at ``j + 1``; no such shift is applied here - confirm
    this matches how the labels are laid out.
    """
    logits, labels = eval_pred
    labels = np.asarray(labels)

    # Highest-scoring class per position, limited to the columns labels cover.
    predictions = np.argmax(logits, axis=-1)[:, :labels.shape[1]]

    # Only positions with a real label take part in the score.
    valid = labels != -100
    total = int(valid.sum())
    correct = int(((predictions == labels) & valid).sum())

    accuracy = correct / total if total > 0 else 0.0

    return {"accuracy": accuracy}

# Build the Trainer from the pieces prepared above: the LoRA-wrapped model,
# the training arguments, both datasets, the padding collator and the
# accuracy metric.
trainer = Trainer(
    model=model, args=training_args,
    train_dataset=train_dataset, eval_dataset=test_dataset,
    data_collator=data_collator, compute_metrics=compute_metrics,
)

# Confirm construction and report how many examples each split holds.
print("Trainer initialized successfully!")
n_train_examples, n_test_examples = len(train_dataset), len(test_dataset)
print(f"Training dataset size: {n_train_examples}")
print(f"Testing dataset size: {n_test_examples}")

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Trainer initialized successfully!
Training dataset size: 31329
Testing dataset size: 33430


## 7. Train the Model

In [None]:
# import torch
# from torch.utils.data import DataLoader
# from tqdm.auto import tqdm
# from torch.optim import AdamW  # Use AdamW from torch.optim instead

# # Create a subset of the data for testing our approach
# train_subset = train_dataset
# if len(train_dataset) > 1000:
#     # Use a smaller subset for initial testing
#     train_subset = torch.utils.data.Subset(train_dataset, list(range(1000)))
#     print(f"Using a subset of {len(train_subset)} examples for testing")
# else:
#     print(f"Using all {len(train_subset)} examples")

# # Create a simpler data collator that just returns the batch
# def simple_collator(batch):
#     # Extract pixel values
#     pixel_values = torch.stack([item["pixel_values"] for item in batch])

#     # Process input_ids and attention_mask with padding
#     max_len = max(len(item["input_ids"]) for item in batch)

#     # Prepare padded tensors
#     input_ids = []
#     attention_mask = []

#     for item in batch:
#         # Pad input_ids
#         ids = item["input_ids"]
#         padding = torch.full((max_len - len(ids),), processor.tokenizer.pad_token_id, dtype=ids.dtype)
#         input_ids.append(torch.cat([ids, padding]))

#         # Pad attention_mask
#         mask = item["attention_mask"]
#         padding = torch.zeros(max_len - len(mask), dtype=mask.dtype)
#         attention_mask.append(torch.cat([mask, padding]))

#     # Stack tensors
#     input_ids = torch.stack(input_ids)
#     attention_mask = torch.stack(attention_mask)

#     # Simple target: just use the first letter of the answer
#     targets = []
#     for item in batch:
#         # Get the label
#         label_str = processor.tokenizer.decode(item["labels"])
#         # Extract first character (should be A, B, C, or D)
#         if len(label_str) > 0 and label_str[0] in "ABCD":
#             # Map A, B, C, D to 0, 1, 2, 3
#             target = ord(label_str[0]) - ord('A')
#         else:
#             # Default to A if label is not recognized
#             target = 0
#         targets.append(target)

#     targets = torch.tensor(targets, dtype=torch.long)

#     return {
#         "pixel_values": pixel_values,
#         "input_ids": input_ids,
#         "attention_mask": attention_mask,
#         "targets": targets
#     }

# # Create dataloader with simple collator
# batch_size = 4
# dataloader = DataLoader(
#     train_subset,
#     batch_size=batch_size,
#     shuffle=True,
#     collate_fn=simple_collator
# )

# print(f"Created dataloader with batch size {batch_size}")

# # Set up optimizer
# optimizer = AdamW(model.parameters(), lr=5e-5)

# # Setup device
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

# # Training loop
# num_epochs = 1
# print(f"Starting custom training loop for {num_epochs} epochs")

# model.train()
# for epoch in range(num_epochs):
#     total_loss = 0
#     progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}")

#     for batch in progress_bar:
#         # Move batch to device
#         batch = {k: v.to(device) for k, v in batch.items()}

#         # Extract targets
#         targets = batch.pop("targets")

#         # Zero gradients
#         optimizer.zero_grad()

#         # Forward pass - pass the main inputs
#         outputs = model(
#             input_ids=batch["input_ids"],
#             attention_mask=batch["attention_mask"],
#             pixel_values=batch["pixel_values"],
#         )

#         # Extract logits for the relevant positions (last token of input)
#         logits = outputs.logits

#         # Compute loss
#         # Simple classification loss: take the last token of each sequence
#         last_token_logits = logits[:, -1, :]
#         # Restrict to the first few tokens that represent A, B, C, D
#         relevant_logits = last_token_logits[:, :4]
#         loss = torch.nn.functional.cross_entropy(relevant_logits, targets)

#         # Backward pass
#         loss.backward()

#         # Update weights
#         optimizer.step()

#         # Update progress bar
#         total_loss += loss.item()
#         progress_bar.set_postfix({"loss": loss.item()})

#     avg_loss = total_loss / len(dataloader)
#     print(f"Epoch {epoch+1} - Average loss: {avg_loss:.4f}")

# # Save the fine-tuned model
# output_dir = "./final_model/llava-med-lora-custom"
# print(f"Saving model to {output_dir}")
# model.save_pretrained(output_dir)
# processor.save_pretrained(output_dir)
# print("Model saved successfully!")

Using a subset of 1000 examples for testing
Created dataloader with batch size 4
Starting custom training loop for 1 epochs


Epoch 1: 100%|██████████| 250/250 [07:03<00:00,  1.70s/it, loss=7.33e-5]


Epoch 1 - Average loss: 0.9325
Saving model to ./final_model/llava-med-lora-custom
Model saved successfully!


In [None]:
# Define a function to run inference on a single example
def predict_single_example(image_path, question, choices, true_answer=None):
    """
    Run model prediction on a single example

    Uses the notebook-level ``processor``, ``model`` and ``device``.

    Args:
        image_path: Path to the image file
        question: Question text
        choices: Dictionary of choices keyed by 'A', 'B', 'C', 'D'
        true_answer: The correct answer letter (optional)

    Returns:
        dict with ``"predicted_answer"`` (one of 'A'-'D') and ``"is_correct"``
        (bool, or None when ``true_answer`` is not given).
    """
    # Load and process the image
    image = Image.open(image_path).convert('RGB')

    # Format choices exactly the way MedVQADataset does for training: if a
    # choice contains a colon (the dataframe stores them as "A: ..."), drop
    # everything up to and including the first one before re-lettering.
    # Without this the inference prompt reads "A: A: ..." and no longer matches
    # the prompts the model was fine-tuned on.
    option_lines = []
    for letter in ("A", "B", "C", "D"):
        option_text = choices[letter]
        if ':' in option_text:
            option_text = option_text.split(':', 1)[1]
        option_lines.append(f"{letter}: {option_text}")
    choices_text = "\n".join(option_lines)

    # Create prompt with image token
    prompt = f"<image>\nQuestion: {question}\n\nOptions:\n{choices_text}\n\nPlease select the correct answer (A, B, C, or D):"

    # Process inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Forward pass only; no gradients are needed for inference.
    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            pixel_values=inputs["pixel_values"]
        )

    # Score the four answer classes from the logits at the last position.
    # NOTE(review): `[:, :4]` selects the first four entries of the last axis
    # (vocabulary ids 0-3), not necessarily the ids of the tokens "A"-"D".
    # The custom training loop reads the same columns, so the two agree with
    # each other - confirm this is the intended classification head.
    logits = outputs.logits
    last_token_logits = logits[:, -1, :]
    relevant_logits = last_token_logits[:, :4]
    prediction_idx = torch.argmax(relevant_logits, dim=1).item()

    # Convert to letter
    predicted_answer = chr(ord('A') + prediction_idx)

    # Check if correct
    is_correct = (predicted_answer == true_answer) if true_answer else None

    return {
        "predicted_answer": predicted_answer,
        "is_correct": is_correct
    }

# Spot-check the model on a handful of randomly drawn rows of `train_df`
# (note: these are training rows, not a held-out split).
import random

if 'Image_Name' not in train_df.columns:
    print("Cannot find 'Image_Name' column in the dataframe. Please check your data structure.")
else:
    # Draw up to `num_samples` distinct row positions.
    num_samples = 20
    sample_indices = random.sample(range(len(train_df)), min(num_samples, len(train_df)))
    samples = [train_df.iloc[i] for i in sample_indices]

    # Running tally of predictions.
    correct = 0
    total = 0

    print(f"Evaluating model on {len(samples)} examples...")

    for i, row in enumerate(samples):
        # Gather everything predict_single_example needs from the row.
        image_path = os.path.join(image_dir, row['Image_Name'])
        question = row['Question']
        choices = {
            'A': row['Choice A'],
            'B': row['Choice B'],
            'C': row['Choice C'],
            'D': row['Choice D']
        }
        true_answer = row['Answer_label']

        result = predict_single_example(image_path, question, choices, true_answer)

        # Count the example, and count it as a hit when the prediction matched.
        correct += 1 if result['is_correct'] else 0
        total += 1

        # Per-example trace.
        print(f"Example {i+1}/{len(samples)}: Predicted {result['predicted_answer']}, Actual {true_answer}, Correct: {result['is_correct']}")

    # Fraction of sampled rows answered correctly (0 if nothing was sampled).
    accuracy = correct / total if total > 0 else 0
    print(f"\nEvaluation Accuracy: {accuracy:.4f} ({correct}/{total})")

Evaluating model on 20 examples...
Example 1/20: Predicted A, Actual C, Correct: False
Example 2/20: Predicted A, Actual D, Correct: False
Example 3/20: Predicted A, Actual A, Correct: True
Example 4/20: Predicted A, Actual A, Correct: True
Example 5/20: Predicted A, Actual A, Correct: True
Example 6/20: Predicted A, Actual B, Correct: False
Example 7/20: Predicted A, Actual D, Correct: False
Example 8/20: Predicted A, Actual B, Correct: False
Example 9/20: Predicted A, Actual A, Correct: True
Example 10/20: Predicted A, Actual A, Correct: True
Example 11/20: Predicted A, Actual C, Correct: False
Example 12/20: Predicted A, Actual D, Correct: False
Example 13/20: Predicted A, Actual A, Correct: True
Example 14/20: Predicted A, Actual D, Correct: False
Example 15/20: Predicted A, Actual B, Correct: False
Example 16/20: Predicted A, Actual D, Correct: False
Example 17/20: Predicted A, Actual C, Correct: False
Example 18/20: Predicted A, Actual A, Correct: True
Example 19/20: Predicted A,

## Hyperparameter Tuning

In [None]:
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch.optim import AdamW
import os
from torch.optim.lr_scheduler import ReduceLROnPlateau
import random

# Cap the number of training examples used by the custom loop. When the
# dataset is larger than the cap, only its first MAX_TRAIN_EXAMPLES items are
# kept (no shuffling is applied when selecting them).
MAX_TRAIN_EXAMPLES = 5000

if len(train_dataset) > MAX_TRAIN_EXAMPLES:
    train_subset = torch.utils.data.Subset(train_dataset, list(range(MAX_TRAIN_EXAMPLES)))
    print(f"Using a subset of {len(train_subset)} examples for training")
else:
    train_subset = train_dataset
    print(f"Using all {len(train_subset)} examples for training")

# Collator for the custom training loop: pads the inputs and converts each
# item's tokenized answer letter into an integer class target.
def simple_collator(batch):
    """Collate dataset items into padded tensors plus class targets.

    Args:
        batch: List of item dicts with ``pixel_values``, 1-D ``input_ids`` and
            ``attention_mask`` tensors, and ``labels`` (token ids of the answer
            letter).

    Returns:
        dict with ``pixel_values`` (stacked), ``input_ids`` / ``attention_mask``
        (right-padded to the longest prompt in the batch) and ``targets``
        (LongTensor with A, B, C, D mapped to 0, 1, 2, 3).

    Uses the notebook-level ``processor`` for the pad id and for decoding.
    """
    import warnings

    # Extract pixel values
    pixel_values = torch.stack([item["pixel_values"] for item in batch])

    # Process input_ids and attention_mask with padding
    max_len = max(len(item["input_ids"]) for item in batch)

    # Prepare padded tensors
    input_ids = []
    attention_mask = []

    for item in batch:
        # Pad input_ids on the right with the tokenizer's pad id
        ids = item["input_ids"]
        padding = torch.full((max_len - len(ids),), processor.tokenizer.pad_token_id, dtype=ids.dtype)
        input_ids.append(torch.cat([ids, padding]))

        # Pad attention_mask with zeros so padded positions are masked out
        mask = item["attention_mask"]
        padding = torch.zeros(max_len - len(mask), dtype=mask.dtype)
        attention_mask.append(torch.cat([mask, padding]))

    # Stack tensors
    input_ids = torch.stack(input_ids)
    attention_mask = torch.stack(attention_mask)

    # Simple target: just use the first letter of the answer
    targets = []
    for item in batch:
        # The labels were tokenized from " <letter>", so the decoded text can
        # start with whitespace. Strip it first; otherwise the first character
        # is a space, the check below fails and every example is silently
        # labelled "A".
        label_str = processor.tokenizer.decode(item["labels"]).strip()
        if label_str and label_str[0] in "ABCD":
            # Map A, B, C, D to 0, 1, 2, 3
            target = ord(label_str[0]) - ord('A')
        else:
            # Keep the historical fallback to A, but make it visible: a wrong
            # target here corrupts training without any other symptom.
            warnings.warn(f"Unrecognized answer label {label_str!r}; defaulting target to 'A'")
            target = 0
        targets.append(target)

    targets = torch.tensor(targets, dtype=torch.long)

    return {
        "pixel_values": pixel_values,
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "targets": targets
    }

# Define a function to run inference on a single example
def predict_single_example(model, processor, device, image_path, question, choices, true_answer=None):
    """
    Run model prediction on a single example

    Args:
        model: Callable model returning an object with ``.logits``
        processor: Callable accepting ``images=``, ``text=``, ``return_tensors=``
        device: Device the processed inputs are moved to
        image_path: Path to the image file
        question: Question text
        choices: Dictionary of choices keyed by 'A', 'B', 'C', 'D'
        true_answer: The correct answer letter (optional)

    Returns:
        dict with ``"predicted_answer"`` (one of 'A'-'D') and ``"is_correct"``
        (bool, or None when ``true_answer`` is not given).
    """
    # Load and process the image
    image = Image.open(image_path).convert('RGB')

    # Format choices exactly the way MedVQADataset does for training: if a
    # choice contains a colon (the dataframe stores them as "A: ..."), drop
    # everything up to and including the first one before re-lettering.
    # Without this the inference prompt reads "A: A: ..." and no longer matches
    # the prompts the model was fine-tuned on.
    option_lines = []
    for letter in ("A", "B", "C", "D"):
        option_text = choices[letter]
        if ':' in option_text:
            option_text = option_text.split(':', 1)[1]
        option_lines.append(f"{letter}: {option_text}")
    choices_text = "\n".join(option_lines)

    # Create prompt with image token
    prompt = f"<image>\nQuestion: {question}\n\nOptions:\n{choices_text}\n\nPlease select the correct answer (A, B, C, or D):"

    # Process inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Forward pass only; no gradients are needed for inference.
    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            pixel_values=inputs["pixel_values"]
        )

    # Score the four answer classes from the logits at the last position.
    # NOTE(review): `[:, :4]` selects the first four entries of the last axis
    # (vocabulary ids 0-3), not necessarily the ids of the tokens "A"-"D".
    # The custom training loop reads the same columns, so the two agree with
    # each other - confirm this is the intended classification head.
    logits = outputs.logits
    last_token_logits = logits[:, -1, :]
    relevant_logits = last_token_logits[:, :4]
    prediction_idx = torch.argmax(relevant_logits, dim=1).item()

    # Convert to letter
    predicted_answer = chr(ord('A') + prediction_idx)

    # Check if correct
    is_correct = (predicted_answer == true_answer) if true_answer else None

    return {
        "predicted_answer": predicted_answer,
        "is_correct": is_correct
    }

# Function to evaluate the model
def evaluate_model(model, processor, device, train_df, image_dir, num_samples=50):
    """
    Estimate accuracy on a random sample of rows from ``train_df``.

    Switches ``model`` to eval mode, draws up to ``num_samples`` distinct rows,
    runs ``predict_single_example`` on each one and prints and returns the
    fraction answered correctly (0 when no rows were sampled). The model is
    left in eval mode afterwards.
    """
    model.eval()

    # Choose which row positions to score.
    n_rows = len(train_df)
    sample_indices = random.sample(range(n_rows), min(num_samples, n_rows))

    print(f"Evaluating model on {len(sample_indices)} examples...")

    correct = 0
    total = 0
    for row_idx in sample_indices:
        row = train_df.iloc[row_idx]

        # Image location, question text, the four options and the gold letter.
        image_path = os.path.join(image_dir, row['Image_Name'])
        question = row['Question']
        choices = {letter: row[f'Choice {letter}'] for letter in ('A', 'B', 'C', 'D')}
        true_answer = row['Answer_label']

        result = predict_single_example(model, processor, device, image_path, question, choices, true_answer)

        # Tally the outcome.
        total += 1
        if result['is_correct']:
            correct += 1

    if total > 0:
        accuracy = correct / total
    else:
        accuracy = 0
    print(f"Evaluation Accuracy: {accuracy:.4f} ({correct}/{total})")
    return accuracy

# Create dataloader with simple collator
batch_size = 4
dataloader = DataLoader(
    train_subset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=simple_collator
)

print(f"Created dataloader with batch size {batch_size}")

# Set up optimizer with a lower learning rate
optimizer = AdamW(model.parameters(), lr=1e-5)

# Setup device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop with more epochs
num_epochs = 5
print(f"Starting custom training loop for {num_epochs} epochs")

# Halve the learning rate when the epoch-average loss stops improving
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1, verbose=True)

# Create directory for checkpoints
os.makedirs("./checkpoints", exist_ok=True)

# Track best model
best_accuracy = 0.0
best_epoch = 0

for epoch in range(num_epochs):
    # evaluate_model() switches the model to eval mode at the end of every
    # epoch, so training mode has to be re-enabled here; calling model.train()
    # only once before the loop leaves epochs 2+ running in eval mode.
    model.train()

    total_loss = 0
    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")

    for batch in progress_bar:
        # Move batch to device
        batch = {k: v.to(device) for k, v in batch.items()}

        # Extract targets
        targets = batch.pop("targets")

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            pixel_values=batch["pixel_values"],
        )

        # Compute loss from the logits at each prompt's last *real* token.
        # simple_collator pads on the right, so for every prompt shorter than
        # the longest one in the batch, position -1 is a padding slot. Count
        # the padding per row and step back from the end by that amount.
        logits = outputs.logits
        pad_counts = batch["attention_mask"].shape[1] - batch["attention_mask"].sum(dim=1)
        last_positions = (logits.shape[1] - 1 - pad_counts).to(logits.device)
        row_index = torch.arange(logits.shape[0], device=logits.device)
        last_token_logits = logits[row_index, last_positions]

        # NOTE(review): columns 0-3 are the first four entries of the last
        # axis (vocabulary ids 0-3), not necessarily the ids of the tokens
        # "A"-"D". predict_single_example reads the same columns, so training
        # and inference agree - confirm this is the intended classification head.
        relevant_logits = last_token_logits[:, :4]
        loss = torch.nn.functional.cross_entropy(relevant_logits, targets)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        # Update progress bar
        total_loss += loss.item()
        progress_bar.set_postfix({"loss": loss.item()})

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} - Average loss: {avg_loss:.4f}")

    # Update learning rate based on loss
    scheduler.step(avg_loss)

    # Evaluate after each epoch (on a random sample of rows from train_df)
    print(f"Evaluating after epoch {epoch+1}...")
    accuracy = evaluate_model(model, processor, device, train_df, image_dir, num_samples=50)

    # Save checkpoint if best model
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_epoch = epoch + 1

        # Save the best model
        checkpoint_dir = f"./checkpoints/epoch_{epoch+1}_acc_{accuracy:.4f}"
        os.makedirs(checkpoint_dir, exist_ok=True)
        model.save_pretrained(checkpoint_dir)
        print(f"✅ New best model saved! Accuracy: {accuracy:.4f}")

# Save the final model
output_dir = "./final_model/llava-med-lora-5epochs"
print(f"Saving final model to {output_dir}")
model.save_pretrained(output_dir)
processor.save_pretrained(output_dir)
print("Model saved successfully!")

print(f"Best model was from epoch {best_epoch} with accuracy {best_accuracy:.4f}")

Using a subset of 5000 examples for training
Created dataloader with batch size 4
Starting custom training loop for 5 epochs


Epoch 1/5: 100%|██████████| 1250/1250 [35:03<00:00,  1.68s/it, loss=9.24e-7]


Epoch 1/5 - Average loss: 0.0008
Evaluating after epoch 1...
Evaluating model on 50 examples...
Evaluation Accuracy: 0.3000 (15/50)
✅ New best model saved! Accuracy: 0.3000


Epoch 2/5:   9%|▉         | 110/1250 [4:36:40<48:20:57, 152.68s/it, loss=1.13e-6]

Evaluate Model

In [None]:
def _predict_choice_letter(model, processor, device, image_path, question, choices):
    """Run a single forward pass and return the predicted option letter ('A'-'D')."""
    # Close the underlying file as soon as the RGB copy exists; a bare
    # Image.open() keeps the handle open until garbage collection.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # NOTE(review): the raw CSV preview at the top of the notebook shows choice
    # text that already begins with "A:"; if the frame passed in has not been
    # cleaned, the letter is repeated in the prompt. Confirm against the prompt
    # used for training before changing it.
    choices_text = f"A: {choices['A']}\nB: {choices['B']}\nC: {choices['C']}\nD: {choices['D']}"
    prompt = f"<image>\nQuestion: {question}\n\nOptions:\n{choices_text}\n\nPlease select the correct answer (A, B, C, or D):"

    inputs = processor(images=image, text=prompt, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            pixel_values=inputs["pixel_values"]
        )

    # NOTE(review): the first four vocabulary logits of the last position are
    # read as the scores for A-D. These are vocabulary ids 0-3, not the
    # tokenizer ids of the letters. The training loop in this notebook applies
    # cross-entropy to the same slice, so it is kept here for consistency --
    # confirm this is the intended answer head.
    last_token_logits = outputs.logits[:, -1, :]
    relevant_logits = last_token_logits[:, :4]
    prediction_idx = torch.argmax(relevant_logits, dim=1).item()
    return chr(ord('A') + prediction_idx)


def _print_confusion_matrix(confusion_matrix):
    """Print the true-label (rows) by predicted-label (columns) count table."""
    print("\nConfusion Matrix:")
    print("  | A  | B  | C  | D  |")
    print("--|----|----|----|----|")
    for true_label in ('A', 'B', 'C', 'D'):
        cells = "".join(
            f" {confusion_matrix[true_label][pred_label]:2d} |"
            for pred_label in ('A', 'B', 'C', 'D')
        )
        print(f"{true_label} |{cells}")


def calculate_accuracy(model, processor, device, test_df, image_dir, num_samples=None, detailed=False, seed=None):
    """
    Calculate multiple-choice accuracy of the model, with a confusion matrix
    and a per-question-type breakdown printed to stdout.

    An example is scored only if its image exists, its answer label is one of
    A/B/C/D (after stripping whitespace and upper-casing), and inference
    succeeds. Everything else is reported and counted as skipped, so the
    accuracy denominator, the confusion matrix and the question-type stats
    always describe the same set of examples.

    Args:
        model: The fine-tuned model; called with input_ids, attention_mask and
            pixel_values, and expected to return an object with `.logits`
        processor: Callable that turns (images, text) into model input tensors
        device: The device to run inference on
        test_df: Dataframe with columns 'Image_Name', 'Question',
            'Choice A'..'Choice D' and 'Answer_label'
        image_dir: Directory containing images
        num_samples: Number of samples to evaluate (None for all); ignored if
            it is not smaller than len(test_df)
        detailed: Whether to return detailed metrics
        seed: Optional seed for the subsample. None (default) draws from the
            global `random` state, exactly as before

    Returns:
        The overall accuracy as a float when detailed=False. When
        detailed=True, a dict with keys "accuracy", "confusion_matrix",
        "question_types", "detailed_results" and "skipped".
    """
    model.eval()

    # Select samples for evaluation
    if num_samples is not None and num_samples < len(test_df):
        sampler = random if seed is None else random.Random(seed)
        sample_indices = sampler.sample(range(len(test_df)), num_samples)
        samples = test_df.iloc[sample_indices]
    else:
        samples = test_df

    print(f"Evaluating model on {len(samples)} examples...")

    answer_labels = ('A', 'B', 'C', 'D')
    correct = 0
    total = 0
    skipped = 0
    results = []
    # confusion_matrix[true_label][predicted_label] -> count
    confusion_matrix = {
        true_label: {pred_label: 0 for pred_label in answer_labels}
        for true_label in answer_labels
    }
    # Accuracy bucketed by the first word of the question ("What", "Which", ...)
    question_types = {}

    progress_bar = tqdm(range(len(samples)), desc="Evaluating")
    for i in progress_bar:
        row = samples.iloc[i]

        image_path = os.path.join(image_dir, row['Image_Name'])
        if not os.path.exists(image_path):
            print(f"Warning: Image not found: {image_path}")
            skipped += 1
            continue

        # Validate the label BEFORE touching any counter; otherwise a bad label
        # inflates `total` while never reaching the confusion matrix.
        true_answer = str(row['Answer_label']).strip().upper()
        if true_answer not in confusion_matrix:
            print(f"Warning: Skipping example {i}: unexpected answer label {row['Answer_label']!r}")
            skipped += 1
            continue

        question = row['Question']
        choices = {
            'A': row['Choice A'],
            'B': row['Choice B'],
            'C': row['Choice C'],
            'D': row['Choice D']
        }

        # Only the inference itself is allowed to fail softly; bookkeeping
        # below runs once a prediction is in hand.
        try:
            predicted_answer = _predict_choice_letter(
                model, processor, device, image_path, question, choices
            )
        except Exception as e:
            print(f"Error processing example {i}: {e}")
            skipped += 1
            continue

        is_correct = (predicted_answer == true_answer)
        total += 1
        if is_correct:
            correct += 1
        confusion_matrix[true_answer][predicted_answer] += 1

        # str() keeps a missing (NaN) question from raising here.
        words = str(question).split()
        question_type = words[0] if words else "Unknown"
        stats = question_types.setdefault(question_type, {"correct": 0, "total": 0})
        stats["total"] += 1
        if is_correct:
            stats["correct"] += 1

        if detailed:
            results.append({
                "id": i,  # position within the evaluated sample, not the dataframe index
                "question": question,
                "true_answer": true_answer,
                "predicted_answer": predicted_answer,
                "is_correct": is_correct,
                "image_path": image_path
            })

    # Calculate overall accuracy
    overall_accuracy = correct / total if total > 0 else 0
    print(f"Overall Accuracy: {overall_accuracy:.4f} ({correct}/{total})")
    if skipped:
        print(f"Skipped {skipped} example(s) (missing image, invalid label, or inference error)")

    _print_confusion_matrix(confusion_matrix)

    # Print accuracy by question type, most frequent first
    print("\nAccuracy by Question Type:")
    for qtype, stats in sorted(question_types.items(),
                               key=lambda x: x[1]["total"],
                               reverse=True):
        if stats["total"] >= 5:  # Only show types with at least 5 examples
            type_acc = stats["correct"] / stats["total"]
            print(f"{qtype}: {type_acc:.4f} ({stats['correct']}/{stats['total']})")

    if detailed:
        return {
            "accuracy": overall_accuracy,
            "confusion_matrix": confusion_matrix,
            "question_types": question_types,
            "detailed_results": results,
            "skipped": skipped
        }
    else:
        return overall_accuracy

In [None]:
# Score a random 100-example subset and keep the full metrics dictionary.
# train_df is passed as the evaluation frame (it has the column layout the
# function expects), so the figure printed below is measured on training data.
accuracy_metrics = calculate_accuracy(
    model,
    processor,
    device,
    train_df,
    image_dir,
    num_samples=100,
    detailed=True,
)

print(f"Final model accuracy: {accuracy_metrics['accuracy']:.4f}")

# Rank question types (bucketed by first word) from least to most accurate,
# ignoring buckets with fewer than 5 scored examples.
print("\nQuestion Types with Lowest Accuracy:")
sorted_types = [
    (qtype, stats["correct"] / stats["total"], stats["total"])
    for qtype, stats in accuracy_metrics["question_types"].items()
    if stats["total"] >= 5
]
sorted_types.sort(key=lambda entry: entry[1])

# Show the five weakest buckets.
for qtype, acc, total in sorted_types[:5]:
    print(f"{qtype}: {acc:.4f} ({total} examples)")