In [1]:
import pandas as pd
import json
import os

In [122]:
def extract_and_order_by_text(all_options, answer):
    """
    Extracts options found in the answer string and orders them based on
    their first appearance sequence in the answer string.
    """
    # 1. Find the starting index for every option found in the answer
    found_options_with_index = []
    
    for option in all_options:
        # str.find() returns the index of the first occurrence, or -1 if not found.
        index = answer.find(option)
        
        if index != -1:
            # Store the option along with its starting index
            found_options_with_index.append((index, option))
            
    # 2. Sort the list based on the index (the first item in the tuple)
    # This automatically sorts them in the order they appear in the text.
    found_options_with_index.sort(key=lambda item: item[0])
    
    # 3. Extract just the options (the second item in the tuple)
    extracted_list = [option for index, option in found_options_with_index]
    
    return extracted_list

In [74]:
file_path = "out/Qwen2.5-VL-3B-Instruct_all_cls_expert_closed.jsonl"

model_answers = pd.read_json(file_path, lines=True)
print(len(model_answers))

6330


In [75]:
model_answers.head()

Unnamed: 0,index,question,options,image_path,image_scale,scaled_width,scaled_height,dataset,class_label,answer
0,36282,Review this histopathology image of a human sp...,"['A) Estrogen receptor Negative', 'B) None of ...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,"The image provided is a histopathology slide, ..."
1,36292,Review this histopathology image of a human sp...,"['A) Estrogen receptor Positive', 'B) Estrogen...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,To accurately determine the class label for th...
2,36304,Review this histopathology image of a human sp...,"['A) Estrogen receptor Positive', 'B) Estrogen...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,To accurately determine the class label for th...
3,36321,Review this histopathology image of a human sp...,"['A) None of the above', 'B) Estrogen receptor...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,"The image provided is a histopathology image, ..."
4,36326,Review this histopathology image of a human sp...,"['A) None of the above', 'B) Estrogen receptor...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,To accurately determine the class label for th...


In [76]:
task     = "all_cls"
mbu_root = f"/pasteur/u/rdcunha/data_cache/mmbu/final_data/VLMEvalData_v2/LMUData/{task}"

data_root= os.path.join(mbu_root, 'all_cls_closed_subsampled.tsv')

data = pd.read_csv(data_root,sep='\t')
filtered_ = data[data["question_type"] == "expert"]
df_ = filtered_[~filtered_["dataset"].isin([ "isic2018",'herlev',"breakhis_400x","breakhis_200x"])]

In [77]:

df_ = df_.rename(columns={"answer": "correct_answer_choice"})
df_[["index","correct_answer_choice","modality"]].head()

Unnamed: 0,index,correct_answer_choice,modality
0,36282,A,histopathology
1,36292,B,histopathology
2,36304,B,histopathology
3,36321,C,histopathology
4,36326,B,histopathology


In [78]:
merged = pd.merge(
    model_answers,
    df_[["index","correct_answer_choice","modality"]],
    on="index",
    how="inner"
)


In [79]:
merged.head()

Unnamed: 0,index,question,options,image_path,image_scale,scaled_width,scaled_height,dataset,class_label,answer,correct_answer_choice,modality
0,36282,Review this histopathology image of a human sp...,"['A) Estrogen receptor Negative', 'B) None of ...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,"The image provided is a histopathology slide, ...",A,histopathology
1,36292,Review this histopathology image of a human sp...,"['A) Estrogen receptor Positive', 'B) Estrogen...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,To accurately determine the class label for th...,B,histopathology
2,36304,Review this histopathology image of a human sp...,"['A) Estrogen receptor Positive', 'B) Estrogen...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,To accurately determine the class label for th...,B,histopathology
3,36321,Review this histopathology image of a human sp...,"['A) None of the above', 'B) Estrogen receptor...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,"The image provided is a histopathology image, ...",C,histopathology
4,36326,Review this histopathology image of a human sp...,"['A) None of the above', 'B) Estrogen receptor...",/pasteur/u/rdcunha/data_cache/mmbu/final_data/...,1.0,256,256,BCNB_Task5,Estrogen receptor Negative,To accurately determine the class label for th...,B,histopathology


In [119]:
idx = 1

In [120]:

all_options = ast.literal_eval(merged.iloc[idx]["options"])
correct_class = merged.iloc[idx]["class_label"]
correct_option = merged.iloc[idx]["correct_answer_choice"]
incorrect_options =[ option for option in all_options if correct_class not  in option  ]

model_answer =  merged.iloc[idx]["answer"] 

print(model_answer)
print("\n\ncorrect:")
print(correct_class)

To accurately determine the class label for this histopathology image, I would need to review the specific characteristics visible in the image. However, based on common patterns observed in breast cancer histopathology, if the image shows typical features such as ductal carcinoma in situ (DCIS), it might be classified as:

- **Estrogen receptor Positive** if the tumor cells express estrogen receptors.

If the image shows invasive ductal carcinoma without significant stromal invasion or if there are no clear signs of estrogen receptor expression, it might be classified as:

- **Estrogen receptor Negative**.

Without seeing the actual image, I cannot provide a


correct:
Estrogen receptor Negative


In [None]:
extract_and_order_by_text(all_options, model_answer):

In [51]:
model_answer = "B adfa"

In [121]:
correct = 0 
# When option is a letter:
if len(model_answer)  < len(correct_class):
    if correct_option  in  model_answer:
        correct = 1
        
# When reasoning is performed:
elif len(model_answer)  > 4*len(correct_class):
    print("a")
 
    extracted_list = extract_mentioned_options(all_options, model_answer)
    if correct_class in extracted_list[-1]:
        correct = 1
        
# When options is the class
else:   
    if correct_class in model_answer:
         correct  = 1

a


IndexError: list index out of range

In [116]:
correct

0

In [118]:
extracted_list[-1]

'B) None of the above'

In [117]:
correct_class

'Estrogen receptor Negative'

In [27]:
incorrect_options

['B) None of the above', 'C) Estrogen receptor Positive']

In [None]:
import pandas as pd

def check_answer_match(row):
    """
    Check if the correct answer is contained in the model's answer.
    Extend this function later for more complex logic.
    """
    correct = str(row["correct_answer"]).strip().lower()
    model = str(row["model_answer"]).strip().lower()

    # Basic containment check
    return correct in model



# Apply the function
merged["is_correct"] = merged.apply(check_answer_match, axis=1)

print(merged)