In [145]:
import pandas as pd
import json
import os

from config import MAIN_DIR
from typing import Sequence, Any, Dict
from collections import Counter

import numpy as np
from llama_index.schema import Document, MetadataMode

In [185]:
# Root folder holding the experiment output folders; resolved against MAIN_DIR
# (imported from the project's config module).
ARTIFACT_DIR = os.path.join(MAIN_DIR, "artifacts")

In [None]:
def majority_vote(data: Sequence) -> Any:
    """Return the most frequent element of ``data``.

    Args:
        data: Iterable of hashable votes (e.g. one row of classifications).

    Returns:
        The element with the highest count. When several elements share the
        highest count, the one that appears first in ``data`` wins.

    Raises:
        ValueError: If ``data`` is empty.
    """
    tally = Counter(data)
    # max() keeps the first maximal item and Counter iterates in insertion
    # order, so ties resolve to the earliest-seen vote.
    winner, _ = max(tally.items(), key=lambda pair: pair[1])
    return winner

# Default mapping from long-form (and short-form) appropriateness labels to the
# three merged categories used in the analysis.
_DEFAULT_RENAME_DICT: Dict = {
    "USUALLY APPROPRIATE": "UA/MBA",
    "MAY BE APPROPRIATE": "UA/MBA",
    "USUALLY NOT APPROPRIATE": "UNA",
    "INSUFFICIENT INFORMATION": "ICI",
    "UA": "UA/MBA",
    "MBA": "UA/MBA",
}


def rename_column(
    column: pd.Series,
    rename_dict: Dict = _DEFAULT_RENAME_DICT,
) -> pd.Series:
    """Map the labels in ``column`` to their merged category names.

    Args:
        column: Series of label strings.
        rename_dict: ``{old_label: new_label}`` mapping; values that are not a
            key of the mapping are left unchanged.

    Returns:
        A new Series with the replacements applied.
    """
    renamed = column.replace(rename_dict)
    return renamed

# C-LLM 

In [186]:
# Workbook the C-LLM retrieval analysis is written to.
# NOTE(review): this path is relative to the working directory, whereas the
# input folder below is anchored at ARTIFACT_DIR - confirm both point at the
# same "artifacts" folder.
save_path = os.path.join("..", "artifacts", "cllm_retrieval_analysis.xlsx")

# Folder holding the C-LLM run outputs.
cllm_run_name = "gpt-4_512_20_Combined_retrieval_with_metadata_filter-n=6_11-03-2024-18-32"
data_folder = os.path.join(ARTIFACT_DIR, cllm_run_name)

In [183]:
# Assemble the C-LLM retrieval summary from the repeated runs in `data_folder`:
#   * retrieved_df  - one row per case: case info, the retrieved table/text
#     chunks (taken from the first run only), one GPT classification per run,
#     their majority vote, and whether that vote matches the human label.
#   * raw_answer_df - one row per case, one column per run, with the raw answers.
# NOTE(review): the "Base RAG" section further down repeats this cell almost
# verbatim; extracting a shared function would remove the duplication.
N_RUNS = 5  # results_1.json ... results_5.json
N_TABLE_COLUMNS = 4  # text chunks are stored after the table columns
DOC_COLUMNS = [
    "Table_1", "Table_2", "Table_3", "Table_4",
    "Text_1", "Text_2", "Text_3", "Text_4", "Text_5",
]
ALLOWED_CLASSIFICATIONS = [
    "USUALLY APPROPRIATE",
    "MAY BE APPROPRIATE",
    "USUALLY NOT APPROPRIATE",
    "INSUFFICIENT INFORMATION",
]

retrieved_df = None
all_raw_answers = {}

for run_idx in range(N_RUNS):
    run_no = run_idx + 1

    # Read from `data_folder` (defined in the preceding cell). The previous
    # `cllm_folder` name is not defined by any cell shown in this notebook and
    # raised NameError on a fresh kernel.
    with open(os.path.join(data_folder, f"results_{run_no}.json"), "r") as f:
        run_results = json.load(f)

    if retrieved_df is None:
        # First run only: build the retrieved-document columns and load the
        # per-case information.

        # The last five source documents of every case are forced to "text"
        # mode. NOTE(review): presumably these are the text-retriever hits
        # whose metadata lacks a usable mode - confirm.
        for result in run_results:
            for doc in result["source_documents"][-5:]:
                doc["metadata"]["mode"] = "text"

        # doc_list[column][case]; slots without a retrieved document stay None.
        doc_list = [[None] * len(run_results) for _ in range(len(DOC_COLUMNS))]

        for case_idx, case_results in enumerate(run_results):
            retrieved_table_docs = []
            retrieved_text_docs = []
            for source_doc in case_results["source_documents"]:
                source_doc_obj = Document(
                    text=source_doc["page_content"], metadata=source_doc["metadata"]
                )
                doc_content = source_doc_obj.get_content(metadata_mode=MetadataMode.ALL)
                if source_doc_obj.metadata["mode"] == "tabular":
                    retrieved_table_docs.append(doc_content)
                else:
                    retrieved_text_docs.append(doc_content)

            for table_idx, retrieved_table_doc in enumerate(retrieved_table_docs):
                doc_list[table_idx][case_idx] = retrieved_table_doc
            for text_idx, retrieved_text_doc in enumerate(retrieved_text_docs):
                doc_list[text_idx + N_TABLE_COLUMNS][case_idx] = retrieved_text_doc

        # Transpose from (column, case) to (case, column).
        retrieved_df = pd.DataFrame(
            np.array(doc_list).transpose(1, 0),
            columns=DOC_COLUMNS,
        )

        case_info_df = pd.read_csv(
            os.path.join(data_folder, "result_1.csv"),
            usecols=["Clinical File", "ACR scenario", "MRI scan ordered", "human_gt"],
        )
        case_info_df["human_gt"] = rename_column(case_info_df["human_gt"])

    run_raw_answers = []
    classifications = []

    for case_result in run_results:
        run_raw_answers.append(case_result["result"])
        classification = case_result["appropriateness"].upper()
        # An empty classification is counted as "insufficient information".
        if classification == "":
            classification = "INSUFFICIENT INFORMATION"
        assert classification in ALLOWED_CLASSIFICATIONS, f"{classification} is not allowed"
        classifications.append(classification)

    classification_column = f"GPT_Classification_{run_no}"
    retrieved_df[classification_column] = classifications
    retrieved_df[classification_column] = rename_column(retrieved_df[classification_column])

    all_raw_answers[f"Raw Answer {run_no}"] = run_raw_answers

# Case info and retrieved documents are joined side by side on the row index.
retrieved_df = pd.concat([case_info_df, retrieved_df], axis=1)
retrieved_df["GPT_Majority_Vote"] = retrieved_df[
    [f"GPT_Classification_{idx+1}" for idx in range(N_RUNS)]
].apply(majority_vote, axis=1)
retrieved_df["Answer Correctness"] = np.where(
    retrieved_df["GPT_Majority_Vote"] == retrieved_df["human_gt"], "CORRECT", "INCORRECT"
)

# Fix the column order used in the exported sheet.
retrieved_df = retrieved_df[
    [
        'Clinical File',
        'ACR scenario',
        'MRI scan ordered',
        'Table_1', 'Table_2', 'Table_3', 'Table_4',
        'Text_1', 'Text_2', 'Text_3', 'Text_4', 'Text_5',
        'Answer Correctness', 'human_gt', 'GPT_Majority_Vote',
        'GPT_Classification_1', 'GPT_Classification_2', 'GPT_Classification_3', 'GPT_Classification_4', 'GPT_Classification_5',
    ]
]

raw_answer_df = pd.DataFrame(all_raw_answers)

In [187]:
# Row label of the case left out of the exported workbook.
# NOTE(review): the notebook does not record why this case is excluded - confirm.
EXCLUDED_CASE_INDEX = 5

# Write the summary and the raw answers as two sheets of one workbook.
# The engine is selected on the ExcelWriter: `to_excel` does not use its own
# `engine` argument when it is handed an already-open writer.
with pd.ExcelWriter(save_path, engine="xlsxwriter") as writer:
    final_df = retrieved_df.drop(index=EXCLUDED_CASE_INDEX)
    final_df.to_excel(writer, sheet_name="retrieval_summary", index=False)
    final_raw_answer_df = raw_answer_df.drop(index=EXCLUDED_CASE_INDEX)
    final_raw_answer_df.to_excel(writer, sheet_name="raw_answers", index=False)

# Base RAG

In [193]:
# Workbook the Base RAG retrieval analysis is written to.
# NOTE(review): this path is relative to the working directory, whereas the
# input folder below is anchored at ARTIFACT_DIR - confirm both point at the
# same "artifacts" folder.
save_path = os.path.join("..", "artifacts", "baserag_retrieval_analysis.xlsx")

# Folder holding the Base RAG run outputs.
baserag_run_name = "gpt-4_512_20_base_rau_rag-n=5-k=3_11-04-2024-10-59"
data_folder = os.path.join(ARTIFACT_DIR, baserag_run_name)

In [194]:
# Assemble the Base RAG retrieval summary from the repeated runs in `data_folder`:
#   * retrieved_df  - one row per case: case info, the retrieved table/text
#     chunks (taken from the first run only), one GPT classification per run,
#     their majority vote, and whether that vote matches the human label.
#   * raw_answer_df - one row per case, one column per run, with the raw answers.
# NOTE(review): this cell repeats the "C-LLM" aggregation cell almost verbatim;
# extracting a shared function would remove the duplication.
N_RUNS = 5  # results_1.json ... results_5.json
N_TABLE_COLUMNS = 4  # text chunks are stored after the table columns
DOC_COLUMNS = [
    "Table_1", "Table_2", "Table_3", "Table_4",
    "Text_1", "Text_2", "Text_3", "Text_4", "Text_5",
]
ALLOWED_CLASSIFICATIONS = [
    "USUALLY APPROPRIATE",
    "MAY BE APPROPRIATE",
    "USUALLY NOT APPROPRIATE",
    "INSUFFICIENT INFORMATION",
]

retrieved_df = None
all_raw_answers = {}

for run_idx in range(N_RUNS):
    run_no = run_idx + 1

    with open(os.path.join(data_folder, f"results_{run_no}.json"), "r") as f:
        run_results = json.load(f)

    if retrieved_df is None:
        # First run only: build the retrieved-document columns and load the
        # per-case information.

        # The last five source documents of every case are forced to "text"
        # mode. NOTE(review): presumably these are the text-retriever hits
        # whose metadata lacks a usable mode - confirm.
        for result in run_results:
            for doc in result["source_documents"][-5:]:
                doc["metadata"]["mode"] = "text"

        # doc_list[column][case]; slots without a retrieved document stay None.
        doc_list = [[None] * len(run_results) for _ in range(len(DOC_COLUMNS))]

        for case_idx, case_results in enumerate(run_results):
            retrieved_table_docs = []
            retrieved_text_docs = []
            for source_doc in case_results["source_documents"]:
                source_doc_obj = Document(
                    text=source_doc["page_content"], metadata=source_doc["metadata"]
                )
                doc_content = source_doc_obj.get_content(metadata_mode=MetadataMode.ALL)
                if source_doc_obj.metadata["mode"] == "tabular":
                    retrieved_table_docs.append(doc_content)
                else:
                    retrieved_text_docs.append(doc_content)

            for table_idx, retrieved_table_doc in enumerate(retrieved_table_docs):
                doc_list[table_idx][case_idx] = retrieved_table_doc
            for text_idx, retrieved_text_doc in enumerate(retrieved_text_docs):
                doc_list[text_idx + N_TABLE_COLUMNS][case_idx] = retrieved_text_doc

        # Transpose from (column, case) to (case, column).
        retrieved_df = pd.DataFrame(
            np.array(doc_list).transpose(1, 0),
            columns=DOC_COLUMNS,
        )

        # Read the case info from this section's own run folder. The previous
        # `cllm_folder` name is not defined by any cell shown in this notebook
        # and raised NameError on a fresh kernel.
        # NOTE(review): assumes the Base RAG run folder also contains
        # result_1.csv with these columns - confirm.
        case_info_df = pd.read_csv(
            os.path.join(data_folder, "result_1.csv"),
            usecols=["Clinical File", "ACR scenario", "MRI scan ordered", "human_gt"],
        )
        case_info_df["human_gt"] = rename_column(case_info_df["human_gt"])

    run_raw_answers = []
    classifications = []

    for case_result in run_results:
        run_raw_answers.append(case_result["result"])
        classification = case_result["appropriateness"].upper()
        # An empty classification is counted as "insufficient information".
        if classification == "":
            classification = "INSUFFICIENT INFORMATION"
        assert classification in ALLOWED_CLASSIFICATIONS, f"{classification} is not allowed"
        classifications.append(classification)

    classification_column = f"GPT_Classification_{run_no}"
    retrieved_df[classification_column] = classifications
    retrieved_df[classification_column] = rename_column(retrieved_df[classification_column])

    all_raw_answers[f"Raw Answer {run_no}"] = run_raw_answers

# Case info and retrieved documents are joined side by side on the row index.
retrieved_df = pd.concat([case_info_df, retrieved_df], axis=1)
retrieved_df["GPT_Majority_Vote"] = retrieved_df[
    [f"GPT_Classification_{idx+1}" for idx in range(N_RUNS)]
].apply(majority_vote, axis=1)
retrieved_df["Answer Correctness"] = np.where(
    retrieved_df["GPT_Majority_Vote"] == retrieved_df["human_gt"], "CORRECT", "INCORRECT"
)

# Fix the column order used in the exported sheet.
retrieved_df = retrieved_df[
    [
        'Clinical File',
        'ACR scenario',
        'MRI scan ordered',
        'Table_1', 'Table_2', 'Table_3', 'Table_4',
        'Text_1', 'Text_2', 'Text_3', 'Text_4', 'Text_5',
        'Answer Correctness', 'human_gt', 'GPT_Majority_Vote',
        'GPT_Classification_1', 'GPT_Classification_2', 'GPT_Classification_3', 'GPT_Classification_4', 'GPT_Classification_5',
    ]
]

raw_answer_df = pd.DataFrame(all_raw_answers)

In [198]:
# Row label of the case left out of the exported workbook.
# NOTE(review): the notebook does not record why this case is excluded - confirm.
EXCLUDED_CASE_INDEX = 5

# Write the summary and the raw answers as two sheets of one workbook.
# The engine is selected on the ExcelWriter: `to_excel` does not use its own
# `engine` argument when it is handed an already-open writer.
with pd.ExcelWriter(save_path, engine="xlsxwriter") as writer:
    final_df = retrieved_df.drop(index=EXCLUDED_CASE_INDEX)
    final_df.to_excel(writer, sheet_name="retrieval_summary", index=False)
    final_raw_answer_df = raw_answer_df.drop(index=EXCLUDED_CASE_INDEX)
    final_raw_answer_df.to_excel(writer, sheet_name="raw_answers", index=False)