In [1]:
import os
import sys

sys.path.append(os.path.abspath(("src")))

import Rag
import pandas as pd
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Embedding models to compare. Each name is passed to
# Rag.launch_depression_assistant() by the evaluation loops; the order is
# significant because index 0 is picked as the trial model.
embedder_names = [
    "emilyalsentzer/Bio_ClinicalBERT",
    "Qwen/Qwen3-Embedding-0.6B",
    "all-MiniLM-L6-v2",
    "jinaai/jina-embeddings-v3",
    "abhinand/MedEmbed-large-v0.1",
    "BAAI/bge-base-en-v1.5",
    "BAAI/bge-large-en-v1.5",
    "BAAI/bge-small-en-v1.5",
    "intfloat/multilingual-e5-base",
    "sentence-transformers/all-mpnet-base-v2",
    "pritamdeka/S-PubMedBert-MS-MARCO",
    "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
]


In [3]:
# Load the provided question/answer pairs that the evaluation below is based on.
qa_pairs = pd.read_csv(
    filepath_or_buffer="evaluation/QA_pairs_for_eval/provided_qa_pairs.csv",
)

# Table Reference Match Evaluation

- The first 20 rows of the QA pairs have a clearly identified source table (recorded as a chunk id).
- So we check whether the results returned by the retriever (using different embedders) actually contain the source table.
- Retrieval uses `faiss_search` with cosine similarity.

In [4]:
# Keep only the first 20 QA pairs (selected by position) for this evaluation,
# and take the first embedder in the list as a quick trial model.
first_20 = qa_pairs.iloc[:20]
test = embedder_names[0]
test

'emilyalsentzer/Bio_ClinicalBERT'

In [8]:
# Display the evaluation subset (query, answer, source and chunk_id columns) for a visual check.
first_20

Unnamed: 0,section,description,query,answer,source,chunk_id
0,,Lifestyle intervention tx options,My patient is interested in lifestyle interven...,Supervised exercise (low to moderate intensity...,From Table 2.4 in CANMAT 2023 depression guide...,60
1,,Lifestyle intervention tx options,My patient has a major depressive episode with...,"Light therapy (10,000 lux white light for 30 m...",From Table 2.4 in CANMAT 2023 depression guide...,60
2,,Depression symptom dimension tx options,My patient has a major depressive episode with...,Duloxetine,From Table 3.6 in CANMAT 2023 depression guide...,99
3,,Depression symptom dimension tx options,My patient has a major depressive episode with...,Bupropion,From Table 3.6 in CANMAT 2023 depression guide...,99
4,,Psychotherapy,My patient with a major depressive episode of ...,"Cognitive behavioural therapy, interpersonal t...",From Table 3.2 in CANMAT 2023 depression guide...,79
5,,Complementary and alternative medicine,My patient with a major depressive episode of ...,St. John's Wort,From Table 3.7 in CANMAT 2023 depression guide...,65
6,,Recommendations for disease severity,My patient is presenting with a mild severity ...,"Psychotherapy because of fewer side effects, a...",From Table 3.1 in CANMAT 2023 depresion guidel...,71
7,,Failure to achieve response depression,My patient's depression has not responded to t...,Optimize the dose,From Figure 7.1 in CANMAT 2023 depression guid...,186
8,,Failure to achieve response depression,My patient's depression has not responded to t...,"If available, consider referral to psychothera...",From Figure 7.1 in CANMAT 2023 depression guid...,186
9,,Failure to achieve response depression,My patient's depression has not responded to t...,Since there is tolerability issues and no resp...,From Figure 7.1 in CANMAT 2023 depression guid...,186


In [5]:
# Trial run: initialise the assistant with the first embedder (`test`) before running the full sweep.
Rag.launch_depression_assistant(test)

[Attempt 1] Loading embedder 'emilyalsentzer/Bio_ClinicalBERT' with default sentence transformer
Using device: cpu


No sentence-transformers model found with name emilyalsentzer/Bio_ClinicalBERT. Creating a new one with mean pooling.


[Time] Embedding model loaded in 3.78 seconds.
FAISS index for emilyalsentzer/Bio_ClinicalBERT already exists. Loading it...
Loaded FAISS index from data/faiss_index/emilyalsentzer_Bio_ClinicalBERT_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.00 seconds.
---------Depression Assistant is ready to use!--------------




In [6]:
# Sweep every embedder over k = 3..6 and measure how often the retrieved
# items include the chunk id recorded as the query's true source.
embedder_accuracy_k = []  # one {"embedder", "k", "accuracy"} record per (embedder, k)
results = []              # raw retrieval output for every (embedder, k, query)

for embedder in embedder_names:
    print(f"==============={embedder}===============")
    # The Rag.* attributes read below are presumably (re)populated by this
    # call for the current embedder -- TODO confirm against the Rag module.
    Rag.launch_depression_assistant(embedder)

    for k in range(3, 7):
        correct_count = 0
        # Walk the three columns in lockstep instead of looking values up with
        # the enumerate counter: a label lookup such as first_20["chunk_id"][i]
        # only works while the frame's index happens to be exactly 0..n-1.
        rows = zip(first_20["query"], first_20["chunk_id"], first_20["source"])
        for i, (query, chunk_id, source) in enumerate(rows):
            true_id = int(chunk_id)
            result = Rag.faiss_search(
                query, Rag.embedder, Rag.db, Rag.index, Rag.referenced_tables_db, k=k
            )
            results.append({
                "query": str(i) + query,
                "chunk_id": true_id,
                "result": result,
                "k": k,
                "embedder": embedder
            })
            # NOTE(review): the i+2 offset presumably maps to the row number in
            # the source spreadsheet (header row + 1-based) -- confirm.
            print(f"--{i+2}. True Source: {source}--")
            # A hit means the true chunk id is among the returned items.
            if any(true_id == item["chunk_id"] for item in result):
                correct_count += 1
                print("✅✅✅ Correct! Found the Table! ✅✅✅")
        accuracy = correct_count / len(first_20)
        print("\n\n\nCorrect count:", correct_count)
        print(f"Accuracy for {embedder} with k={k}: {accuracy:.2f}\n\n\n")
        embedder_accuracy_k.append({
            "embedder": embedder,
            "k": k,
            "accuracy": accuracy
        })

embedder_accuracy_k_df = pd.DataFrame(embedder_accuracy_k)

No sentence-transformers model found with name emilyalsentzer/Bio_ClinicalBERT. Creating a new one with mean pooling.


[Attempt 1] Loading embedder 'emilyalsentzer/Bio_ClinicalBERT' with default sentence transformer
Using device: cpu
[Time] Embedding model loaded in 3.17 seconds.
FAISS index for emilyalsentzer/Bio_ClinicalBERT already exists. Loading it...
Loaded FAISS index from data/faiss_index/emilyalsentzer_Bio_ClinicalBERT_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.00 seconds.
---------Depression Assistant is ready to use!--------------


existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--2. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--3. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_3_3', 'table_a']
--4. True Source: From Table 3.6 in CANMAT 2023 depression guidelines-

No sentence-transformers model found with name microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext. Creating a new one with mean pooling.


[Time] Embedding model loaded in 4.52 seconds.
FAISS index for microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext already exists. Loading it...
Loaded FAISS index from data/faiss_index/microsoft_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.00 seconds.
---------Depression Assistant is ready to use!--------------


existed tables: set()
Tables to add: ['table_a']
--2. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
existed tables: set()
Tables to add: ['table_a']
--3. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
existed tables: set()
Tables to add: ['table_a']
--4. True Source: From Table 3.6 in CANMAT 2023 depression guidelines--
existed tables: set()
Tables to add: ['table_a']
--5. True Source: From Table 3.6 in CANMAT 2023 depression guidelines--
existed tables: set()
Tables to add: ['

In [10]:
# Rebuild the accuracy table from the collected records and show every
# (embedder, k) row ranked from highest to lowest accuracy. Only the last
# expression of a cell is displayed, so exactly one ranking is computed.
embedder_accuracy_k_df = pd.DataFrame(embedder_accuracy_k)
embedder_accuracy_k_df.sort_values(by="accuracy", ascending=False)


Unnamed: 0,embedder,k,accuracy
39,sentence-transformers/all-mpnet-base-v2,6,0.85
7,Qwen/Qwen3-Embedding-0.6B,6,0.85
15,jinaai/jina-embeddings-v3,6,0.8
31,BAAI/bge-small-en-v1.5,6,0.8
5,Qwen/Qwen3-Embedding-0.6B,4,0.8
6,Qwen/Qwen3-Embedding-0.6B,5,0.8
29,BAAI/bge-small-en-v1.5,4,0.75
14,jinaai/jina-embeddings-v3,5,0.75
28,BAAI/bge-small-en-v1.5,3,0.75
30,BAAI/bge-small-en-v1.5,5,0.75


In [11]:
# Extend the sweep to k = 7..9, appending to the records collected so far.
k_values = range(7, 10)

# Re-running this cell must not duplicate rows: discard anything a previous
# run already recorded for these k values before appending again.
embedder_accuracy_k = [row for row in embedder_accuracy_k if row["k"] not in k_values]
results = [row for row in results if row["k"] not in k_values]

for embedder in embedder_names:
    print(f"==============={embedder}===============")
    # The Rag.* attributes read below are presumably (re)populated by this
    # call for the current embedder -- TODO confirm against the Rag module.
    Rag.launch_depression_assistant(embedder)

    for k in k_values:
        correct_count = 0
        # Walk the three columns in lockstep instead of looking values up with
        # the enumerate counter: a label lookup such as first_20["chunk_id"][i]
        # only works while the frame's index happens to be exactly 0..n-1.
        rows = zip(first_20["query"], first_20["chunk_id"], first_20["source"])
        for i, (query, chunk_id, source) in enumerate(rows):
            true_id = int(chunk_id)
            result = Rag.faiss_search(
                query, Rag.embedder, Rag.db, Rag.index, Rag.referenced_tables_db, k=k
            )
            results.append({
                "query": str(i) + query,
                "chunk_id": true_id,
                "result": result,
                "k": k,
                "embedder": embedder
            })
            # NOTE(review): the i+2 offset presumably maps to the row number in
            # the source spreadsheet (header row + 1-based) -- confirm.
            print(f"--{i+2}. True Source: {source}--")
            # A hit means the true chunk id is among the returned items.
            if any(true_id == item["chunk_id"] for item in result):
                correct_count += 1
                print("✅✅✅ Correct! Found the Table! ✅✅✅")
        accuracy = correct_count / len(first_20)
        print("\n\n\nCorrect count:", correct_count)
        print(f"Accuracy for {embedder} with k={k}: {accuracy:.2f}\n\n\n")
        embedder_accuracy_k.append({
            "embedder": embedder,
            "k": k,
            "accuracy": accuracy
        })

embedder_accuracy_k_df = pd.DataFrame(embedder_accuracy_k)

[Attempt 1] Loading embedder 'emilyalsentzer/Bio_ClinicalBERT' with default sentence transformer
Using device: cpu


No sentence-transformers model found with name emilyalsentzer/Bio_ClinicalBERT. Creating a new one with mean pooling.


[Time] Embedding model loaded in 2.22 seconds.
FAISS index for emilyalsentzer/Bio_ClinicalBERT already exists. Loading it...
Loaded FAISS index from data/faiss_index/emilyalsentzer_Bio_ClinicalBERT_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.00 seconds.
---------Depression Assistant is ready to use!--------------


existed tables: set()
Tables to add: ['table_2_4', 'table_a', 'table_3_4']
--2. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--3. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_3_3', 'table_1_1', 'table_1_2', 'table_a']
--4. True Source: From Table 3.6 in CANMAT 2023 depression guidelines--
existed tables: set()
Tables to add: ['table_3_3', 'table_a']
--5. True So

No sentence-transformers model found with name microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext. Creating a new one with mean pooling.


[Time] Embedding model loaded in 3.15 seconds.
FAISS index for microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext already exists. Loading it...
Loaded FAISS index from data/faiss_index/microsoft_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.00 seconds.
---------Depression Assistant is ready to use!--------------


existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--2. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--3. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_a', 'table_3_6']
--4. True Source: From Table 3.6 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set

In [14]:
# Rebuild the accuracy table and show it ordered by k (largest first), with
# the most accurate embedders first within each k.
embedder_accuracy_k_df = pd.DataFrame(embedder_accuracy_k)
embedder_accuracy_k_df.sort_values(by=["k", "accuracy"], ascending=[False, False])

Unnamed: 0,embedder,k,accuracy
71,BAAI/bge-small-en-v1.5,9,0.95
53,Qwen/Qwen3-Embedding-0.6B,9,0.90
59,jinaai/jina-embeddings-v3,9,0.90
77,sentence-transformers/all-mpnet-base-v2,9,0.90
65,BAAI/bge-base-en-v1.5,9,0.85
...,...,...,...
20,BAAI/bge-base-en-v1.5,3,0.50
32,intfloat/multilingual-e5-base,3,0.40
40,pritamdeka/S-PubMedBert-MS-MARCO,3,0.35
0,emilyalsentzer/Bio_ClinicalBERT,3,0.15


In [15]:
# Extend the sweep to k = 10..19, appending to the records collected so far.
k_values = range(10, 20)

# Re-running this cell must not duplicate rows: discard anything a previous
# run already recorded for these k values before appending again.
embedder_accuracy_k = [row for row in embedder_accuracy_k if row["k"] not in k_values]
results = [row for row in results if row["k"] not in k_values]

for embedder in embedder_names:
    print(f"==============={embedder}===============")
    # The Rag.* attributes read below are presumably (re)populated by this
    # call for the current embedder -- TODO confirm against the Rag module.
    Rag.launch_depression_assistant(embedder)

    for k in k_values:
        correct_count = 0
        # Walk the three columns in lockstep instead of looking values up with
        # the enumerate counter: a label lookup such as first_20["chunk_id"][i]
        # only works while the frame's index happens to be exactly 0..n-1.
        rows = zip(first_20["query"], first_20["chunk_id"], first_20["source"])
        for i, (query, chunk_id, source) in enumerate(rows):
            true_id = int(chunk_id)
            result = Rag.faiss_search(
                query, Rag.embedder, Rag.db, Rag.index, Rag.referenced_tables_db, k=k
            )
            results.append({
                "query": str(i) + query,
                "chunk_id": true_id,
                "result": result,
                "k": k,
                "embedder": embedder
            })
            # NOTE(review): the i+2 offset presumably maps to the row number in
            # the source spreadsheet (header row + 1-based) -- confirm.
            print(f"--{i+2}. True Source: {source}--")
            # A hit means the true chunk id is among the returned items.
            if any(true_id == item["chunk_id"] for item in result):
                correct_count += 1
                print("✅✅✅ Correct! Found the Table! ✅✅✅")
        accuracy = correct_count / len(first_20)
        print("\n\n\nCorrect count:", correct_count)
        print(f"Accuracy for {embedder} with k={k}: {accuracy:.2f}\n\n\n")
        embedder_accuracy_k.append({
            "embedder": embedder,
            "k": k,
            "accuracy": accuracy
        })

embedder_accuracy_k_df = pd.DataFrame(embedder_accuracy_k)

[Attempt 1] Loading embedder 'emilyalsentzer/Bio_ClinicalBERT' with default sentence transformer
Using device: cpu


No sentence-transformers model found with name emilyalsentzer/Bio_ClinicalBERT. Creating a new one with mean pooling.


[Time] Embedding model loaded in 5.33 seconds.
FAISS index for emilyalsentzer/Bio_ClinicalBERT already exists. Loading it...
Loaded FAISS index from data/faiss_index/emilyalsentzer_Bio_ClinicalBERT_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.00 seconds.
---------Depression Assistant is ready to use!--------------


existed tables: set()
Tables to add: ['table_3_7', 'table_2_4', 'table_a', 'table_3_4']
--2. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_3_3', 'table_2_4', 'table_6_1', 'table_a']
--3. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_3_3', 'table_1_1', 'table_1_2', 'table_a']
--4. True Source: From Table 3.6 in CANMAT 2023 depression guidelines--
existed tables: set()
Tables to add

No sentence-transformers model found with name microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext. Creating a new one with mean pooling.


[Time] Embedding model loaded in 2.96 seconds.
FAISS index for microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext already exists. Loading it...
Loaded FAISS index from data/faiss_index/microsoft_BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext_index.faiss.
[Time] FAISS index loaded in 0.00 seconds.
No LLM client provided. Loading Together LLM client...
[Time] LLM client initiated in 0.02 seconds.
---------Depression Assistant is ready to use!--------------


existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--2. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_2_4', 'table_a']
--3. True Source: From Table 2.4 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Table! ✅✅✅
existed tables: set()
Tables to add: ['table_2_4', 'table_6_1', 'table_a', 'table_3_6']
--4. True Source: From Table 3.6 in CANMAT 2023 depression guidelines--
✅✅✅ Correct! Found the Tabl

In [17]:
# Rebuild the accuracy table from all records collected so far and show every
# (embedder, k) row ranked from highest to lowest accuracy. Only the last
# expression of a cell is displayed, so exactly one ranking is computed.
embedder_accuracy_k_df = pd.DataFrame(embedder_accuracy_k)
embedder_accuracy_k_df.sort_values(by="accuracy", ascending=False)

Unnamed: 0,embedder,k,accuracy
163,BAAI/bge-small-en-v1.5,19,1.00
160,BAAI/bge-small-en-v1.5,16,1.00
179,sentence-transformers/all-mpnet-base-v2,15,1.00
178,sentence-transformers/all-mpnet-base-v2,14,1.00
182,sentence-transformers/all-mpnet-base-v2,18,1.00
...,...,...,...
45,microsoft/BiomedNLP-PubMedBERT-base-uncased-ab...,4,0.20
1,emilyalsentzer/Bio_ClinicalBERT,4,0.15
2,emilyalsentzer/Bio_ClinicalBERT,5,0.15
0,emilyalsentzer/Bio_ClinicalBERT,3,0.15


In [18]:
# Persist the accuracy table as CSV, leaving out the DataFrame index column.
embedder_accuracy_k_df.to_csv(path_or_buf="embedder_accuracy_k.csv", index=False)

In [20]:
# Serialise first, then open the file: if `results` holds a value json cannot
# encode, the exception is raised before an existing output file is truncated
# or left half-written.
serialized_results = json.dumps(results, indent=4)
with open("results_with_different_k.json", "w", encoding="utf-8") as f:
    f.write(serialized_results)