In [19]:
import util
import os
from tqdm import tqdm
import chatbot
import re
import evaluation as eval

In [20]:
os.environ["TOKENIZERS_PARALLELISM"] = "false"
query_path = "query.txt"

# Respond to a Query

In [21]:
def respond(
        query,
        verbose=1):

    response = chatbot.disease_list_prediction_without_RAG(query)
    pattern = r'(Disease name \d+:\s*.*?)(?=Disease name \d+:|$)'
    predictions = re.findall(pattern, response, flags=re.DOTALL)

    for prediction in predictions:
        if verbose:
            print(prediction)

    return predictions

In [22]:
query = util.read_query(query_path).replace('\n', "").strip()
predictions = respond(query, 1)

Disease name 1: Ameloblastoma
Disease 1 description: Central, unilocular radiolucent lesion in the mandible with root resorption, tooth displacement, and expansion to the bony cortex.

Disease name 2: Odontogenic keratocyst
Disease 2 description: Unilocular radiolucent lesion with defined but not corticated borders in the mandible, commonly seen in the incisor region.

Disease name 3: Dentigerous cyst
Disease 3 description: Unilocular radiolucent lesion associated with impacted teeth, with expansion to the bony cortex and displacement of adjacent teeth.


# Evaluation

In [23]:
def list_respond(
        qa_pairs, 
        prediction_record_path
    ):
    
    truths = []
    queries = []
    for pair in qa_pairs:
        truths.append(pair['answer'])
        queries.append(pair['query'])

    print("Generating responses.")
    prediction_record = []
        
    for i, query in tqdm(enumerate(queries)):
        cleaned_query = query.replace('\n', "")
        cleaned_query = cleaned_query.strip()
        symptom_prediction = None
        disease_names = []
        while disease_names == [] or not symptom_prediction:
            symptom_prediction = respond(
                cleaned_query, 
                verbose=0
            )
            for prediction in symptom_prediction:  
                match = re.search(r'Disease name \d+:\s*(.*?)\s*Disease \d+ description:', prediction, re.DOTALL)
                if match:
                    disease_names.append(match.group(1).strip())

        record = {
            'index': i,
            'original_prediction': symptom_prediction,
            'disease_prediction': disease_names,
            'true_disease': truths[i]
        }

        prediction_record.append(record)

    util.save_as_json(prediction_record, prediction_record_path)

In [24]:
def disease_recall(record_path, sparse_sim_threshold, score_path):
    prediction_record = util.load_json(record_path)
    total_correct = 0
    new_record = []
    for entry in tqdm(prediction_record):
        disease_truth_list = [entry['true_disease']]
        disease_pred = entry['disease_prediction']
        correct, sim = eval.embedding_list_hit(disease_truth_list, disease_pred, sparse_sim_threshold)

        entry['disease_correct'] = correct
        entry['sparse_similarity'] = sim
        new_record.append(entry)
        
        total_correct += correct
    
    util.save_as_json(new_record, record_path)

    if not os.path.exists(score_path):
        result = [{'disease_recall': total_correct/len(prediction_record)}]
        util.save_as_json(result, score_path)
    else:
        result = util.load_json(score_path)
        result.append([{'disease_recall': total_correct/len(prediction_record)}])
        util.save_as_json(result, score_path)

In [25]:
exp_num = 5
for i in range(exp_num):
    print(f'Evaluation Iteration {i}\n')
    qa_pairs = util.load_json('evaluation_dataset/qa_pairs.json')

    prediction_record_path = f'manual_evaluation_results/prediction_record{i}.json'

    list_respond(
        qa_pairs, 
        prediction_record_path
    )

    record_path = f'manual_evaluation_results/prediction_record{i}.json'
    score_path = f'manual_evaluation_results/prediction_score{i}.json'
    disease_recall(record_path, 0.47, score_path)


Evaluation Iteration 0

Loaded content from evaluation_dataset/qa_pairs.json
Generating responses.


25it [00:57,  2.31s/it]


Saved content to manual_evaluation_results/prediction_record0.json
Loaded content from manual_evaluation_results/prediction_record0.json


100%|██████████| 25/25 [00:11<00:00,  2.14it/s]


Saved content to manual_evaluation_results/prediction_record0.json
Saved content to manual_evaluation_results/prediction_score0.json
Evaluation Iteration 1

Loaded content from evaluation_dataset/qa_pairs.json
Generating responses.


25it [00:50,  2.00s/it]


Saved content to manual_evaluation_results/prediction_record1.json
Loaded content from manual_evaluation_results/prediction_record1.json


100%|██████████| 25/25 [00:05<00:00,  4.51it/s]


Saved content to manual_evaluation_results/prediction_record1.json
Saved content to manual_evaluation_results/prediction_score1.json
Evaluation Iteration 2

Loaded content from evaluation_dataset/qa_pairs.json
Generating responses.


25it [00:49,  1.98s/it]


Saved content to manual_evaluation_results/prediction_record2.json
Loaded content from manual_evaluation_results/prediction_record2.json


100%|██████████| 25/25 [00:04<00:00,  5.27it/s]


Saved content to manual_evaluation_results/prediction_record2.json
Saved content to manual_evaluation_results/prediction_score2.json
Evaluation Iteration 3

Loaded content from evaluation_dataset/qa_pairs.json
Generating responses.


25it [00:52,  2.12s/it]


Saved content to manual_evaluation_results/prediction_record3.json
Loaded content from manual_evaluation_results/prediction_record3.json


100%|██████████| 25/25 [00:06<00:00,  4.06it/s]


Saved content to manual_evaluation_results/prediction_record3.json
Saved content to manual_evaluation_results/prediction_score3.json
Evaluation Iteration 4

Loaded content from evaluation_dataset/qa_pairs.json
Generating responses.


25it [00:47,  1.90s/it]


Saved content to manual_evaluation_results/prediction_record4.json
Loaded content from manual_evaluation_results/prediction_record4.json


100%|██████████| 25/25 [00:01<00:00, 14.59it/s]

Saved content to manual_evaluation_results/prediction_record4.json
Saved content to manual_evaluation_results/prediction_score4.json



