# Model Ensemble

This notebook details a model ensemble in which the BERT output is passed as context alongside a user query. The idea is simple: for each testing example, the BERT model is called first, and all of its output is fed into a second model for interpretation, to see whether the results improve. For this testing, I will be using the fine-tuned version of GPT-3.5-Turbo due to its speed!

In [2]:
# gather all imports

import pandas as pd
import numpy as np
import json
from datasets import Dataset
import torch
from transformers import AutoTokenizer
from transformers import  AutoModel
from sklearn.metrics.pairwise import cosine_similarity
import json
import re
from openai import OpenAI

As before, start by loading in the data

In [3]:
def load_and_prepare(file_path):
    """
    Read a JSON file of interactions and reduce every record to a question/response pair.

    @PARAMS:
        - file_path -> path of the JSON file to read

    @RETURNS:
        - a list of {"question": ..., "response": ...} dicts, or an empty list
          when the file cannot be read or a record lacks the expected keys
          (the error is printed rather than raised)
    """
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            records = json.load(handle)

        # keep only the two fields used downstream, under lowercase keys
        pairs = [
            {"question": record["Question"], "response": record["Answer"]}
            for record in records
        ]

        print(f"Loaded {len(pairs)} Q&A pairs from {file_path}!")
        return pairs

    except Exception as e:
        # best-effort loader: report the problem and hand back an empty dataset
        print(f"Error loading in file...\n{e}")
        return []

In [4]:
# Load the three dataset splits; load_and_prepare prints how many pairs each file held.
train_data = load_and_prepare("processed_data/train.json")       ## TRAIN ##
val_data = load_and_prepare("processed_data/validation.json")    ## VAL ##
test_data = load_and_prepare("processed_data/test.json")         ## TEST ##

# sanity check: show the first pair of every split to confirm the format
for _split in (train_data, val_data, test_data):
    print(_split[0])

Loaded 18749 Q&A pairs from processed_data/train.json!
Loaded 2344 Q&A pairs from processed_data/validation.json!
Loaded 2344 Q&A pairs from processed_data/test.json!
{'question': 'will eating late evening meals increase my cholesterol?', 'response': 'no. it is what you are eating (as well as your genetics) not when you eat it. it depends on the kinds of foods that you eat. make sure that you are eating healthy foods in order to not gain great amount of cholesterol. you have to always watch what you eat in order to have a healthy skin and body. you may check out www. clearclinic. com for great ideas to achieve an acne free skin.'}
{'question': 'who is affected by arthritis?', 'response': 'arthritis sufferers include men and women children and adults. approximately 350 million people worldwide have arthritis. nearly 40 million people in the united states are affected by arthritis including over a quarter million children! more than 27 million americans have osteoarthritis. approximately

Copying the exact BERT model from BERT.ipynb and using its output as input to the text generation step.

In [5]:
class BERT:
    """
    This class is the implementation of a BERT recommendation model.
    Without tuning, this model will recommend a response by embedding the incoming question,
    comparing it (cosine similarity) against the embeddings of every stored question, and
    returning the responses attached to the closest stored questions.

    The result is meant to be passed to a generative model as context.
    """

    def __init__(self, model_name="bert-base-uncased", use_gpu=True):
        """
        Initializer function to establish the pre-trained model

        @PARAMS:
            - model_name -> specific pre-trained model to initialize
            - use_gpu    -> speed up runtime by using GPU if available
        """
        # empty cache if possible
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # fall back to CPU when CUDA is missing or the caller opted out
        self.device = torch.device("cuda" if torch.cuda.is_available() and use_gpu else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name).to(self.device)

        # filled in by embed_data(); both must be set before any similarity search
        self.data = None
        self.embeddings = None

    def get_embeddings(self, questions, batch_size=16):
        """
        Function to return a set of question embeddings.

        Each question is represented by the hidden state at token position 0 of the
        model's last layer (the [CLS] token for BERT-style tokenizers).

        @PARAMS:
            - questions  -> list of patient queries to embed
            - batch_size -> batch split to save time

        @RETURNS:
            - numpy array with one embedding row per question (empty array for no questions)
        """
        embeddings = []
        total = len(questions)

        with torch.no_grad():
            # process in batches to save time
            for batch_number, start in enumerate(range(0, total, batch_size), start=1):
                batch_questions = questions[start:start + batch_size]

                # tokenize this batch
                inputs = self.tokenizer(
                    batch_questions,
                    padding=True,
                    truncation=True,
                    max_length=512,
                    return_tensors="pt"
                ).to(self.device)

                # get the BERT outputs
                outputs = self.model(**inputs)

                # keep only the position-0 vector and convert to numpy
                batch_embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()
                embeddings.extend(batch_embeddings)

                # print progress every 10 batches; counting batches (rather than testing
                # start + 1) is what makes this fire for batch sizes other than 1
                if batch_number % 10 == 0:
                    processed = min(start + batch_size, total)
                    print(f"Processed {processed}/{total} questions")

        return np.array(embeddings)

    def embed_data(self, data, batch_size=16):
        """
        Function to convert the data into a form that includes the embeddings.
        Also stores the data and its question embeddings on the instance for later searches.

        @PARAMS:
            - data       -> patient and doctor interactions (dicts with a 'question' key;
                            'response' is read later by get_similar_responses)
            - batch_size -> batch split to save time

        @RETURNS:
            - Dataset with a 'question' column and an 'embedding' column
        """
        self.data = data

        questions = [point['question'] for point in data]

        # only the questions are embedded; responses are looked up by index afterwards
        embeddings = self.get_embeddings(questions, batch_size)
        self.embeddings = embeddings

        # finally return all info with the embeddings
        return Dataset.from_dict({
            'question': questions,
            'embedding': embeddings.tolist()
        })

    def get_similar_responses(self, question, amount=3, threshold=.7):
        """
        Function to search through all the data and get the similar interaction cases.

        @PARAMS:
            - question  -> the input to search against
            - amount    -> how many to return at most
            - threshold -> cosine similarity results must be at or above this

        @RETURNS:
            - list of dicts ('question', 'response', 'similarity_score'), best match first;
              empty when nothing meets the threshold, amount is not positive, or no data is stored

        @RAISES:
            - ValueError when embed_data() has not been called yet
        """
        if self.data is None or self.embeddings is None:
            raise ValueError("No data accessible...")

        # a non-positive amount asks for nothing; without this guard the slice [-0:]
        # below would select every stored question instead of none
        if amount <= 0:
            return []

        # nothing to compare against
        if len(self.data) == 0:
            return []

        # start by getting the embedding for the question input
        question_embedding = self.get_embeddings([question])

        # now run cosine similarity against the previously calculated data embeddings
        results = cosine_similarity(question_embedding, self.embeddings)[0]
        # argsort is ascending, so take the last `amount` indices and reverse them
        top_indices = np.argsort(results)[-amount:][::-1]

        # keep only the top values that meet the threshold
        return [
            {
                'question': self.data[index]['question'],
                'response': self.data[index]['response'],
                'similarity_score': float(results[index]),
            }
            for index in top_indices
            if results[index] >= threshold
        ]

    def get_recommendation(self, question, amount=3, threshold=.7):
        """
        Function to return a recommendation of a response based on a user input.

        @PARAMS:
            - question  -> the input from the patient waiting for the returned doctor response
            - amount    -> how many similar cases to consider (passed to get_similar_responses)
            - threshold -> minimum cosine similarity for a case to count (passed through as well)

        @RETURNS:
            - dict with 'similarity_score', 'recommendation' and 'other_matches';
              'most_similar_question' is included only when a match was found
        """
        # first get all similar cases
        similar_cases = self.get_similar_responses(question, amount=amount, threshold=threshold)

        if similar_cases:
            best, *rest = similar_cases
            return {
                "similarity_score": best['similarity_score'],
                "most_similar_question": best['question'],
                "recommendation": best['response'],
                "other_matches": [
                    {
                        'similarity_score': case['similarity_score'],
                        'question': case['question'],
                        "response": case['response']
                    } for case in rest
                ]
            }

        # o/w return a default answer suggesting for seeking other advice
        return {
            "similarity_score": 0.0,
            "recommendation": "I recommend consulting a healthcare provider for a proper evaluation.",
            "other_matches": []
        }

Now the OpenAI code!

In [6]:
# read in the .env file (kept one directory above the notebook)
with open("../.env", "r", encoding="utf-8") as f:
    content = f.readlines()

# pattern to capture key after '='
pattern = r'API_KEY\s*=\s*(\S+)'

# scan every line rather than only the first, so the key may sit anywhere in the file
# (this also copes with an empty file, where indexing line 0 would fail)
API_KEY = None
for line in content:
    match = re.findall(pattern, line.strip())
    if match:
        API_KEY = match[0]
        break

# stop here with a clear message; continuing without a key would only fail later
# with a confusing error when the client is built
if API_KEY is None:
    raise RuntimeError("API_KEY not found!")

# first connect with the API_KEY we just read
client = OpenAI(api_key=API_KEY)

Get the fine-tuned model.

In [7]:
# Look up the fine-tuning job created earlier; the job ID comes from openai.ipynb.
_fine_tune_job_id = 'ftjob-q2v0M5wDwhtPyEYGaMdJk4BI'
fine_tuned_model = client.fine_tuning.jobs.retrieve(_fine_tune_job_id)

Finally, let's run the model and save the contents to a file!

In [None]:
# Fit the retriever on the training split so each test query has a pool to search.
bert = BERT()
bert.embed_data(train_data)

# Only the first 100 test pairs are sent to the API, to save money.
_sample = test_data[:100]
questions = [pair['question'] for pair in _sample]
responses = [pair['response'] for pair in _sample]

# Neither of these varies per question, so build them once up front.
system_message = "You are a medical expert helping patients talk through their concerns."
_separator = "=" * 80 + "\n"

with open('ensemble_results.txt', "w", encoding='utf-8') as out_file:
    for question, response in zip(questions, responses):
        # retrieval step: closest stored interactions for this question
        bert_response = bert.get_recommendation(question)

        # the retrieved matches are embedded verbatim (dict repr) as context in the prompt
        user_message = f"""
                Attached is some context and a question, using the context come up with an accurate answer to the user's query. 
                The context is some real doctor responses to questions that are similar, if relevant, use these real responses to craft an answer to the user query. 
                Do not just copy the contents of the attached context as it might be answering a different question, just use it as support when crafting your answer.
                Only return the answer and make sure the information is accurate.
                
                Context: {bert_response}
                Question: {question}

        """
        # generation step: ask the fine-tuned model to answer using that context
        completion = client.chat.completions.create(
            model="ft:gpt-3.5-turbo-0125:personal::AWJTxERx",
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
        )
        answer = completion.choices[0].message.content

        # one record per question, closed by a separator line
        out_file.writelines([
            f"Question: {question}\n",
            f"Answer: {answer}\n",
            f"Expected Response: {response}\n",
            _separator,
        ])