In [None]:
!pip install gradio simpletransformers pandas sentence_transformers

# Q&A application
+ Load the vector database.
+ Compute the embedding vector of the query.
+ Retrieve the context whose embedding is most similar to the query embedding.
+ Feed the (context, query) pair to the LLM to obtain the answer.
+ Return the predicted answer to the application (Gradio).

In [None]:
from sentence_transformers import SentenceTransformer
from simpletransformers.question_answering import QuestionAnsweringModel
import gradio as gr
import random
import pandas
import os, json, time

# os.environ["TOKENIZERS_PARALLELISM"] = "False"

class PredictAnswer:
    """Answer a question by retrieving a stored context and running a QA model on it.

    On construction the object loads a sentence-embedding model, the vector
    database stored in ``embedding.json`` and a question-answering model from
    ``./outputs/best_model``. Calling the instance with a question returns a
    formatted string with the predicted answers and their probabilities.

    Each record of ``embedding.json`` is read through its ``"embedding"`` and
    ``"context"`` keys; any other keys are ignored by this class.
    """

    def __init__(self):
        # embedding model
        self.emb_model_path = "TencentBAC/Conan-embedding-v1"
        self.emb_model = SentenceTransformer(self.emb_model_path, device="cpu")

        # Load vector database
        with open("embedding.json", "r", encoding="utf-8") as f_in:
            self.db_vector = json.load(f_in)

        # Keep the embeddings in a separate list so they can be handed to the
        # similarity function in one call; positions match self.db_vector.
        self.db_emb_vectors = [data["embedding"] for data in self.db_vector]

        # Q & A model
        self.qa_model = QuestionAnsweringModel("bert", "./outputs/best_model")

    def __call__(self, query: str) -> str:
        """Return the formatted predicted answers for ``query``.

        Args:
            query: The question text entered by the user.

        Returns:
            One ``"* <percent>%\\n<answer>\\n\\n"`` block per predicted answer,
            concatenated. An answer equal to ``"empty"`` is shown as
            ``"not-found-answer"``. A blank query yields an empty string
            without invoking either model.
        """
        # Nothing to embed or answer for a blank question.
        if not query or not query.strip():
            return ""

        emb_query = self.emb_model.encode(query)  # embedding vector of the query
        similarities = self.emb_model.similarity(emb_query, self.db_emb_vectors)

        # Pick the stored context whose embedding scores highest against the query.
        idx_max = pandas.Series(similarities[0]).idxmax()
        context = self.db_vector[idx_max]["context"]

        # Input structure passed to the QA model: one context with one question.
        llm_query = [
            {
                "context": context,
                "qas": [
                    {
                        "question": query,
                        "id": str(random.random()),
                    }
                ],
            }
        ]

        print(json.dumps(llm_query, indent=4))

        predictions, probabilities = self.qa_model.predict(llm_query, n_best_size=2)

        blocks = []
        for answer, probability in zip(
            predictions[0]["answer"], probabilities[0]["probability"]
        ):
            # Round rather than truncate: 0.29 * 100 is 28.999999999999996 in
            # floating point and int() would display it as 28%.
            percent = int(round(float(probability) * 100))
            text = "not-found-answer" if answer == "empty" else answer
            blocks.append(f"* {percent}%\n{text}\n\n")

        return "".join(blocks)


# Build the predictor once; its models and vector database are loaded here.
predict_answer = PredictAnswer()
# Quick check without the UI:
# print(predict_answer("What is the Grotto at Notre Dame"))

# Sample questions offered below the input box.
example_questions = [
    "Where is the headquarters of the Congregation of the Holy Cross?",
    "What is the primary seminary of the Congregation",
    "What is the oldest structure?",
]

# Two-column layout: question and submit button on the left, answer on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            text_question = gr.Textbox(label="Question")
            button_submit = gr.Button(value="Submit")
        with gr.Column():
            text_answer = gr.Textbox(label="Predicted Answer")

    # Clicking the button passes the question to the predictor and shows its output.
    button_submit.click(
        predict_answer,
        inputs=text_question,
        outputs=text_answer,
    )
    examples = gr.Examples(
        examples=example_questions,
        inputs=[text_question],
    )

demo.launch()