In [0]:
# Notebook setup cell. The install command below is commented out, so the
# restart call is the only statement that executes when this cell runs.
#%pip install -q transformers sentence-transformers
# NOTE(review): presumably restarts the notebook's Python process so that
# freshly installed libraries become importable -- confirm against the
# Databricks `dbutils.library` documentation. `dbutils` is not defined in this
# file; it is expected to be provided by the notebook runtime.
dbutils.library.restartPython()

In [0]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from databricks.vector_search.client import VectorSearchClient
from mlflow.pyfunc import PythonModel
import mlflow

class DocQAModel(PythonModel):
    """MLflow pyfunc model answering a question from retrieved document chunks.

    The flow is: embed the question, fetch the most similar text chunks from a
    Databricks Vector Search index, join them into one context string, and run
    an extractive question-answering pipeline over that context.
    """

    def load_context(self, context):
        """Load the embedding model, the QA pipeline and the vector index.

        Args:
            context: MLflow-provided model context (unused here).
        """
        # Load the embedding model (same as used to create doc_embeddings table)
        self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

        # Load Hugging Face Q&A model
        self.qa_pipeline = pipeline(
            "question-answering",
            model="deepset/roberta-base-squad2",
            tokenizer="deepset/roberta-base-squad2"
        )

        # Load vector search index (replace with your actual index and endpoint)
        self.vs_client = VectorSearchClient()
        self.index = self.vs_client.get_index(
            endpoint_name="your_vector_endpoint",      # 🔁 Replace this
            index_name="doc_embeddings_index"          # 🔁 Replace this
        )

    @staticmethod
    def _first_question(model_input):
        """Return the first question contained in ``model_input``.

        Accepts anything indexable by ``"question"``: a pandas DataFrame, a
        dict of lists/arrays, or a dict holding a single string.
        """
        questions = model_input["question"]
        if isinstance(questions, str):
            # A bare string must not be indexed, or only its first character
            # would be used as the question.
            return questions
        if hasattr(questions, "iloc"):
            # Positional access: a pandas Series may carry a non-zero-based
            # index, in which case the label lookup ``[0]`` raises KeyError.
            return questions.iloc[0]
        return questions[0]

    @staticmethod
    def _extract_text_chunks(results, column="text_chunk"):
        """Pull the non-empty values of ``column`` out of a search response.

        ``similarity_search`` returns a dict in which the column names live
        under ``manifest.columns`` and the rows (plain lists, one value per
        column) under ``result.data_array``. An iterable of row mappings is
        accepted as well, for index objects that already return that shape.

        Raises:
            KeyError: if rows were returned but ``column`` is not among the
                columns listed in the response manifest.
        """
        if not isinstance(results, dict):
            return [row[column] for row in (results or []) if row[column]]

        rows = (results.get("result") or {}).get("data_array") or []
        if not rows:
            return []

        manifest_columns = (results.get("manifest") or {}).get("columns") or []
        column_names = [col.get("name") for col in manifest_columns]
        if column not in column_names:
            raise KeyError(
                f"Column {column!r} not found in vector search response "
                f"(available columns: {column_names})"
            )
        position = column_names.index(column)
        return [row[position] for row in rows if row[position]]

    def predict(self, context, model_input):
        """Answer the first question in ``model_input``.

        Args:
            context: MLflow-provided model context (unused here).
            model_input: Object with a ``"question"`` column/key; only the
                first question is answered.

        Returns:
            The answer string extracted by the QA pipeline, or an empty string
            when the vector search returns no text chunks.
        """
        # Extract the user's question
        question = self._first_question(model_input)

        # Embed the question
        question_vec = self.embedding_model.encode([question]).tolist()[0]

        # Retrieve top 5 most similar chunks using vector search
        results = self.index.similarity_search(
            query_vector=question_vec,
            columns=["chunk_id", "file_name", "text_chunk"],
            num_results=5
        )

        # Combine retrieved chunks into one context string
        chunks = self._extract_text_chunks(results)
        if not chunks:
            # The QA pipeline rejects an empty context, so report "no answer"
            # instead of failing the whole request.
            return ""
        context_text = "\n\n".join(chunks)

        # Ask HuggingFace Q&A model
        answer = self.qa_pipeline(
            question=question,
            context=context_text
        )["answer"]

        return answer
