# SK: Q&A over Documents

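An example would be a tool that lets you query a product catalog for items of interest.
Prerequisite: you have already run the L4-SK-CreateDB notebook to import the product catalog CSV file into a Chroma vector DB. (Note: the cells below connect to a Qdrant memory store rather than Chroma, so the collections they query must already exist there.)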

In [None]:
#pip install --upgrade semantic-kernel

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load its variables into the process environment;
# later cells read their API keys and URLs from os.environ.
# The return value is bound to `_` -- presumably so the cell prints nothing.
_ = load_dotenv(find_dotenv()) # read local .env file

In [2]:
import semantic_kernel as sk
import os
import logging
from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

# DEBUG-level logging for the whole process while experimenting.
logging.basicConfig(level=logging.DEBUG)
# Use the module's real name. The previous quoted '__name__' created a logger
# that was literally called "__name__".
logger = logging.getLogger(__name__)
kernel = sk.Kernel(log=logger)

# Raises KeyError right away if the key is missing from the environment.
api_key = os.environ['OPENAI_API_KEY']
kernel.add_chat_service(
    "chat-gpt", OpenAIChatCompletion("gpt-3.5-turbo-0301", api_key)
)

<semantic_kernel.kernel.Kernel at 0x10f33b6d0>

In [3]:
from semantic_kernel.connectors.ai.open_ai import OpenAITextEmbedding
# Register the OpenAI embedding model with the kernel under the id "ada".
ada_embeddings = OpenAITextEmbedding("text-embedding-ada-002", api_key)
kernel.add_text_embedding_generation_service("ada", ada_embeddings)

<semantic_kernel.kernel.Kernel at 0x10f33b6d0>

In [4]:
# ------------------------------------------------
from qdrant_client import QdrantClient
from semantic_kernel.connectors.memory.qdrant import QdrantMemoryStore

# Connection settings come from the environment; a missing variable raises
# KeyError here.
qdrant_url = os.environ['QDRANT_URL']
qdrant_api_key = os.environ['QDRANT_API_KEY']
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key, timeout=20)

# Build the memory store, then overwrite its private `_qdrantclient`
# attribute so it uses the authenticated client configured above.
# NOTE(review): 768 is presumably the vector size. The kernel's embedding
# service is text-embedding-ada-002, which is documented to return
# 1536-dimensional vectors -- confirm 768 matches the target collection.
qdrantMemory = QdrantMemoryStore(768)
qdrantMemory._qdrantclient = qdrant_client

kernel.register_memory_store(memory_store=qdrantMemory)

In [5]:
# The question text serves as the prompt of the semantic function created
# here and is reused as the memory search query in the next cell.
query = "Công đoàn là gì?"
summarize = kernel.create_semantic_function(
    query,
    temperature=0.0,
)


In [7]:
# Query the vector DB locally
docs = await kernel.memory.search_async(collection="context", limit=5, min_relevance_score=0.3, query=query)

KeyError: '_is_reference'

In [None]:
# Inspect the text of the first search result returned above.
docs[0].text

In [None]:
# Now let's augment the LLM query with retrieval from the vector DB, using the RAG (Retrieval Augmented Generation) pattern.
# The prompt below should be self-explanatory about what we are trying to do with this RAG pattern.
async def ragqna(kernel, query, limit, collection="outdoordb") -> str:
    """Answer ``query`` using the RAG (Retrieval Augmented Generation) pattern.

    Args:
        kernel: Kernel whose ``memory`` is searched and whose
            ``create_semantic_function`` builds the LLM call.
        query: Natural-language question; used for the memory search and
            substituted into the prompt.
        limit: Maximum number of documents to retrieve from memory.
        collection: Name of the memory collection to search. Defaults to
            ``"outdoordb"``, the value that was previously hard-coded.

    Returns:
        The result of invoking the semantic function.
        NOTE(review): the annotation says ``str``, but callers in this
        notebook wrap the result in ``str()``, so the call probably returns
        a context object -- confirm before relying on the annotation.
    """
    # Step 1 - Retrieval: fetch the documents matching the query.
    docs = await kernel.memory.search_async(
        collection=collection,
        limit=limit,
        min_relevance_score=0.3,
        query=query,
    )
    # Step 2 - Augment: build the prompt context from the retrieved texts,
    # placing a triple-backtick line between consecutive documents.
    qdocs = "\n```\n".join(doc.text for doc in docs)

    prompt = """{{ $qdocs}} 
    
    Question: Please query above documents delimited by triple backticks for {{ $query }} 
    and return results in a table in markdown and summarize each one.
    """

    # Step 3 - Generation: ask the LLM for the markdown table and summaries
    # requested in the prompt.
    summarize = kernel.create_semantic_function(prompt, temperature=0.0)
    context_variables = sk.ContextVariables(variables={
        "qdocs": qdocs,
        "query": query,
    })
    response = summarize(variables=context_variables)
    return response

In [None]:
result = await ragqna(kernel, "shirts with sunblocking",3)

In [None]:
# Show the string form of the RAG result as the cell output.
str(result)


In [None]:
from IPython.display import display, Markdown
# Wrap the result's string form in a Markdown object and display it.
answer_markdown = Markdown(str(result))
display(answer_markdown)