# RAG Process - Inference

### Load the needed libraries

In [1]:
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores.pinecone import Pinecone
from domino_data.vectordb import DominoPineconeConfiguration
from mlflow.deployments import get_deploy_client
from langchain_community.embeddings import MlflowEmbeddings
from langchain_community.chat_models import ChatMlflow

import pinecone
from getpass import getpass
import warnings
warnings.filterwarnings('ignore')

  from tqdm.autonotebook import tqdm
* 'schema_extra' has been renamed to 'json_schema_extra'


### Load environment variables

Notice that the API Keys that you would normally need are commented out. Using Domino AI Gateway Endpoints for your model access and Domino Vector Data Sources for vector store access means that the API keys are managed and stored securely by the Admins.

In [2]:
# Direct provider credentials are deliberately left disabled: model access goes
# through Domino AI Gateway endpoints and vector store access through a Domino
# Data Source, so no API keys are entered in this notebook.
#os.environ['ANTHROPIC_API_KEY'] = getpass("Enter Anthropic key:")
#os.environ['OPENAI_API_KEY'] = getpass("Enter OpenAI API key:")
#os.environ['PINECONE_API_KEY'] = getpass("Enter Pinecone API key:")
#os.environ['PINECONE_ENV'] = getpass("Enter Pinecone Environment:")

#OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
#PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

# Written into the process environment as the literal string 'false'.
os.environ.update(TOKENIZERS_PARALLELISM='false')

# Pinecone environment name; None when the variable is not set.
# NOTE(review): the disabled line above sets 'PINECONE_ENV', but this reads
# 'PINECONE_API_ENV' -- confirm which variable name the workspace provides.
PINECONE_ENV = os.environ.get('PINECONE_API_ENV')

### Create embeddings to embed queries using Domino AI Gateway Endpoint in LangChain

In [10]:
# Embedding client used to embed queries. It targets the deployments URI held
# in the DOMINO_MLFLOW_DEPLOYMENTS environment variable (a missing variable
# raises KeyError) and the named embedding endpoint.
embed = MlflowEmbeddings(
    endpoint="embedding-ada-002ja2",
    target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
)

### Initialize Pinecone vector store using a Domino-specific Environment

In [11]:
# Pinecone metadata field that contains the document text
text_field = "text"

# Domino Pinecone Data Source name
datasource_name = "nwh-benefits"

# Load the Domino Pinecone Data Source configuration; it is handed to the
# native Pinecone client below as its OpenAPI configuration.
conf = DominoPineconeConfiguration(datasource=datasource_name)

# The native Pinecone python client requires a non-empty api_key.
# Use DOMINO_VECTOR_DB_METADATA when it is set, otherwise fall back to the
# Data Source name.
api_key = os.environ.get("DOMINO_VECTOR_DB_METADATA", datasource_name)

# Initialize the Pinecone client through the Domino Data Source configuration
pinecone.init(
    api_key=api_key,
    environment=PINECONE_ENV,
    openapi_config=conf,
)

# Open a handle to the existing Pinecone index
index_name = "nwh-plus-benefits"
index = pinecone.Index(index_name)

# Wrap the index in a LangChain vector store. The Embeddings object itself is
# passed (not the bound `embed.embed_query` function): handing LangChain a bare
# callable is the deprecated form and triggers a deprecation warning, while the
# object is still used to embed queries via its `embed_query` method.
vectorstore = Pinecone(index, embed, text_field)

### Check the index's current stats as a simple checkpoint

You'll see that the index has a ```total_vector_count```. This shows the number of vectors that are currently present in the index.

In [12]:
# Checkpoint: display the index statistics (dimension, fullness, per-namespace
# and total vector counts) to confirm the index is reachable and populated.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.00696,
 'namespaces': {'': {'vector_count': 696}},
 'total_vector_count': 696}

### Create the Prompt Template

In [13]:
# Prompt text sent to the chat model. It has two placeholders:
#   {question} - the user's question
#   {context}  - the extracted document parts supplied alongside the question
# The text is assembled line by line so that significant whitespace (for
# example the trailing space after "a question.") stays visible and intact.
prompt_template = (
    "You are an AI assistant for answering questions about employee benefits in health care policy documentation.\n"
    "You are given the following extracted parts of a long document and a question. \n"
    "If you don't know the answer, just say \"Hmm, I'm not sure.\" Don't try to make up an answer.\n"
    "If the question is not about employee benefits or policy coverage, politely inform them that you are tuned to only answer questions pertaining to policy coverage.\n"
    "Question: {question}\n"
    "=========\n"
    "{context}\n"
    "=========\n"
    "Answer in Markdown:\n"
)

# LangChain prompt object built from the template above
PROMPT = PromptTemplate(
    input_variables=["question", "context"],
    template=prompt_template,
)

# Keyword arguments that carry the custom prompt into the QA chain
chain_type_kwargs = dict(prompt=PROMPT)

### Use the Domino AI Gateway Endpoint via the LangChain `ChatMlflow` object

To use a different model, change the ```endpoint``` parameter to a different endpoint name.  

In [14]:
# Chat model client for the RAG chain. It targets the deployments URI held in
# the DOMINO_MLFLOW_DEPLOYMENTS environment variable (a missing variable raises
# KeyError); change `endpoint` to switch to a different model.
# NOTE(review): temperature=1.0 is fairly high for answers that should stay
# grounded in retrieved documents -- confirm this value is intentional.
rag_llm = ChatMlflow(
    endpoint="chat-gpt35turbo-ja",
    temperature=1.0,
    target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
)

### Initialize the RetrievalQA chain for answering questions from the embedded data in the vectorstore

In [15]:
# Build the question-answering chain:
#   llm                     - chat model served by the AI Gateway endpoint
#   retriever               - Pinecone-backed vector store (Domino Data Source),
#                             configured with search_kwargs k=5
#   chain_type_kwargs       - carries the custom PROMPT defined earlier
#   return_source_documents - also return the retrieved documents with the answer
qa_chain = RetrievalQA.from_chain_type(
    llm=rag_llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

### Get the question to answer from the docs and run the chain

In [16]:
# Ask for the question interactively and drop surrounding whitespace.
user_question = input("Please ask your policy question:").strip()

# Fail fast on blank input instead of sending an empty query to the
# embedding and chat endpoints.
if not user_question:
    raise ValueError("Please enter a non-empty policy question.")

# Run the chain. RetrievalQA reads the question from the "query" key, and
# invoke() replaces the deprecated direct call `qa_chain(...)`. The returned
# dict is indexed below by 'result' and 'source_documents'.
result = qa_chain.invoke({"query": user_question})

Please ask your policy question: what about maternity?


### Retrieve the result

In [17]:
# The generated answer text is stored under the 'result' key of the chain output.
result['result']

'Northwind Health Plus provides comprehensive coverage for maternity care, including prenatal and post-natal care, and labor and delivery services. Prenatal care, delivery, and post-natal care are all covered, including necessary treatments, tests, medications, and follow-up care for up to six weeks post-delivery. However, the coverage does not include services for pre-existing conditions or elective procedures. If you have questions about your coverage or need help understanding the policy, you can contact the Member Services team at 1-800-123-4567.'

### Display the first of the source documents retrieved from the vector store and used for the answer

In [18]:
# Show the text of the first retrieved source document only. The chain output
# includes 'source_documents' because the chain was built with
# return_source_documents=True; index [0] raises IndexError if nothing was retrieved.
result['source_documents'][0].page_content

'coverage for maternity care, including prenatal and post -natal care and labor and delivery \nservices.  \nPrenatal Care  \nPrenatal care is essential for both the mother and baby, as it helps to ensur e the health and \nsafety of both during pregnancy. Northwind Health Plus covers all services related to \nprenatal care, including office visits, tests, and ultrasounds. Additionally, Northwind Health \nPlus covers any necessary vaccines or medications that may  be prescribed by your doctor \nduring prenatal care.  \nDelivery and Post -natal Care  \nNorthwind Health Plus covers all services related to the delivery of your baby, including \nlabor and delivery, as well as post -natal care for both mother and baby. This includ es any \nnecessary treatments, tests, or medications prescribed by your doctor. Northwind Health \nPlus also covers any necessary follow -up care for both mother and baby for up to six weeks \npost -delivery.  \nExceptions'