In [None]:
# NOTE(review): -U installs the newest releases and no versions are pinned; pin known-good versions to keep the notebook reproducible.
%pip install -qU langchain pinecone-client

In [None]:
# Import necessary modules
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from RAG import init_rag


# Name of the index handed to init_rag (a project-local helper imported from RAG).
index_name = 'notion-db-chatbot'
# init_rag returns two values, unpacked here as the OpenAI API key and the vector store.
# NOTE(review): presumably the store is backed by Pinecone (see the pip install cell) — confirm in RAG.py.
openai_api_key, vectordb = init_rag(index_name)


In [None]:
# Sanity-check retrieval on its own: similarity search with k=3 for a sample question.
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retrieved_docs = retriever.get_relevant_documents(
    "How do I answer: This lead doesnt have a score?"
)
# Bare last expression so the notebook displays how many documents came back.
len(retrieved_docs)


In [None]:
# Show each retrieved document followed by the source recorded in its metadata.
for document in retrieved_docs:
    print(document)
    print(document.metadata['source'])


In [None]:

# Initialize the chat model used by the QA chain (gpt-3.5-turbo, temperature 0.0).
llm = ChatOpenAI(
        openai_api_key=openai_api_key,
        model_name='gpt-3.5-turbo',
        temperature=0.0
    )

# Custom answer-style prompt.
# NOTE(review): the text refers to "the following pieces of context", but the template has no
# placeholder for that context — {question} is the only variable. Confirm which variables the
# target chain expects before wiring this prompt in.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
Question: {question}
Helpful Answer:"""
# NOTE(review): rag_prompt_custom is not passed to the chain built in query_llm_with_source.
rag_prompt_custom = PromptTemplate.from_template(template)


# Helper that runs a question through the retrieval chain and keeps the cited sources
def query_llm_with_source(retriever, query):
    """Answer ``query`` with a "stuff" RetrievalQAWithSourcesChain over ``retriever``.

    A new chain is built on every call from the notebook-level ``llm``. The raw
    result is printed and then returned; later cells read its ``'answer'`` and
    ``'sources'`` entries.

    Args:
        retriever: Retriever handed to the chain to look up documents.
        query: Question text, passed to the chain under the ``'question'`` key.

    Returns:
        Whatever the chain call returns for the given question.
    """
    sourced_qa = RetrievalQAWithSourcesChain.from_chain_type(
        retriever=retriever,
        chain_type="stuff",
        llm=llm,
    )
    # Only the question is sent; rag_prompt_custom is not passed to the chain.
    chain_inputs = {'question': query}
    answer_with_sources = sourced_qa(chain_inputs)
    print(str(answer_with_sources))
    return answer_with_sources

# Rebind `retriever` with a wider similarity search (k=6), replacing the k=3 retriever from the earlier cell.
retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 6})

query = "How do I answer: This lead doesnt have a score?"
# `results` is read again by the cells that follow.
results = query_llm_with_source(retriever, query)


In [None]:
# Show the generated answer, then the sources entry returned alongside it.
print(results['answer'])
print(results['sources'])

In [None]:
# Turn the chain's `sources` string into clickable Notion links.
# Each source is the path of an exported Notion page, for example:
#   notion_data/support_runbook/Support runbooks d2a894351f944fc5b4abb9f29f30b4a4/User cannot deploy model from the Studio dd2630484a334e159dd9bf07086824ad.md
# The link slug is the file name without its extension and with spaces replaced by
# dashes, so it ends with the page id (here dd2630484a334e159dd9bf07086824ad).
prefix = 'https://www.notion.so/madkudu/'

def format_sources(sources):
    """Convert a sources string into a list of Notion page URLs.

    Args:
        sources: One or more exported-page paths. Several paths may be
            separated by newlines or by a comma that directly follows a
            ``.md`` extension (commas inside a page title are left alone).
            ``None`` or an empty string is accepted.

    Returns:
        A list of ``prefix + slug`` URLs, one per distinct source, in the
        order they first appear. Empty when no source is given.
    """
    if not sources:
        return []

    formatted_sources = []
    # Only a comma right after ".md" ends a path; turning it into a newline
    # lets splitlines() separate the entries without cutting through titles.
    for raw_source in sources.replace('.md,', '.md\n').splitlines():
        raw_source = raw_source.strip()
        if not raw_source:
            continue
        file_name = raw_source.split('/')[-1]
        # Drop only the final extension so a dot inside the title cannot cut off the page id.
        source_id = file_name.rsplit('.', 1)[0].replace(' ', '-')
        link = prefix + source_id
        # The same page can be cited more than once; keep one link per page.
        if link not in formatted_sources:
            formatted_sources.append(link)
    return formatted_sources


In [None]:
# Print the Notion link list built from the sources of the last answer.
print(format_sources(results['sources']))