In [None]:
# Goal: implement RAG (retrieval-augmented generation)

# Pending features:
# - restrict retrieval to documents that meet a high-score threshold

In [235]:
from langchain.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [236]:
# Connection settings for the Ollama endpoint used by this notebook.
base_url = "http://localhost:11434"
model = "zephyr"  # orca-mini, mistral, or zephyr

# Handlers are passed through `callbacks`; the older `callback_manager`
# keyword is deprecated in LangChain and emits a DeprecationWarning.
# The streaming stdout handler echoes the model's output as it is generated.
llm = Ollama(
    base_url=base_url,
    model=model,
    callbacks=[StreamingStdOutCallbackHandler()],
)

In [None]:
#llm("Hola!")

In [None]:
# Load the files found under the input directory (text, PDF, ...).
from langchain.document_loaders import DirectoryLoader

input_dir = "./data/"
data = DirectoryLoader(
    input_dir,
    use_multithreading=True,
).load()

# How many documents were loaded.
len(data)

In [None]:
# Break the loaded documents into overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
all_splits = text_splitter.split_documents(data)

# How many chunks were produced.
len(all_splits)

In [None]:
# Embed every chunk and store the vectors in a Chroma index under ./indexes/.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# existing index again - confirm before re-executing on a populated index.
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma

index_name = "index"
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=GPT4AllEmbeddings(),
    persist_directory=f"./indexes/{index_name}",
)

In [None]:
# Re-open the Chroma vector store from its directory on disk (verified to work).
index_name = "index"
db2 = Chroma(
    persist_directory=f"./indexes/{index_name}",
    embedding_function=GPT4AllEmbeddings(),
)

# Row count of the underlying collection; `_collection` is a private attribute.
print(f"Rows: {db2._collection.count()}")

In [None]:
# Open the stored index once more and run a similarity search (verified to work).
index_name = "index"
db3 = Chroma(
    persist_directory=f"./indexes/{index_name}",
    embedding_function=GPT4AllEmbeddings(),
)

query = "hello"
docs = db3.similarity_search(query)

# Display the matching documents.
docs

In [241]:
# Plain-string system prompt: it can be used without the template library,
# with the question text appended after its final line.
# Nothing in this cell formats the string, so "{context}" remains literal text
# unless the prompt is later passed through a prompt template.
# The trailing "\n" escape is a real newline (the literal is not a raw string).
system_prompt = """
You are an expert. Write your answer following these criteria:
* Respond exclusively based on the documents provided in the {context}.
* Cite the exact source next to each paragraph.
* Indicate date, location, and entities related to each fact you cite.
* Write in an elegant, professional, diplomatic style.

If the context documents provided do not contain the answer:
* do not generate a response based on your neural network, 
* instead respond with this sentence exactly: "The knowledge base does not have enough information about your question." 

This is the question you are responding: \n
"""

In [None]:
# LLM RAG query driven by the plain `system_prompt` string.
# The instructions are placed in the chain's prompt template instead of being
# prepended to the query: RetrievalQA hands the query text to the retriever, so
# prepending the system prompt made the similarity search run on the
# instruction text rather than on the question itself.
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# expose this index in a retriever interface
retriever = db2.as_retriever(search_type="similarity", search_kwargs={"k": 2})  # k = 10 gives nice results; can use "mmr" or "similarity"

# The question is injected through the {question} variable (never concatenated
# into the template text, so braces in a question cannot break formatting).
prompt_text = system_prompt + "{question}\n"
if "{context}" not in prompt_text:
    # The "stuff" chain fills the retrieved documents into {context}; add the
    # slot when the system prompt in scope does not already contain one.
    prompt_text += "\nContext documents:\n{context}\n"
qa_prompt = PromptTemplate.from_template(prompt_text)

# create a chain to answer questions
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": qa_prompt},
)
question = '''
Why is the sky blue?'''

# Only the question is sent as the query; `result` holds the answer under
# "result" and the retrieved chunks under "source_documents".
result = qa({"query": question})

In [None]:
# Ways to inspect the chain output (all verified to work):
#   result
#   result["source_documents"]
#   result["source_documents"][0].metadata
#   for source in result["source_documents"]: print(source.metadata)

# Print where each retrieved chunk came from, one source per line.
for source_doc in result["source_documents"]:
    print(source_doc.metadata["source"])

In [225]:
# LLM RAG with the system prompt folded into a prompt template (verified to work).
# Lesson learned: the content in the template takes priority over the system prompt.
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Build prompt
system_prompt = '''
You are an expert. Execute the instruction given to you following these criteria:
* Cite the exact source next to each paragraph.
* Indicate date, location, and entities related to each fact you cite.
* Write in an elegant, professional, diplomatic style. 
'''
# {context} receives the retrieved documents and {question} the user's query.
# The fallback sentence is now closed with its matching double quote so the
# model is shown exactly where the required sentence ends.
template = system_prompt + '''Use the following pieces of context: {context} to respond the instruction in this: {question}.
If the context provided does not contain the answer:
* do not generate a response, 
* instead respond with this sentence exactly: "The knowledge base does not have enough information about your question."
'''

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# expose this index in a retriever interface
retriever = db2.as_retriever(search_type="similarity", search_kwargs={"k": 5})  # k = 10 gives nice results; can use "mmr" or "similarity"

# Build the chain with the custom prompt; it is run in a later cell.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
)

# Display the final template text for inspection.
template

'\nYou are an expert. Execute the instruction given to you following these criteria:\n* Cite the exact source next to each paragraph.\n* Indicate date, location, and entities related to each fact you cite.\n* Write in an elegant, professional, diplomatic style. \nUse the following pieces of context: {context} to respond the instruction in this: {question}.\nIf the context provided does not contain the answer:\n* do not generate a response, \n* instead respond with this sentence exactly: "The knowledge base does not have enough information about your question.\n'

In [None]:
# Works with the cell above. It is very strict: if it does not find the exact
# answer, it says so.
question = "How does a person learns how to dance"

result = qa_chain({"query": question})

# Show the answer explicitly: a notebook only displays a cell's last
# expression, so a bare `result["result"]` on this line would be discarded.
print(result["result"])

# Show the source documents that were retrieved for the answer.
result["source_documents"]

In [None]:
# R & D

In [None]:
# Similarity search that also reports a score for every hit (verified to work).
query = "what is the meaning of ...?"
docs = db2.similarity_search_with_score(query)  # the lower the score, the more similar

# Display the hits; by default, the top 4 results are returned.
docs

# Further things to try.
# NOTE(review): each hit is presumably a (document, score) pair - confirm:
#   print(docs[0][0].page_content)
#   print(docs[0][0].metadata)
#   docs[0]
#   len(docs)


In [None]:
# delete collection
#print("Count:", db2._collection.count())
#db2.delete_collection()