In [118]:
from langchain import PromptTemplate, LLMChain, HuggingFaceHub
from langchain_community.llms import GPT4All
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
from sentence_transformers.util import dot_score
import numpy as np

In [119]:
# We will be using ORCA2-13b and FALCON-7b
ORCA_MODEL_PATH="C:\\Users\\azaha\\AppData\\Local\\nomic.ai\\GPT4All\\orca-2-13b.Q4_0.gguf"
FALCON_MODEL_PATH="C:\\Users\\azaha\\AppData\\Local\\nomic.ai\\GPT4All\\gpt4all-falcon-q4_0.gguf"

# HF_KEY = os.environ["HF_KEY"]
# os.environ['HUGGINGFACEHUB_API_TOKEN'] = HF_KEY

# Setup embedding model for Vector DB
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L12-v2"
emb_kw_args = {"device":"cuda"}
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL, model_kwargs=emb_kw_args)

In [120]:
## helper method that concats retrieved text for the prompt.
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

## utility method to print text as part of the lang-chain chain
def _print(text):
    print(text)
    return text

In [121]:
# RAG and Non-RAG Langchain prompt template

rag_template = """ 
            Answer only using the context.

            Context: {context}

            Question: {question}
               
            Answer: """


non_rag_template = """ 
            Answer the question: {question}
               
            Answer: """

In [122]:
# Define all the components of the chain

# Prompt template for RAG request
rag_prompt = PromptTemplate(template=rag_template, input_variables=['context','question'])

# Prompt template for Non-RAG request
non_rag_prompt = PromptTemplate(template=non_rag_template, input_variables=['question'])

# Document pipeline

# Load text from document source (directory of blog posts)
dir_loader = DirectoryLoader("C:\\Users\\azaha\\py_code\\rag_test\\data\\posts", glob="**/*.txt", use_multithreading=True)
blog_docs = dir_loader.load()

# Initialise splitter and generate chunks (token chunking with 10% overlap)
text_splitter = TokenTextSplitter.from_tiktoken_encoder(chunk_size=100, chunk_overlap=10)
docs = text_splitter.split_documents(blog_docs)

# Initialise vector database (persisted Chroma) and associated retriever
db = Chroma.from_documents(docs, embeddings, persist_directory="./data")
retriever = db.as_retriever()

# Initialise GPT4ALL with ORCA model to run on local machine
gpt4all = GPT4All(model=ORCA_MODEL_PATH)



In [123]:
# Create the Non-RAG chain
non_rag_chain = LLMChain(
    prompt=non_rag_prompt,
    llm=gpt4all
)

In [124]:
# Create the RAG chain - note the use of _print utility method which can be removed
rag_chain = (
    {"context":retriever | format_docs | _print, "question": RunnablePassthrough()} 
    | rag_prompt 
    | gpt4all
)


In [125]:
## Main Run

# Question database - feel free to modify
question_db = [
    "What is InfluxDB?",
    "What does 104+101 equal to?",
    "What is the Tibco Action Processor?"
]

In [126]:
# Comparing RAG/Non-RAG response
rag_response = []
rag_embed = []

non_rag_response = []
non_rag_embed = []

for q in question_db:
    rag_response.append(rag_chain.invoke(q))
    rag_embed.append(np.array(embeddings.embed_query(rag_response[-1])))

    non_rag_response.append(non_rag_chain.run(q))
    non_rag_embed.append(np.array(embeddings.embed_query(non_rag_response[-1])))

# Using sentence transformer to calculate embeddings and then using that to calculate similarity.
for idx, r in enumerate(rag_embed):
    nr = non_rag_embed[idx]
    
    print("RAG:",rag_response[idx], "Non RAG:",non_rag_response[idx])
    print(question_db[idx], "-->",np.linalg.norm(nr-r), dot_score(nr,r))



box web-service) from which we need to 'extract' data, then we need to 'transform' it from source format to destination format (filtering, mapping etc.) and finally 'load' it into the destination (a file, a database, a black-box web-service).In many situations, using a commercial third-party data-load tool or a data-loading component integrated with the destination  (e.g. SQL*Loader)

box web-service) from which we need to 'extract' data, then we need to 'transform' it from source format to destination format (filtering, mapping etc.) and finally 'load' it into the destination (a file, a database, a black-box web-service).In many situations, using a commercial third-party data-load tool or a data-loading component integrated with the destination  (e.g. SQL*Loader)

box web-service) from which we need to 'extract' data, then we need to 'transform' it from source format to destination format (filtering, mapping etc.) and finally 'load' it into the destination (a file, a database, a black