# RAG

## Install Packages

In [None]:
#!pip3 install langchain

## Import packages

In [None]:
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import ElasticsearchStore
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.runnable import RunnableLambda
from langchain.schema import HumanMessage
from urllib.request import urlopen
import os, json

# Load variables from a .env file (python-dotenv) before reading them below.
load_dotenv()

# Elasticsearch connection settings, read from the environment.
# Each value is None when the corresponding variable is not set.
elastic_cloud_id = os.environ.get('ES_CLOUD_ID')
elastic_user = os.environ.get('ES_USER')
elastic_password = os.environ.get('ES_PASSWORD')
elastic_endpoint = os.environ.get('ES_ENDPOINT')

# Name of the Elasticsearch index used by this notebook.
elastic_index_name = 'byte-discuss-langchain-rag'


## Add documents

### Download the sample dataset and deserialize the documents

In [None]:
# Location of the sample dataset (a JSON file of workplace documents).
url = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/example-apps/workplace-search/data/data.json"

# Use the response as a context manager so the HTTP connection is closed
# as soon as the body has been read, even if reading or parsing fails.
with urlopen(url) as response:
    workplace_docs = json.loads(response.read())

### Split Documents into Passages

In [None]:
# Raw text of every document, and (in the same order) the fields that are
# attached to each resulting passage as metadata.
content = [doc["content"] for doc in workplace_docs]
metadata = [
    {
        "name": doc["name"],
        "summary": doc["summary"],
        "rolePermissions": doc["rolePermissions"],
    }
    for doc in workplace_docs
]

# Split each text into passages; metadata[i] is attached to the passages
# produced from content[i].
text_splitter = CharacterTextSplitter(chunk_size=50, chunk_overlap=0)
docs = text_splitter.create_documents(content, metadatas=metadata)

## Index Documents using ELSER - SparseVectorRetrievalStrategy()

In [None]:

# Sparse-vector retrieval strategy for the vector store.
sparse_strategy = ElasticsearchStore.SparseVectorRetrievalStrategy()

# Credentials for the Elasticsearch deployment, taken from the environment
# settings read earlier in the notebook.
es_credentials = {
    "es_cloud_id": elastic_cloud_id,
    "es_user": elastic_user,
    "es_password": elastic_password,
}

# Build the store and index the split passages into `elastic_index_name`.
es = ElasticsearchStore.from_documents(
    docs,
    index_name=elastic_index_name,
    strategy=sparse_strategy,
    **es_credentials,
)

# Bare expression so the notebook shows the store as the cell output.
es

## Show Result

In [None]:
def showResults(output):
    """Print the number of results, then each result on its own line.

    Args:
        output: A sized iterable of results, for example the list
            returned by a similarity search. It only needs to support
            ``len()`` and iteration.
    """
    print("Total results: ", len(output))
    # Iterate the results directly instead of indexing by position, so any
    # sized collection works, not just indexable sequences.
    for result in output:
        print(result)

## Search

In [None]:
# Run a similarity search against the store and print what comes back.
search_query = "what is the goal for 2024"
r = es.similarity_search(search_query)
showResults(r)

## RAG with Elasticsearch - Method 1 (Using Retriever)

In [None]:
# Expose the Elasticsearch store as a retriever so it can be used in a chain.
retriever = es.as_retriever()

# Prompt that tells the model to answer only from the supplied context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# The chain input feeds both prompt variables: the retriever turns it into
# the context, and the passthrough forwards it unchanged as the question.
prompt_inputs = {"context": retriever, "question": RunnablePassthrough()}

# retrieve -> fill prompt -> chat model -> plain string
chain = prompt_inputs | prompt | ChatOpenAI() | StrOutputParser()

chain.invoke("vacation policy")

## RAG with Elasticsearch - Method 2 (Without Retriever)

### Add Context

In [None]:
def add_context(question: str):
    """Return the text of the passages matching *question*, one per line.

    Runs a similarity search on the module-level ``es`` store and joins the
    ``page_content`` of every hit with newline characters.
    """
    hits = es.similarity_search(question)
    passages = [hit.page_content for hit in hits]
    return "\n".join(passages)

### Chain

In [None]:

# Prompt that tells the model to answer only from the supplied context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# The chain is built with runnables only. An earlier `LLMChain(...)`
# assignment was dropped: LLMChain is not imported in this notebook (it
# raised NameError) and its result was overwritten by the chain below.
# Here the context comes from add_context() instead of a retriever, and
# the passthrough forwards the input unchanged as the question.
chain = (
    {"context": RunnableLambda(add_context), "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI()
    | StrOutputParser()
)

chain.invoke("canada employees guidelines")

## Compare responses with and without RAG

In [None]:
# Read the question that is sent to both pipelines.
q = input("Ask Question: ")

# Question to OpenAI (no retrieved context)

chat = ChatOpenAI()

messages = [HumanMessage(content=q)]

# The chat model returns a message object; the answer text is in `.content`.
gpt_res = chat.invoke(messages)

# Question with RAG (the chain ends in StrOutputParser, so this is a str)

gpt_rag_res = chain.invoke(q)


# Responses

# Interpolate the message text rather than the message object, so both
# answers are printed as plain text.
s = f"""
ChatGPT Response:

{gpt_res.content}

ChatGPT with RAG Response:

{gpt_rag_res}
"""

print(s)