This notebook is a hands-on coding exercise based on an article, supplemented with some of my own notes and understanding. For more detail, see the original article: https://towardsdatascience.com/retrieval-augmented-generation-rag-from-theory-to-langchain-implementation-4e9bd5f6a4f2

#### Prepare env

In [1]:
import dotenv
# Populate the process environment from the .env file two directories up.
# Presumably this is where the Azure OpenAI settings used later come from,
# since the clients below are built without explicit credentials -- confirm.
dotenv.load_dotenv(dotenv_path="../../.env")

True

#### Collect and load your data

In [2]:
import requests
from langchain.document_loaders import TextLoader

# Download the sample State of the Union transcript and load it as LangChain
# documents.
url = "https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/modules/state_of_the_union.txt"

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page (e.g. a 404 body) from being
# written to disk and silently indexed as if it were the speech.
res = requests.get(url, timeout=30)
res.raise_for_status()

# Write and read with the same explicit encoding so the round trip does not
# depend on the platform's default locale encoding.
with open("state_of_the_union.txt", "w", encoding="utf-8") as f:
    f.write(res.text)

loader = TextLoader('./state_of_the_union.txt', encoding="utf-8")
documents = loader.load()

#### Chunk your documents

In [3]:
from langchain.text_splitter import CharacterTextSplitter
# Split the loaded documents into smaller pieces for embedding, using a
# target chunk size of 500 with 50 of overlap between neighbouring chunks.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = text_splitter.split_documents(documents)

#### Embed and store chunks

In [None]:
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions

# Connect to Weaviate using its embedded options rather than a remote URL.
client = weaviate.Client(embedded_options=EmbeddedOptions())

# Embed every chunk with Azure OpenAI embeddings and store the results in
# Weaviate through the client created above.
# NOTE(review): by_text=False presumably makes lookups use the supplied
# embedding vectors instead of Weaviate's own text search -- confirm against
# the LangChain Weaviate documentation.
vectorstore = Weaviate.from_documents(
    documents=chunks,
    embedding=AzureOpenAIEmbeddings(),
    client=client,
    by_text=False,
)

#### Retrieve

In [5]:
# Expose the vector store through the retriever interface so it can supply the
# "context" input of the RAG chain. No arguments are passed, so the library's
# default search settings apply.
retriever = vectorstore.as_retriever()


#### Augment

In [6]:
from langchain.prompts import ChatPromptTemplate

# Prompt for the question-answering step. It has two placeholders:
# {question} (the user's query) and {context} (the retrieved text).
# The template is assembled from adjacent string literals so that the trailing
# spaces before some of the newlines are visible rather than hidden at the end
# of lines inside a triple-quoted string.
template = (
    "You are an assistant for question-answering tasks. \n"
    "Use the following pieces of retrieved context to answer the question. \n"
    "If you don't know the answer, just say that you don't know. \n"
    "Use three sentences maximum and keep the answer concise.\n"
    "Question: {question} \n"
    "Context: {context} \n"
    "Answer:\n"
)
prompt = ChatPromptTemplate.from_template(template)

print(prompt)

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. \nUse the following pieces of retrieved context to answer the question. \nIf you don't know the answer, just say that you don't know. \nUse three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:\n"))]


#### Generate

In [13]:
from langchain.chat_models import AzureChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Chat model backed by the Azure deployment named below; temperature=0 keeps
# sampling randomness to a minimum.
llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo-16k", openai_api_version="2023-08-01-preview", temperature=0)

# RAG pipeline: retrieve -> fill the prompt -> call the model -> plain string.
rag_chain = (
    # The chain is invoked with a bare query string. The dict sends that string
    # to both entries: the retriever turns it into "context", and
    # RunnablePassthrough forwards it unchanged as "question".
    {"context": retriever,  "question": RunnablePassthrough()} 
    | prompt 
    | llm
    # Reduce the chat model's message to its text content.
    | StrOutputParser() 
)

query = "What did the president say about Justice Breyer"
# Last expression of the cell, so the answer string is shown as the cell output.
rag_chain.invoke(query)

# LangSmith traces of this chain can be viewed here:
# 1. https://smith.langchain.com/public/52c1f36e-d311-4c42-9f27-9822ad588986/r
# 2. https://smith.langchain.com/public/4983e081-2965-4de6-a96c-e87e916265ff/r

"The president thanked Justice Breyer for his service and acknowledged his dedication to serving the country. The president also mentioned that he nominated Judge Ketanji Brown Jackson as a successor to continue Justice Breyer's legacy of excellence."

In [23]:
# The dict-based chain syntax used earlier can be hard to grasp; the same
# pipeline can also be written as below. This variant takes a dict input
# ({"question": ...}) and adds a "context" key to it before filling the prompt.
# Note that this rebinds the name `rag_chain`.

rag_chain = (
    # assign() keeps the incoming keys and adds "context", computed by running
    # the retriever on the incoming "question".
    RunnablePassthrough.assign(context=(lambda x: retriever.get_relevant_documents(x["question"])))
    | prompt
    | llm
    | StrOutputParser()
)

# **NOTE**: The variant below, which joins the page contents into one string,
# was observed not to work with gpt 3.5 but to work with gpt 4.
# NOTE(review): as written, the square brackets wrap the retrieved list in
# another list, so `doc` is the whole list and `doc.page_content` would raise
# AttributeError (assuming get_relevant_documents returns a list of documents).
# Drop the brackets before re-enabling this snippet.
# rag_chain = (
#     RunnablePassthrough.assign(context=(lambda x: "\n\n".join(doc.page_content for doc in [retriever.get_relevant_documents(x["question"])])))
#     | prompt
#     | llm
#     | StrOutputParser()
# )


query = "What did the president say about Justice Breyer"
# This chain expects a dict with a "question" key rather than a bare string.
rag_chain.invoke({"question": query})

"The president thanked Justice Breyer for his service and acknowledged his dedication to serving the country. The president also mentioned that he nominated Judge Ketanji Brown Jackson as a successor to continue Justice Breyer's legacy of excellence."