In [2]:
#<imports>
import gzip
import json
import os
import sys
from getpass import getpass

import requests
from tqdm import tqdm

from langchain import hub
from langchain.chains import create_history_aware_retriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
#<\imports>

In [None]:
#<set openai api token>
#url for how to use azure open ai: https://python.langchain.com/v0.2/docs/integrations/llms/azure_openai/

# Reuse a key that is already exported so this cell does not block on input
# when the notebook is run end to end; otherwise ask for it without echoing.
# The explicit prompt replaces getpass()'s default "Password: " label.
OPENAI_KEY = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')
# Set the API token in the environment variable
os.environ['OPENAI_API_KEY'] = OPENAI_KEY
#<\set openai api token>

In [4]:
#<helper functions>
def http_get(url:str, path:str, timeout:float=60.0) -> None:
    """
    Downloads a URL to a given path on disc.

    The payload is streamed into ``path + "_part"`` and only moved to ``path``
    once the download finished, so ``path`` never holds a truncated file.

    Args:
        url: Address to download.
        path: Destination file; missing parent directories are created.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` when the server answers
            with an error status.
    """
    target_dir = os.path.dirname(path)
    if target_dir != "":
        os.makedirs(target_dir, exist_ok=True)

    download_filepath = path + "_part"

    # The response is a context manager: leaving the block releases the
    # connection on every exit path, including errors.
    with requests.get(url, stream=True, timeout=timeout) as req:
        if req.status_code != 200:
            print("Exception when trying to download {}. Response {}".format(url, req.status_code), file=sys.stderr)
            req.raise_for_status()
            # Reached only for non-200 statuses that raise_for_status() lets through.
            return

        content_length = req.headers.get("Content-Length")
        total = int(content_length) if content_length is not None else None

        try:
            with open(download_filepath, "wb") as file_binary:
                # tqdm as a context manager closes the bar even if a chunk fails
                with tqdm(unit="B", total=total, unit_scale=True) as progress:
                    for chunk in req.iter_content(chunk_size=1024):
                        if chunk:  # filter out keep-alive new chunks
                            progress.update(len(chunk))
                            file_binary.write(chunk)
        except BaseException:
            # Also covers a kernel interrupt: do not leave a half-written
            # "_part" file behind, then let the original error propagate.
            if os.path.exists(download_filepath):
                os.remove(download_filepath)
            raise

    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows when the target exists.
    os.replace(download_filepath, path)
#<\helper functions>

In [None]:
#<load wikipedia data>
wikipedia_filepath = 'simplewiki-2020-11-01.jsonl.gz'

# Fetch the dump only when it is not on disk yet, so re-running the notebook
# does not download the same archive again.
if not os.path.exists(wikipedia_filepath):
    http_get('http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz', wikipedia_filepath)
#<\load wikipedia data>

In [None]:
#<check data>
wikipedia_filepath = 'simplewiki-2020-11-01.jsonl.gz'

# The archive is gzipped JSON Lines: every line decodes to one record that
# carries a 'paragraphs' list.
with gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as wiki_file:
    records = (json.loads(raw_line.strip()) for raw_line in wiki_file)
    # Only the first paragraph of each record is kept.
    # (To keep every paragraph instead, extend with record['paragraphs'].)
    passages = [record['paragraphs'][0] for record in records]

print("Passages:", len(passages))
#<\check data>

In [None]:
#<use subset of wiki>
# Narrow the corpus to a small topical slice.
# A passage is kept when it passes both of the original filters:
#   1. one of word_keywords appears as a whitespace-delimited token, and
#   2. one of phrase_keywords appears as a substring.
# NOTE(review): filter 1 looks like a leftover of an earlier experiment; it is
# kept so the selected set of passages stays the same - confirm whether it is wanted.
word_keywords = ['fish', 'india', 'cheetah']
phrase_keywords = ['flying fish', 'india', 'cheetah']


def _is_selected(passage):
    """Return True when the passage satisfies both keyword filters."""
    lowered = passage.lower()
    tokens = set(lowered.split())
    has_word = any(word in tokens for word in word_keywords)
    has_phrase = any(phrase in lowered for phrase in phrase_keywords)
    return has_word and has_phrase


# any() yields each passage at most once. The previous nested
# "for x in [...] if x in ..." form emitted a passage once per matching
# keyword, and the copies multiplied every time the cell was re-run.
passages = [passage for passage in passages if _is_selected(passage)]

print(len(passages))
# Guard the preview so an empty selection does not raise IndexError
print(passages[0] if passages else "No passages matched the keyword filters")
#<\use subset of wiki>

In [None]:
#<load llm>
# Chat model used by the chains in this notebook; temperature is pinned to 0.
chatgpt = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0,
)
#<\load llm>

In [None]:
#<make embedings and store in chroma vec db>
# details here: https://openai.com/blog/new-embedding-models-and-api-updates
openai_embed_model = OpenAIEmbeddings(model='text-embedding-3-small')
docs = [Document(page_content=doc) for doc in passages]
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=300)
chunked_docs = splitter.split_documents(docs)

# Settings shared by the "open" and "create" paths so both address the same
# collection with the same distance function.
# need to set the distance function to cosine else it uses euclidean by default
# check https://docs.trychroma.com/guides#changing-the-distance-function
chroma_settings = dict(collection_name='wiki_db',
                       collection_metadata={"hnsw:space": "cosine"},
                       persist_directory="./wiki_db")

#create vector DB of docs and embeddings
# Open the persisted collection first and embed only when it is still empty.
# Chroma.from_documents adds to an existing collection, so calling it on every
# run would store (and pay for embedding) each chunk again.
# To rebuild after changing `passages`, delete the ./wiki_db directory.
chroma_db = Chroma(embedding_function=openai_embed_model, **chroma_settings)
if not chroma_db.get(limit=1)["ids"]:
    chroma_db = Chroma.from_documents(documents=chunked_docs,
                                      embedding=openai_embed_model,
                                      **chroma_settings)

#<\make embedings and store in chroma vec db>

In [None]:
#<load chroma db from disc if already created>
# Re-open the 'wiki_db' collection persisted under ./wiki_db, using the same
# embedding model for query-time embedding.
chroma_db = Chroma(
    collection_name='wiki_db',
    persist_directory="./wiki_db",
    embedding_function=openai_embed_model,
)
#<\load chroma db from disc if already created>

In [None]:
#<make retriever>
# Retrieval settings: request up to k=5 chunks and pass a score threshold of 0.2
# to the "similarity_score_threshold" search type.
retriever_search_kwargs = {"k": 5, "score_threshold": 0.2}

similarity_retriever = chroma_db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retriever_search_kwargs,
)
#<\make retriever>

In [None]:
#<test retriever>
# Smoke test: the cell output shows what the retriever returns for one query.
sample_query = 'what is the capital of India?'
similarity_retriever.invoke(sample_query)
#<\test retriever>

In [None]:
#<make QnA rag chain>
# A ready-made alternative is available from the hub: hub.pull("rlm/rag-prompt").
# The hand-written template below is used instead; it expects two variables,
# {question} and {context}.
prompt = """You are an assistant for question-answering tasks.
            Use the following pieces of retrieved context to answer the question.
            If you don't know the answer, just say that you don't know.
            Keep the answer upto 5 lines unless the user asks for more information

            Question:
            {question}

            Context:
            {context}

            Answer:
         """

prompt_template = ChatPromptTemplate.from_template(template=prompt)

#helper
def format_docs(docs):
    """Join the page_content of each document into one string, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

#rag chain
# LCEL pipeline fed with the raw question string:
#   - "context":  the question goes through the retriever, and the returned
#                 documents are flattened to text by format_docs
#   - "question": the question is forwarded unchanged (RunnablePassthrough)
# The resulting dict fills prompt_template, whose output is sent to the chat model.
qa_rag_chain = (
    {
        "context": similarity_retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt_template
    | chatgpt
)
#<\make QnA rag chain>

In [None]:
#<test query>
# One-off question against the single-turn chain; the chain returns a chat
# message, of which only the text content is printed.
query = "What is the fastest fish to eat?"
result = qa_rag_chain.invoke(input=query)
print(result.content)
#<\test query>

In [None]:
#<make conversational rag>
# --- Step 1: turn a follow-up question into a standalone one -----------------
# A hub alternative exists: hub.pull("langchain-ai/chat-langchain-rephrase").
rephrase_system_prompt = """Given a chat history and the latest user question
which might reference context in the chat history, formulate a standalone question
which can be understood without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is."""

rephrase_prompt = ChatPromptTemplate.from_messages([
    ("system", rephrase_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Retriever that takes the chat history into account via rephrase_prompt
history_aware_retriever = create_history_aware_retriever(
    llm=chatgpt,
    retriever=similarity_retriever,
    prompt=rephrase_prompt,
)

# --- Step 2: answer from the retrieved context --------------------------------
qa_system_prompt = """You are an assistant for question-answering tasks.
                      Use the following pieces of retrieved context to answer the question.
                      If you don't know the answer, just say that you don't know.
                      Keep the answer upto 5 lines unless the user asks for more information

                      Context:
                      {context}
                  """

qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Chain that fills {context} of qa_prompt with the documents and calls the chat model
question_answer_chain = create_stuff_documents_chain(llm=chatgpt, prompt=qa_prompt)

# --- Step 3: retrieval + answering in one chain --------------------------------
# Note: this rebinds qa_rag_chain; from here on it expects a dict with
# "input" and "chat_history" rather than a bare question string.
qa_rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

#<\make conversational rag>

In [None]:
# Walkthrough of a multi-turn conversation. The cells below are order-dependent:
# each turn is invoked with the current chat_history, and the question/answer
# pair is appended to that list by hand afterwards.
chat_history = []

# First turn: asked against an empty history.
question = "What is the capital of India?"
response = qa_rag_chain.invoke({"input": question, "chat_history": chat_history})
print(response['answer'])

In [None]:
# Same question again through stream(): every yielded chunk is printed as-is.
for chunk in qa_rag_chain.stream({"input": question, "chat_history": chat_history}):
  print(chunk)

In [None]:
# Inspect the history list (turns are only appended by the extend() cells below).
chat_history

In [None]:
# Record the first turn as a human message followed by the AI answer.
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=response["answer"])])
chat_history

In [None]:
# Follow-up that only makes sense together with the previous turn ("this city").
question = "Tell me more about this city"
response = qa_rag_chain.invoke({"input": question, "chat_history": chat_history})
print(response['answer'])

In [None]:
# Record the second turn.
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=response["answer"])])
chat_history

In [None]:
# Change of topic while the earlier turns are still in the history.
question = "Can fish really fly?"
response = qa_rag_chain.invoke({"input": question, "chat_history": chat_history})
print(response['answer'])

In [None]:
# Show the whole response object, not just its 'answer' entry.
response

In [None]:
# Record the third turn.
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=response["answer"])])

In [None]:
# Full history after three turns.
chat_history

In [None]:
# Only the most recent question/answer pair.
chat_history[-2:]

In [None]:
# From here on, each cell asks and records the turn in one go.
question = "What is the fastest animal?"
response = qa_rag_chain.invoke({"input": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=response["answer"])])
print(response['answer'])

In [None]:
# Follow-up whose "its" refers back to the previous turn.
question = "Tell me about its different species"
response = qa_rag_chain.invoke({"input": question, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=question),
                     AIMessage(content=response["answer"])])
print(response['answer'])

In [None]:
#<example with returning the source of info>
# New conversation; besides the answer, print the documents the chain returned
# under 'context' so the reader can see where the answer came from.
chat_history = []
question = "which is the fastest animal?"

chain_input = {"input": question, "chat_history": chat_history}
response = qa_rag_chain.invoke(chain_input)

print('Answer:', response['answer'])
print('Sources:')
for document in response['context']:
    # one blank line after every source
    print(document, end='\n\n')
#<\example with returning the source of info>

In [None]:
# Append the finished turn (question, then answer) to the running history in place.
chat_history += [
    HumanMessage(content=question),
    AIMessage(content=response["answer"]),
]

In [None]:
# Follow-up that refers back to "this animal" from the previous turn; the
# sources used for the answer are printed again.
question = "Tell me more, including different types of this animal and their details"

chain_input = {"input": question, "chat_history": chat_history}
response = qa_rag_chain.invoke(chain_input)

print('Answer:', response['answer'])
print('Sources:')
for document in response['context']:
    # one blank line after every source
    print(document, end='\n\n')