In [1]:
import getpass
import os
import dotenv

# Read a local .env file (if one exists) into the process environment.
# Presumably this is where the OpenAI / LangChain API keys come from -- confirm.
dotenv.load_dotenv()

# Set the LANGCHAIN_TRACING_V2 flag for this process before any chain is built.
os.environ.update({"LANGCHAIN_TRACING_V2": "true"})

In [2]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [3]:
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

In [4]:
# Prompt text for the RAG chain. It exposes two placeholders, {context} and
# {question}. Written as adjacent string literals so that every space and
# newline in the final string is explicit.
prompt_template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. "
    "Use 5 lines for each answer.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "Answer: "
)

In [5]:
# Load the thesis PDF into LangChain Document objects.
loader = PyPDFLoader("./pdf/Thesis.pdf")
docs = loader.load()

# Cut the documents into overlapping chunks (1000 characters, 200 overlap)
# so each embedded piece stays small while keeping some surrounding context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Open (or create) the persisted Chroma collection and embed the chunks only
# while it is still empty. Calling Chroma.from_documents on every run appends
# the same chunks to the persisted collection again, so re-running this cell
# (or restarting the kernel) would fill the index with duplicates and the
# retriever would hand back the same passage several times.
# NOTE: if the PDF or the splitter settings change, clear the collection first
# (vectorstore.delete_collection()) and re-run this cell to rebuild the index.
vectorstore = Chroma(
    persist_directory="./chroma_db/chroma_db_openaiembeddings",
    embedding_function=OpenAIEmbeddings(),
)
if not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(splits)

# Retriever over the indexed thesis chunks, using the default search settings.
retriever = vectorstore.as_retriever()

# The prompt is built from the local `prompt_template` string; the hub prompt
# below is kept only as a reference alternative.
# prompt = hub.pull("rlm/rag-prompt")
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Chat model used to generate the answer; temperature is pinned to 0.
llm = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo",
)


def format_docs(docs):
    """Concatenate the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by one blank
        line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)


# rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

In [6]:
from langchain_core.runnables import RunnableParallel

def _context_as_text(inputs):
    """Return the "context" documents of ``inputs`` joined into a single string."""
    return format_docs(inputs["context"])


# Answering chain: replace the "context" entry with its plain-text form, fill
# the prompt, call the chat model and reduce the reply to a plain string.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=_context_as_text)
    | prompt
    | llm
    | StrOutputParser()
)

# Full pipeline. The incoming question is sent to the retriever ("context")
# and passed through unchanged ("question"); the answering chain then adds an
# "answer" key, so the result carries the source documents next to the answer.
rag_chain_with_source = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
).assign(answer=rag_chain_from_docs)

In [7]:
# Example query; the cell displays the dict returned by the chain
# (retrieved context documents, the question and the generated answer).
swan_question = "Explain extensively the SWAN working modes."
rag_chain_with_source.invoke(swan_question)

{'context': [Document(page_content='2.2.3 SWAN working modes.\nThere are two working modes deﬁned in the SWAN system: Maintenance mode and\nOperation mode . The Maintenance mode is runned when the aircraft is on the ground\nand has the functions of associate and disassociate the sensor/actuator nodes from the', metadata={'page': 18, 'source': './pdf/Thesis.pdf'}),
  Document(page_content='All the WDCs are connected to a SWAN server. The purpose of the SWAN server is\nto control and manage all the components of the WSN. The software implemented in\nthe SWAN server acts as a gate to the data collected by the wired network through the\nWDCs. The SWAN server also provides an interface to the client applications and the\nHMI.\nThe client applications operate with the data collected by the sensor nodes. Also they\ncan send commands to change the state of the actuator nodes. The HMI permit the\ninteraction to display and manage the data in a comprehensible way.\n2.2.1 Wireless network topolog

In [8]:
# # cleanup
# vectorstore.delete_collection()