In [1]:
import re
import time
from io import BytesIO
from typing import Any, Dict, List

import openai
import streamlit as st
from langchain import LLMChain, OpenAI
from langchain.agents import AgentExecutor, Tool, ZeroShotAgent
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import VectorStore
from langchain.vectorstores.faiss import FAISS
from pypdf import PdfReader
import os
from dotenv import load_dotenv

# Goal is to create a chatbot using a PDF to with conversational memory
# 1. Read PDF and parse text data
# 2. Convert text to docs 
# 3. create index with doc embeddings using Faiss
# 4. setup the qa system using openai api RetrievalQA with index.retriver()
# 5. Setup agent using the model, index and conversation history
# 6. Start talking


In [2]:
# Define a function to parse a PDF file and extract its text content
def parse_pdf(file: BytesIO) -> List[str]:
    pdf = PdfReader(file)
    output = []
    for page in pdf.pages:
        text = page.extract_text()
        # Merge hyphenated words
        text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
        # Fix newlines in the middle of sentences
        text = re.sub(r"(?<!\n\s)\n(?!\s\n)", " ", text.strip())
        # Remove multiple newlines
        text = re.sub(r"\n\s*\n", "\n\n", text)
        output.append(text)
    return output

In [3]:
doc = parse_pdf('data/file1.pdf')
doc


['Introduction   Monetto Mart has a section for food delivery like Pizzas etc.  inside gas station mart. We are looking  to develop a food ordering  system comprising of customer mobile app, kitchen monitor app, and  CMS system.  \n\nMenu   Our menu comprises of the items shown below. For pizza, please refer to  https://www.huntbrotherspizza.com/menu/ . Additional items  will be available as Add -Ons.   - Pizza by the Slice   - Whole Piz za  - Add-Ons  o Sandwich   \uf0a7 BBQ   \uf0a7 Chicken   \uf0a7 Veggie Patty   o Hotdogs   o Pastries   o Cookies   o Baklava  \n\nReference   Please refer to the Dominos India App  for development guidelines.  \n\nComplete Development Plat form   The complete system will comprise of  a combination of mobile and desktop applications.   - Customer App : Customer App ( Android and IOS ) will be available on relevant app stores for  download. Using this the customers will be able to locate the location and order from  the  mobile app while on the move.  

In [4]:
def text_to_docs(text: str) -> List[Document]:
    """Converts a string or list of strings to a list of Documents
    with metadata."""
    if isinstance(text, str):
        # Take a single string as one page
        text = [text]
    page_docs = [Document(page_content=page) for page in text]

    # Add page numbers as metadata
    for i, doc in enumerate(page_docs):
        doc.metadata["page"] = i + 1

    # Split pages into chunks
    doc_chunks = []

    for doc in page_docs:
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=2000,
            separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
            chunk_overlap=0,
        )
        chunks = text_splitter.split_text(doc.page_content)
        for i, chunk in enumerate(chunks):
            doc = Document(
                page_content=chunk, metadata={"page": doc.metadata["page"], "chunk": i}
            )
            # Add sources a metadata
            doc.metadata["source"] = f"{doc.metadata['page']}-{doc.metadata['chunk']}"
            doc_chunks.append(doc)
    return doc_chunks

In [5]:
pages = text_to_docs(doc)
pages


[Document(page_content='Introduction   Monetto Mart has a section for food delivery like Pizzas etc.  inside gas station mart. We are looking  to develop a food ordering  system comprising of customer mobile app, kitchen monitor app, and  CMS system.  \n\nMenu   Our menu comprises of the items shown below. For pizza, please refer to  https://www.huntbrotherspizza.com/menu/ . Additional items  will be available as Add -Ons.   - Pizza by the Slice   - Whole Piz za  - Add-Ons  o Sandwich   \uf0a7 BBQ   \uf0a7 Chicken   \uf0a7 Veggie Patty   o Hotdogs   o Pastries   o Cookies   o Baklava  \n\nReference   Please refer to the Dominos India App  for development guidelines.  \n\nComplete Development Plat form   The complete system will comprise of  a combination of mobile and desktop applications.   - Customer App : Customer App ( Android and IOS ) will be available on relevant app stores for  download. Using this the customers will be able to locate the location and order from  the  mobile ap

In [9]:
from dotenv import load_dotenv
load_dotenv()


True

In [10]:
OPENAI_API_KEY="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"

In [12]:
def test_embed():
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
    # Indexing
    # Save in a Vector DB
    # with st.spinner("It's indexing..."):
    index = FAISS.from_documents(pages, embeddings)
    print('Embeddings done...')
    # st.success("Embeddings done.", icon="✅")
    return index

In [13]:
index = test_embed()
index

  warn_deprecated(


Embeddings done...


<langchain_community.vectorstores.faiss.FAISS at 0x1484a1ab0>

In [14]:
# Set up the question-answering system
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(openai_api_key=OPENAI_API_KEY),
    chain_type = "map_reduce", 
    retriever=index.as_retriever(),
)

qa

  warn_deprecated(




In [15]:
# Define tools

tools = [
    Tool(
        name="State of Union QA System",
        func=qa.run,
        description="Useful for when you need to answer questions about the aspects asked. Input may be a partial or fully formed question.",
    )
]
prefix = """Have a conversation with a human, answering the following questions as best you can based on the context and memory available. 
            You have access to a single tool:"""
suffix = """Begin!"

{chat_history}
Question: {input}
{agent_scratchpad}"""



#create prompt using zero shot agent
prompt = ZeroShotAgent.create_prompt(
    tools,
    prefix=prefix,
    suffix=suffix,
    input_variables=["input", "chat_history", "agent_scratchpad"],
)

In [26]:
#setup coonversation memory
memory = ConversationBufferMemory(
                    memory_key="chat_history"
                )

In [None]:
# !pip3 install langchain-community

In [31]:
#create llm chain
from langchain_community.chat_models import ChatOpenAI
llm_chain = LLMChain(
                llm=ChatOpenAI(
                    temperature=0, openai_api_key=OPENAI_API_KEY, model_name="gpt-3.5-turbo"
                ),
                prompt=prompt,
            )
            

  warn_deprecated(


In [33]:
# create an agent
agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)


#execute the agent using agent executor
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=agent, tools=tools, verbose=True, memory =memory 
)


In [34]:
# Allow the user to enter a query and generate a response
query = "What are the various apps that I need ?"

In [35]:
res = agent_chain.run(query)
res



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to provide a list of apps that the person may need.
Action: State of Union QA System
Action Input: "List of essential apps"[0m
Observation: [36;1m[1;3m The list of essential apps includes: Customer App, POS App, Kitchen Monitor App, Kitchen Manager App, CMS Web Interface.[0m
Thought:[32;1m[1;3mI need to make sure I cover all the necessary apps.
Action: State of Union QA System
Action Input: "Are there any other essential apps?"[0m
Observation: [36;1m[1;3m The essential apps mentioned are the customer app, POS app, kitchen monitor app, kitchen manager app, and CMS web interface.[0m
Thought:[32;1m[1;3mI have covered all the necessary apps.
Final Answer: The various apps you need are the Customer App, POS App, Kitchen Monitor App, Kitchen Manager App, and CMS Web Interface.[0m

[1m> Finished chain.[0m


'The various apps you need are the Customer App, POS App, Kitchen Monitor App, Kitchen Manager App, and CMS Web Interface.'