In [11]:
import sys
import os
import getpass
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import Docx2txtLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import GooglePalm
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate

In [17]:
# Ask for the API key without echoing it to the notebook output.
# The value is passed as `google_api_key` to GooglePalmEmbeddings and
# GooglePalm in later cells, so the prompt must ask for a Google PaLM key
# (the previous prompt wrongly asked for an OpenAI key).
api_key = getpass.getpass("Enter your Google PaLM API key: ")

In [18]:
# Load every supported file found in the local "docs" folder into a single
# list of LangChain documents.
docs_dir = "./docs"

# (suffixes, loader class) pairs, checked in order. Suffixes are lower-case
# because file names are lower-cased before matching, so "REPORT.PDF" is
# loaded just like "report.pdf".
# NOTE(review): ".doc" is routed to Docx2txtLoader exactly as before; confirm
# that legacy binary .doc files actually load with it.
loaders_by_suffix = (
    ((".pdf",), PyPDFLoader),
    ((".docx", ".doc"), Docx2txtLoader),
    ((".txt",), TextLoader),
)

documents = []
# Sorted so the load order does not depend on the filesystem's listing order.
for file in sorted(os.listdir(docs_dir)):
    lowered_name = file.lower()
    for suffixes, loader_cls in loaders_by_suffix:
        if lowered_name.endswith(suffixes):
            # Same "./docs/<name>" path form as before.
            loader = loader_cls(f"{docs_dir}/{file}")
            documents.extend(loader.load())
            break
    # Files with any other extension are skipped.

In [19]:
# Split the loaded documents into chunks before embedding them.
# chunk_size / chunk_overlap are passed straight to CharacterTextSplitter.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 10}
text_splitter = CharacterTextSplitter(**splitter_options)
# `documents` is rebound to the split chunks; re-running this cell alone
# would split the already-split chunks again.
documents = text_splitter.split_documents(documents)

In [20]:
# Embed the split documents with Google PaLM embeddings and store them in a
# Chroma vector store whose persist directory is "./data".
palm_embeddings = GooglePalmEmbeddings(google_api_key=api_key)
vectordb = Chroma.from_documents(
    documents,
    embedding=palm_embeddings,
    persist_directory="./data",
)
# Explicitly persist the store after building it.
vectordb.persist()

In [22]:
# Build the conversational retrieval chain: a Google PaLM LLM answering over
# a retriever backed by the Chroma store.
palm_llm = GooglePalm(google_api_key=api_key)
# search_kwargs={'k': 6} is forwarded to the retriever — presumably the
# number of chunks fetched per question; confirm against the Chroma docs.
doc_retriever = vectordb.as_retriever(search_kwargs={'k': 6})
pdf_qa = ConversationalRetrievalChain.from_llm(
    llm=palm_llm,
    retriever=doc_retriever,
    verbose=False,
    return_source_documents=True,
)

In [23]:
# ANSI escape sequences used to colour the console output:
# banner (yellow), input prompt (green) and answers (white).
yellow, green, white = (
    "\033[0;33m",
    "\033[0;32m",
    "\033[0;39m",
)

In [24]:
# Start with an empty conversation; the chat loop appends
# (question, answer) tuples to this list.
chat_history = []

# Print the three-line welcome banner; the first line switches the console
# colour to yellow.
print(
    f"{yellow}---------------------------------------------------------------------------------",
    "Welcome to the DocBot. You are now ready to start interacting with your documents",
    "---------------------------------------------------------------------------------",
    sep="\n",
)

[0;33m---------------------------------------------------------------------------------
Welcome to the DocBot. You are now ready to start interacting with your documents
---------------------------------------------------------------------------------


In [25]:
# Interactive question/answer loop over the indexed documents.
#
# Each non-empty prompt is sent to `pdf_qa` together with the running
# `chat_history`; the answer is printed and the (question, answer) pair is
# appended to the history so that follow-up questions keep their context.
# Typing one of the exit commands (any letter case, surrounding whitespace
# ignored) ends the loop.
exit_commands = {"exit", "quit", "q", "f"}

while True:
    try:
        # Strip so that " exit" or a whitespace-only line behaves as expected.
        query = input(f"{green}Prompt: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt is treated as a request to
        # leave instead of surfacing a traceback.
        query = "exit"

    if query.lower() in exit_commands:
        print('Exiting')
        # `break` rather than sys.exit(): in a notebook sys.exit() raises
        # SystemExit, which ends the cell as an error instead of finishing it.
        break

    if not query:
        # Nothing was typed; prompt again without calling the chain.
        continue

    result = pdf_qa({"question": query, "chat_history": chat_history})
    print(f"{white}Answer: " + result["answer"])
    chat_history.append((query, result["answer"]))

[0;39mAnswer: ['Xuezhi Wang', 'Jason Wei', 'Dale Schuurmans', 'Quoc Le', 'Ed Chi', 'Sharan Narang', 'Aakanksha Chowdh-
ery', and 'Denny Zhou']
[0;39mAnswer: Optimization trajectory is a list of past solutions with their corresponding optimization scores.
[0;39mAnswer: The meta-prompt contains the following two essential parts:
1. Optimization problem description.
2. Optimization trajectory.
[0;39mAnswer: The meta-prompt contains two essential parts.
1. Optimization problem description.
2. Optimization trajectory.
[0;39mAnswer: This paper proposes Optimization by PROmpting (OPRO), a simple and effective approach to utilize large language models (LLMs) as optimizers. OPRO enables quick adaptation to different tasks by changing the problem description in the prompt, and the optimization process can be customized by adding instructions to specify the desired properties of the solutions.
