<a href="https://colab.research.google.com/github/cdelia/ai_colabs/blob/main/ChatAboutPdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 💬📄 Chat About PDF
### This notebook lets you ask GPT-4 questions about the PDF you upload.

(First we need to work around a compatibility problem.
At the time of writing, chromadb doesn't work with the newer versions of Pydantic, so we pin an older Pydantic release. Eventually we should be able to remove or skip this step...)

In [None]:
!pip install -q pydantic==1.10.12

Let's install the requirements

In [None]:
!pip install -q langchain openai pypdf gradio chromadb tiktoken

Now let's enter an OpenAI API key.
You can create one on the [OpenAI API keys page](https://platform.openai.com/account/api-keys).


---



In [None]:
import os
import getpass as getpass

# Prompt for the key with getpass (input is not echoed) and store it in this
# process's environment under OPENAI_API_KEY for the cells that follow.
os.environ['OPENAI_API_KEY'] = getpass.getpass("Enter OpenAI API Key: ")


In [None]:
import os
from langchain.agents.agent_toolkits import (create_vectorstore_agent, VectorStoreInfo, VectorStoreToolkit)
from langchain.document_loaders import  PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Chroma
import gradio as gr

# Read the API key from the OPENAI_API_KEY environment variable.
# NOTE(review): `apikey` is not referenced again in the visible code — confirm
# whether it is still needed.
apikey = os.getenv('OPENAI_API_KEY')
# Chat model handed to the agent built in learn_contents().
# NOTE(review): temperature=0.9 is on the high side for document Q&A —
# consider whether a lower value is intended.
llm = ChatOpenAI(model_name="gpt-4", temperature=0.9)
# Embedding model used when indexing document chunks in learn_contents().
embeddings = OpenAIEmbeddings()
# Assigned by learn_contents(); remains None until a file has been processed.
agent_executor = None

def learn_contents(file_path):
    """Index the PDF at *file_path* and rebuild the question-answering agent.

    The file is loaded and split with ``PyPDFLoader``, the resulting documents
    are embedded into a Chroma collection named ``documentContents``, and the
    module-level ``agent_executor`` is rebound to a new vector-store agent
    that searches that collection.

    Args:
        file_path: Path of the PDF file to load.

    Returns:
        None. The result is published through the ``agent_executor`` global.
    """
    global agent_executor

    documents = PyPDFLoader(file_path).load_and_split()
    vector_db = Chroma.from_documents(
        documents,
        embeddings,
        collection_name='documentContents',
    )

    # Describe the store so the agent's toolkit knows what it is searching.
    store_description = VectorStoreInfo(
        name='pdf document',
        description="The pdf document to search",
        vectorstore=vector_db,
    )

    agent_executor = create_vectorstore_agent(
        llm=llm,
        toolkit=VectorStoreToolkit(vectorstore_info=store_description),
        verbose=True,
    )

def upload_file(files):
    """Index each uploaded file and return the list of their paths.

    Args:
        files: Iterable of uploaded-file objects, each exposing its path via
            a ``name`` attribute.

    Returns:
        A list with the ``name`` of every entry in *files*, in order.

    NOTE(review): learn_contents() rebinds the global agent on every call, so
    with several files the agent is rebuilt once per file — confirm that the
    final agent covers all of them.
    """
    # Resolve every path first so a malformed entry fails before any indexing.
    uploaded_paths = []
    for uploaded in files:
        uploaded_paths.append(uploaded.name)

    for pdf_path in uploaded_paths:
        learn_contents(pdf_path)

    return uploaded_paths

def reply_to(prompt):
    """Answer *prompt* using the agent built from the uploaded document.

    Args:
        prompt: The question text entered by the user.

    Returns:
        Whatever ``agent_executor.run(prompt)`` returns, or a short
        instruction message when no document has been processed yet.
    """
    # agent_executor is only assigned once learn_contents() has run; without
    # this guard, submitting a prompt before uploading raises AttributeError
    # on None.
    if agent_executor is None:
        return "Please upload a PDF before asking a question."
    return agent_executor.run(prompt)

# Build the Gradio UI. Components are created in the order they should appear.
with gr.Blocks() as demo:
    # Displays the files returned by upload_file().
    file_output = gr.File()
    upload_button = gr.UploadButton("Click to Upload a File", file_count="multiple")
    # On upload: pass the uploaded files to upload_file() and show its return
    # value in file_output.
    upload_button.upload(upload_file, upload_button, file_output)
    prompt_box = gr.Textbox(label="Prompt")
    reply_box = gr.Textbox(label="Reply")
    reply_button = gr.Button("Submit")
    # On click: send the prompt text to reply_to() and write the result to
    # reply_box.
    reply_button.click(fn=reply_to, inputs=prompt_box, outputs=reply_box, api_name="reply_to")
# Start the app with debug output enabled.
demo.launch(debug=True)
