A notebook for asking GPT-3.5-Turbo questions about a PDF document while keeping track of the earlier questions and answers in the conversation (memory).

<a href="https://colab.research.google.com/gist/blekmus/89818776e181cf28dfd09968c419521b/gpt3-5-with-memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install dependancies
!pip install langchain openai pypdf tiktoken chromadb

In [None]:
# download pdf
!curl -o document.pdf "https://bitcoin.org/bitcoin.pdf"

In [None]:
# Read the OpenAI API key from the OPENAI_API_KEY environment variable; if it is
# unset or empty, prompt for it without echoing, so the secret is never stored
# in the notebook itself.
import os
from getpass import getpass

openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [None]:
# prepare pdf
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken

# Encoder used to measure text length in tokens rather than characters.
tokenizer = tiktoken.get_encoding("cl100k_base")


def tiktoken_len(text: str) -> int:
    """Return how many tokens ``tokenizer`` produces for ``text``.

    An empty ``disallowed_special`` tuple is passed to the encoder, so no
    special-token text is marked as disallowed during encoding.
    """
    return len(tokenizer.encode(text, disallowed_special=()))

# Splitter configuration: chunk size and overlap are measured with tiktoken_len
# (tokens, not characters), and the separators are tried in the listed order.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    length_function=tiktoken_len,
    chunk_size=1000,
    chunk_overlap=200,
)

# Load the downloaded PDF and cut it into chunks with the splitter above.
pdf_loader = PyPDFLoader("./document.pdf")
pdf_data = pdf_loader.load_and_split(text_splitter=text_splitter)

In [None]:
# create embedding
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Build the vector index once and reuse it on later runs.
from pathlib import Path

# Dedicated folder so index files are not written next to the notebook and PDF.
# Delete this folder to force a rebuild (e.g. after changing the PDF or chunking).
persist_directory = Path("./chroma_db")

embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

if persist_directory.is_dir() and any(persist_directory.iterdir()):
    # Reopen the stored index: calling from_documents again on the same folder
    # would add every chunk a second time and fill retrieval results with duplicates.
    vectordb = Chroma(
        persist_directory=str(persist_directory),
        embedding_function=embeddings,
    )
else:
    # First run: embed every chunk and write the index to disk.
    vectordb = Chroma.from_documents(
        pdf_data,
        embedding=embeddings,
        persist_directory=str(persist_directory),
    )
    vectordb.persist()

In [None]:
# initialize chat
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI

# Retriever over the vector index built in the previous cell.
retriever = vectordb.as_retriever()

# Conversation memory, exposed to the chain under the "chat_history" key.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Chat model; temperature is set to 0.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-3.5-turbo",
    temperature=0,
)

# Chain combining the model, the retriever and the memory.
conversation = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
# chat
# Interactive question loop. The memory attached to `conversation` supplies
# "chat_history" on every call, so only the question is passed in.
EXIT_COMMANDS = {"exit", "quit"}

print("Ask a question about the document (type 'exit' or 'quit' to stop).")
while True:
    try:
        user_input = input("> ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or a kernel interrupt ends the chat without a traceback.
        break

    if not user_input:
        continue  # nothing to ask; prompt again
    if user_input.lower() in EXIT_COMMANDS:
        break

    ai_response = conversation({"question": user_input})
    print(ai_response['answer'])