In [None]:
# Source: https://medium.com/geekculture/automating-pdf-interaction-with-langchain-and-chatgpt-e723337f26a6

In [None]:
!curl -o paper.pdf https://arxiv.org/pdf/2303.13519.pdf

In [None]:
!pip install langchain pypdf chromadb openai tiktoken

In [None]:
from langchain.document_loaders import PyPDFLoader # for loading the pdf
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import ChatVectorDBChain # for chatting with the pdf
from langchain.llms import OpenAI # the LLM model we'll use (ChatGPT)

In [None]:
# Location of the paper downloaded earlier in this notebook.
pdf_path = "./paper.pdf"

# Load the PDF and split it into documents; `pages` is what gets embedded
# and indexed further down.
loader = PyPDFLoader(file_path=pdf_path)
pages = loader.load_and_split()

# To preview the first chunk, uncomment:
# print(pages[0].page_content)

In [None]:
import os
from getpass import getpass

# Never hard-code the key in the notebook. Reuse the key already exported in
# the environment; only when none is set, prompt for it without echoing.
api_key = os.environ.get('OPENAI_API_KEY') or getpass('OpenAI API key: ')

# Set the environment variable; later cells construct the OpenAI clients
# without passing a key explicitly.
os.environ['OPENAI_API_KEY'] = api_key

In [None]:
# Embed the loaded pages and index them in a Chroma store that is persisted
# in the current working directory.
# NOTE(review): re-running this cell against an existing store in "." may
# add the same pages again - confirm and consider a dedicated directory.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    documents=pages,
    embedding=embeddings,
    persist_directory=".",
)
vectordb.persist()

In [None]:
# Build the question-answering chain over the vector store. Source documents
# are returned with each answer.
pdf_qa = ChatVectorDBChain.from_llm(
    OpenAI(temperature=0.5, model_name="gpt-3.5-turbo"),
    vectordb,
    return_source_documents=True,
)

In [14]:
query = "What is the VideoTaskformer?"

# This is the first turn, so there is no prior conversation. The history is
# passed as an empty list (of (question, answer) pairs) rather than a string;
# for a follow-up question, pass [(query, result["answer"])] instead.
result = pdf_qa({"question": query, "chat_history": []})

print("Answer:")
print(result["answer"])

Answer:
VideoTaskformer is a pre-trained video model that focuses on representing the semantics and structure of instructional videos. It uses a simple and effective objective of predicting weakly supervised textual labels for steps that are randomly masked out from an instructional video (masked step modeling). It learns step embeddings that encapsulate the semantics and temporal ordering of steps within a task, making them context-aware and possessing global knowledge of task structure. It can verify if an unseen video correctly executes a given task, as well as forecast which steps are likely to be taken after a given step. It outperforms previous baselines on various benchmark tasks and achieves new state-of-the-art performance.
