In [2]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import OnlinePDFLoader

def get_context(arxiv_link: str, prompt: str) -> str:
    """Answer a question about an online PDF via retrieval-augmented QA.

    The PDF at ``arxiv_link`` is loaded, split into chunks, embedded into a
    Chroma vector store with ``OpenAIEmbeddings``, and then queried through a
    ``VectorDBQA`` "stuff" chain backed by an ``OpenAI`` LLM.

    Args:
        arxiv_link: URL of the PDF to load (e.g. an arXiv ``/pdf/`` link).
        prompt: The question to ask about the document.

    Returns:
        The answer produced by the QA chain for ``prompt``.

    Raises:
        ValueError: If the loaded document yields no text chunks to index.

    Note:
        This performs network and API calls; presumably OpenAI credentials
        must already be configured in the environment (verify for your setup).
    """
    # Download and parse the PDF into LangChain documents.
    documents = OnlinePDFLoader(arxiv_link).load()

    # Split into chunks; `split_documents` is the splitter method that accepts
    # loader output (the splitter has no plain `split` method).
    chunks = RecursiveCharacterTextSplitter().split_documents(documents)
    if not chunks:
        raise ValueError(f"No text could be extracted from {arxiv_link!r}")

    # Embed and index the chunks in one step, so the store is fully populated
    # before the chain is built on top of it.
    store = Chroma.from_documents(chunks, OpenAIEmbeddings())

    # "stuff" places the retrieved chunks directly into a single LLM prompt.
    qa_chain = VectorDBQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        vectorstore=store,
    )

    return qa_chain.run(prompt)

ModuleNotFoundError: No module named 'langchain'

In [None]:
# Ask a high-level question about the paper behind this arXiv PDF link.
get_context(
    arxiv_link='https://arxiv.org/pdf/2006.05987.pdf',
    prompt='What is the main idea of the paper?',
)

In [15]:
# Point at arXiv's direct /pdf/ endpoint; the /abs/ path is the HTML abstract
# page rather than the PDF file that OnlinePDFLoader expects.
loader = OnlinePDFLoader("https://arxiv.org/pdf/1706.03762.pdf")
data = loader.load()

ImportError: Failed to load the Detectron2 model. Ensure that the Detectron2 module is correctly installed.

In [None]:
# Break the loaded documents into chunks of at most 1000 characters with no
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(data)

In [None]:
# Embedding model used to vectorise each chunk.
embeddings = OpenAIEmbeddings()

# Build the Chroma index directly from the chunked documents.
vectordb = Chroma.from_documents(
    texts,
    embeddings,
)

In [None]:
# Retrieval QA chain: relevant chunks from `vectordb` are "stuffed" into a
# single prompt for the OpenAI LLM.
qa = VectorDBQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    vectorstore=vectordb,
)


In [None]:
# Ask something the indexed arXiv paper can actually answer; the previous
# question was unrelated to the document loaded above.
query = "What is the main idea of the paper?"
qa.run(query)