In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader
import vexpresso
from vexpresso import transformation, DaftCollection
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the State of the Union transcript with LangChain's TextLoader.
# The path is relative, so it is resolved against the kernel's working directory.
loader = TextLoader('data/state_of_the_union.txt')
# `documents` is what the text splitter in the next cell receives.
documents = loader.load()

In [3]:
# Configure the splitter with chunk_size=1000 and no overlap between
# consecutive chunks (size is presumably measured in characters under the
# splitter's default length function — confirm against the LangChain docs).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# `texts` holds the chunked documents used to build the collection below.
texts = text_splitter.split_documents(documents)

In [4]:
# Build a vexpresso DaftCollection from the chunked documents. The preview
# further down shows it exposes `source`, `text` and `vexpresso_index` columns.
collection = DaftCollection.from_documents(texts)

[32m2023-06-07 01:14:54.216[0m | [1mINFO    [0m | [36mdaft.context[0m:[36mrunner[0m:[36m88[0m - [1mUsing PyRunner[0m


In [5]:
# Preview the first 5 rows of the collection before any embeddings are added.
collection.show(5)

source Utf8,text Utf8,vexpresso_index Int64
data/state_of_the_union.txt,"Madam Speaker, Madam Vice President, our First Lady and S...",0
data/state_of_the_union.txt,Groups of citizens blocking tanks with their bodies. Ever...,1
data/state_of_the_union.txt,Putin’s latest attack on Ukraine was premeditated and unp...,2
data/state_of_the_union.txt,We are inflicting pain on Russia and supporting the peopl...,3
data/state_of_the_union.txt,And tonight I am announcing that we will join our allies ...,4


In [6]:
# Embedding client created with default settings; `embed_fn` below reads this
# notebook-level name when it runs.
# NOTE(review): presumably picks up the API key from the environment
# (e.g. OPENAI_API_KEY) — confirm before running on a fresh kernel.
embeddings = OpenAIEmbeddings()

def embed_fn(content):
    """Embed a batch of texts and return the result as a NumPy array.

    Args:
        content: Batch of texts; forwarded unchanged to
            ``embeddings.embed_documents``.

    Returns:
        ``np.ndarray`` built from whatever ``embed_documents`` returns
        (one row per input text, assuming it yields one vector per text).
    """
    # `embeddings` is the notebook-level client defined in the same cell group.
    vectors = embeddings.embed_documents(content)
    return np.array(vectors)

In [7]:
# Embed the `text` column with `embed_fn` and write the vectors to a new
# `text_embeddings` column (presumably `.execute()` materialises the
# computation — confirm against the vexpresso docs).
# NOTE(review): this rebinds `collection`, so re-running the cell calls
# embed() again on the already-embedded collection.
collection = collection.embed("text", embedding_fn=embed_fn,  to="text_embeddings").execute()

In [8]:
# Preview again: each row now carries a `text_embeddings` value, shown in the
# output as a (1536,) float64 ndarray.
collection.show(5)

source Utf8,text Utf8,vexpresso_index Int64,text_embeddings Python
data/state_of_the_union.txt,"Madam Speaker, Madam Vice President, our First Lady and S...",0,"<np.ndarray shape=(1536,) dtype=float64>"
data/state_of_the_union.txt,Groups of citizens blocking tanks with their bodies. Ever...,1,"<np.ndarray shape=(1536,) dtype=float64>"
data/state_of_the_union.txt,Putin’s latest attack on Ukraine was premeditated and unp...,2,"<np.ndarray shape=(1536,) dtype=float64>"
data/state_of_the_union.txt,We are inflicting pain on Russia and supporting the peopl...,3,"<np.ndarray shape=(1536,) dtype=float64>"
data/state_of_the_union.txt,And tonight I am announcing that we will join our allies ...,4,"<np.ndarray shape=(1536,) dtype=float64>"


In [9]:
# Convert the collection into the object passed as `vectorstore=` to the QA
# chain below. The two arguments name the `text` and `text_embeddings` columns
# (presumably content column first, embeddings column second — confirm).
vecdb = collection.to_langchain("text", "text_embeddings")

In [10]:
# Build a question-answering chain over `vecdb` using an OpenAI LLM and the
# "stuff" chain type.
# NOTE(review): VectorDBQA appears to be deprecated in later LangChain releases
# in favour of RetrievalQA — confirm against the installed version before migrating.
qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type="stuff", vectorstore=vecdb)



In [11]:
# Ask a question about the transcript.
query = "What did the president say about Ketanji Brown Jackson"
# Left as the cell's last expression so the answer string is displayed as output.
qa.run(query)

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.


" The president said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, from a family of public school educators and police officers, a consensus builder, and has received a broad range of support since she was nominated."