# Different VectorStores

### import packages

In [1]:
import sys,os
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import TextLoader

In [2]:
sys.path.append(os.path.abspath(os.path.join('../scripts')))
from vector_store_manager import VectorStoreManager

### load a file to be loaded onto the vectorStore

In [3]:
file_path = "../prompts/context.txt"
loader = TextLoader(file_path)
documents = loader.load()

### chunk the loaded file using CharacterTextSplitte

In [4]:
text_splitter = CharacterTextSplitter(chunk_size = 35, chunk_overlap=0, separator='', strip_whitespace=False)

In [5]:
chunks = text_splitter.split_documents(documents)

### Initialize vectorStore class

In [6]:
manager = VectorStoreManager()

            Consider upgrading to the new and improved v4 client instead!
            See here for usage: https://weaviate.io/developers/weaviate/client-libraries/python
            


embedded weaviate is already listening on port 8079


### Create Weaviate vector store and Load chunks

In [7]:
weaviate_vectorstore = manager.create_weaviate_vectorstore(chunks)

  from .autonotebook import tqdm as notebook_tqdm
/opt/homebrew/lib/python3.11/site-packages/huggingface_hub/inference/_text_generation.py:121: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  @validator("best_of")
/opt/homebrew/lib/python3.11/site-packages/huggingface_hub/inference/_text_generation.py:140: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  @validator("repetition_penalty")
/opt/homebrew/lib/python3.11/site-packages/huggin

Weaviate vector store created successfully.


### Create FAISS vector store and Load chunks

In [8]:
faiss_vectorstore = manager.create_faiss_vectorstore(chunks)

FAISS vector store created successfully.


  if LooseVersion(numpy.__version__) >= "1.19":
  other = LooseVersion(other)


### Create Chroma vector store and Load chunks

In [9]:
chroma_vectorstore = manager.create_chroma_vectorstore(chunks)

Chroma vector store created successfully.


### Evaluate using retrivers

#### weaviate

In [10]:
weaviate_retriever = weaviate_vectorstore.as_retriever()

In [11]:
query = "Who are the parties to the Agreement and what are their defined names?"
weaviate_response = weaviate_retriever.invoke(query)

In [12]:
weaviate_response

[Document(page_content='f this Agreement confidential and n', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='tents of this Agreement to any thir', metadata={'source': '../prompts/context.txt'}),
 Document(page_content=' from entering into this Agreement ', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='e entire Agreement between the part', metadata={'source': '../prompts/context.txt'})]

#### FAISS

In [13]:
faiss_retriever = faiss_vectorstore.as_retriever()

In [14]:
query = "Who are the parties to the Agreement and what are their defined names?"
faiss_response = faiss_retriever.invoke(query)

In [15]:
faiss_response

[Document(page_content='f this Agreement confidential and n', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='tents of this Agreement to any thir', metadata={'source': '../prompts/context.txt'}),
 Document(page_content=' from entering into this Agreement ', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='e entire Agreement between the part', metadata={'source': '../prompts/context.txt'})]

#### Chromadb

In [18]:
query = "Who are the parties to the Agreement and what are their defined names?"
chroma_response = chroma_vectorstore.similarity_search(query)

In [20]:
chroma_response

[Document(page_content='f this Agreement confidential and n', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='tents of this Agreement to any thir', metadata={'source': '../prompts/context.txt'}),
 Document(page_content=' from entering into this Agreement ', metadata={'source': '../prompts/context.txt'}),
 Document(page_content='e entire Agreement between the part', metadata={'source': '../prompts/context.txt'})]