In [2]:
from langchain_core.vectorstores import InMemoryVectorStore

from langchain_openai import OpenAIEmbeddings

from langchain.vectorstores import FAISS            # for gpu , cpu

from langchain.docstore.document import Document

In [3]:
# Prerequisite (run once per environment): %pip install faiss-cpu

In [4]:
# Embedding client built with library defaults (no model name is given here).
# NOTE(review): presumably reads the API key from the environment — confirm.
embedding_model = OpenAIEmbeddings()

In [5]:
# Sample corpus: three one-sentence texts, each wrapped in a Document so it
# can be handed to a vector store.
documents = [
    Document(page_content=text)
    for text in (
        "This is a document about machine learning.",
        "This document discusses artificial intelligence.",
        "This text is about data science.",
    )
]

In [6]:
# Display the list; each Document's repr shows its (empty) metadata and page_content.
documents

[Document(metadata={}, page_content='This is a document about machine learning.'),
 Document(metadata={}, page_content='This document discusses artificial intelligence.'),
 Document(metadata={}, page_content='This text is about data science.')]

In [7]:
# Embed every sample document with `embedding_model` and build a FAISS-backed
# vector store from the results.
vector_store = FAISS.from_documents(documents=documents, embedding=embedding_model)

In [8]:
query = "Tell me about AI."
# Embed the query text directly with the embedding client.
# NOTE(review): `query_embedding` is not consumed by any later cell shown in
# this notebook — the similarity search passes the raw `query` string instead.
query_embedding = embedding_model.embed_query(query)

In [9]:
# Ask the store for the two best matches for the query text.
similar_docs = vector_store.similarity_search(query, k=2)

In [10]:
# Show the two hits in the order the search returned them.
similar_docs

[Document(metadata={}, page_content='This document discusses artificial intelligence.'),
 Document(metadata={}, page_content='This is a document about machine learning.')]

#### In-memory vector store (`InMemoryVectorStore`)

In [11]:
# Create an in-memory store that embeds with `embedding_model`.
# NOTE(review): this rebinds `vector_store`, which held the FAISS store until
# now; the FAISS store is no longer reachable through this name afterwards.
vector_store = InMemoryVectorStore(embedding=embedding_model)

In [12]:
from langchain_core.documents import Document

In [13]:
# Two sample documents, each tagged in its metadata with the kind of source
# it came from.
# NOTE(review): "chocalate" is misspelled in the sample text; it is left
# untouched here because editing it would change the content that is embedded.
document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)
document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

In [14]:
# NOTE(review): rebinds `documents`, replacing the three-document sample list
# built near the top of the notebook.
documents = [document_1, document_2]

In [15]:
# Add both documents to the in-memory store; the cell output is the list of
# IDs returned for them.
vector_store.add_documents(documents=documents)

['46dbbe04-66ce-4770-aee9-d76c9197b4a8',
 '7f75d076-6597-41f3-a020-f6cae4a1c358']

In [16]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

In [17]:
# Second embedding client, pinned to an explicit model and showing a progress
# bar while it works.
# Alternative model names noted by the author: "text-embedding-3-large",
# "text-embedding-ada-002".
# NOTE(review): the name differs from the earlier `embedding_model` by a
# single "s" — easy to mix the two up.
embeddings_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    show_progress_bar=True,
)

In [18]:
# Plain strings to embed directly (no Document wrapper).
data = [
    "What is the capital of France?",
    "My Car engine number is 07484949",
    "Who won the world cup in 2018?",
]

In [19]:
# Embed the raw strings with the explicitly configured `embeddings_model`
# (model pinned to "text-embedding-3-small", progress bar enabled).
# The original cell called `embedding_model` — the default-configured client
# from the top of the notebook — so the client set up immediately before this
# cell was silently bypassed.
embeddings = embeddings_model.embed_documents(data)

In [20]:
# (number of embedded strings, length of each embedding vector)
len(embeddings), len(embeddings[0])

(3, 1536)

In [21]:
import faiss
import numpy as np

In [22]:
# Pack the list of embedding vectors into a single float32 NumPy array.
embeddings_np = np.asarray(embeddings, dtype=np.float32)

In [23]:
# The vector width is read from the array's second axis and passed to the
# index constructor, so the index matches whatever embeddings were produced.
dimension = embeddings_np.shape[1]
index = faiss.IndexFlatL2(dimension)

In [24]:
# Load the float32 embedding vectors into the index.
index.add(embeddings_np)

#### How do we use these vector stores?

In [73]:
# Texts for the retrieval demo.
# NOTE(review): rebinds `data`, replacing the list defined in the earlier
# embedding cells.
data = [
    "What is the capital of France?",
    "My car has engines on the rear side.",
    "Who won the world cup in 2018?",
]

In [74]:
# Wrap each raw string in a Document (rebinds `documents` once more).
documents = [Document(page_content=text) for text in data]

In [75]:
# Build a FAISS store from the demo documents using the explicitly configured
# `embeddings_model`.
# NOTE(review): this is `vectorstore` (no underscore), a different variable
# from the earlier `vector_store`.
vectorstore = FAISS.from_documents(
    documents=documents,
    embedding=embeddings_model,
)

  0%|          | 0/1 [00:00<?, ?it/s]

In [76]:
# Persist the store to the local "faiss_index_test" folder so it can be
# reloaded without re-embedding.
vectorstore.save_local("faiss_index_test")

In [77]:
# Reload the store that was just written to "faiss_index_test", using the same
# embedding client that built it.
# NOTE(security): `allow_dangerous_deserialization=True` opts in to
# pickle-based loading (per the LangChain FAISS docs). That is acceptable here
# only because this notebook wrote the folder itself; never enable it for
# index files from an untrusted source.
persisted_vectorstore = FAISS.load_local(
    folder_path="faiss_index_test",
    embeddings=embeddings_model,
    allow_dangerous_deserialization=True,
)


In [78]:
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

In [80]:
# Question for the QA chain (rebinds `query` from the earlier similarity-search demo).
query = 'Engines of my car'

In [81]:
# Use RetrievalQA chain for orchestration
qa = RetrievalQA.from_chain_type(llm       = OpenAI(), 
                                 #chain_type= "stuff", 
                                 retriever = vectorstore.as_retriever())

In [82]:
# `Chain.run` is deprecated in LangChain; `invoke` is the supported entry point.
# RetrievalQA takes its input under the "query" key and returns a dict whose
# answer text is under "result", so `result` is still the answer string.
result = qa.invoke({"query": query})["result"]
print(result)

  0%|          | 0/1 [00:00<?, ?it/s]

 The engines on your car are located on the rear side.
