In [None]:
!pip install langchain chromadb faiss-cpu openai tiktoken langchain_openai langchain-community wikipedia

In [43]:
!pip install langchain-chroma -q

In [46]:
import os

import google.generativeai as genai
from langchain.embeddings.base import Embeddings
from langchain_core.documents import Document
from langchain_chroma import Chroma


# Read the API key from the GOOGLE_API_KEY environment variable so the secret
# is never stored in the notebook. If the variable is unset this raises
# KeyError right here instead of failing later on the first embedding call.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

class GeminiEmbeddingFunction(Embeddings):
    """LangChain ``Embeddings`` adapter that delegates to ``genai.embed_content``.

    Documents and queries are embedded with different ``task_type`` values
    ("retrieval_document" and "retrieval_query" respectively); both use the
    same model.
    """

    def __init__(self, model="models/embedding-001"):
        """Remember which embedding model to call.

        Args:
            model: Model identifier forwarded to ``genai.embed_content``.
                The default is the model this class previously hard-coded,
                so ``GeminiEmbeddingFunction()`` behaves as before.
        """
        self.model = model

    def _embed(self, text, task_type):
        """Embed one piece of text and return the response's "embedding" entry."""
        response = genai.embed_content(
            model=self.model,
            content=text,
            task_type=task_type,
        )
        return response["embedding"]

    def embed_documents(self, texts):
        """Embed texts that will be stored and searched over.

        Makes one ``genai.embed_content`` call per text.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            A list with one embedding per input text, in input order
            (an empty list when ``texts`` is empty).
        """
        return [self._embed(text, "retrieval_document") for text in texts]

    def embed_query(self, text):
        """Embed a single search query.

        Args:
            text: The query string.

        Returns:
            The "embedding" entry of the ``genai.embed_content`` response.
        """
        return self._embed(text, "retrieval_query")


In [47]:
# Demo corpus: each sentence below becomes one Document, in the order listed.
sample_texts = (
    "LangChain helps developers build LLM applications easily.",
    "Chroma is a vector database optimized for LLM-based search.",
    "Embeddings convert text into high-dimensional vectors.",
    "OpenAI provides powerful embedding models.",
)
documents = [Document(page_content=sentence) for sentence in sample_texts]

In [49]:
# Build the Chroma collection 'sample' from the demo documents. Because
# persist_directory is set, the store is persisted under 'my_chroma_db_new'
# rather than living only in memory.
# Stable, position-based ids make this cell safe to re-run: the same records
# are written again instead of a fresh copy of every document being appended
# to the persisted collection on each execution.
vector_store = Chroma.from_documents(
    documents=documents,
    embedding=GeminiEmbeddingFunction(),
    ids=[f"sample-{position}" for position in range(len(documents))],
    collection_name='sample',
    persist_directory='my_chroma_db_new',
)

In [50]:
# Expose the vector store through the retriever interface, passing k=2 as
# the search option.
retriever = vector_store.as_retriever(search_kwargs=dict(k=2))

In [51]:
# The question to look up, and whatever the retriever returns for it.
query = "What is Chroma used for?"
results = retriever.invoke(input=query)

In [52]:
# Print the text of each returned document under a numbered banner.
for i, hit in enumerate(results):
    banner = f"\n--- Result {i+1} ---"
    print(banner)
    print(hit.page_content)


--- Result 1 ---
Chroma is a vector database optimized for LLM-based search.

--- Result 2 ---
Embeddings convert text into high-dimensional vectors.


In [53]:
# Run the same query directly against the vector store, again with k=2.
results = vector_store.similarity_search(query=query, k=2)

In [54]:
# Print the text of each document from the direct search under a numbered banner.
for i, hit in enumerate(results):
    banner = f"\n--- Result {i+1} ---"
    print(banner)
    print(hit.page_content)


--- Result 1 ---
Chroma is a vector database optimized for LLM-based search.

--- Result 2 ---
Embeddings convert text into high-dimensional vectors.


In [29]:
# Prints the literal 5; it reads nothing from the rest of the notebook.
# NOTE(review): this cell's execution count (29) is lower than the cells above
# it, so it looks like a leftover scratch cell — confirm before removing.
print(5)

5
