# Installation
Install the required libraries for working with Google Generative AI and Chroma DB

In [1]:
!pip install -q U google-generativeai

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/58.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.4/58.4 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h

Import necessary libraries and configure the Google Generative AI API key

In [2]:
import google.generativeai as genai
from google.colab import userdata
genai.configure(api_key = userdata.get("GOOGLE_API_KEY"))

 Embed a single string using the text-embedding-004 model

In [None]:
from typing import Dict
result : Dict = genai.embed_content(
       model="models/text-embedding-004",
       content="What is the meaning of life?",
       task_type="retrieval_document",
       title="Embedding of single string",
   )

result['embedding']

In [7]:
len(result['embedding'])

768

 Embed multiple strings using the text-embedding-004 model

In [8]:
from typing import Dict

result : Dict = genai.embed_content(
    model = "models/text-embedding-004",
    content=[
        "What is the meaning of life?",
        "What is the meaning of the universe?",
        "What is the meaning of everything?"
    ],
    task_type="retrieval_document",
    title="Embedding of multiple strings"

)
#Print the first 50 characters of each embedding vector and its length

for v in result["embedding"]:
  print(str(v)[:50],"...TRIMMED...",len(v))


[-0.024917068, 0.05347744, -0.026206452, -0.004009 ...TRIMMED... 768
[-0.024336139, 0.095174074, -0.043020867, -0.01074 ...TRIMMED... 768
[-0.007933196, 0.04814023, -0.046043806, -0.020549 ...TRIMMED... 768


Install the LangChain Chroma library for working with Chroma DB

In [None]:
!pip install -Uq langchain-chroma

Define a list of documents with metadata for use in Chroma DB

In [10]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyality and friendliness.",
        metadata={"source" : "mammal-pets-doc"},
    ),

    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source" : "fish-pets-doc"},
    ),
     Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source" : "bird-pets-doc"},
    ),
     Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source" : "mammal-pets-doc"}
    ),


]

 Install the LangChain Google Generative AI library

In [None]:
!pip install -Uq langchain-google-genai

Create an embedding model using Google Generative AI

In [12]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001",
google_api_key = userdata.get("GOOGLE_API_KEY"))



In [None]:
#Generate an embedding for a sample query
embeddings.embed_query("What is the meaning of illusion")

Set up Chroma DB with the documents and embeddings

In [14]:
from langchain_chroma import Chroma
#from langchain_openai import OpenAIEmbeddings

vectorstore= Chroma.from_documents(
    documents,
    embedding=embeddings
)

List available functions in the Chroma vector store

In [15]:
#benefits of vector db:
#all the functions that are in vector db are now given to us as shown in the answer of this code
#and we dont create all those functions on our own, we will just call them and use them
list(dir(vectorstore))

['_Chroma__ensure_collection',
 '_Chroma__query_collection',
 '_LANGCHAIN_DEFAULT_COLLECTION_NAME',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_chroma_collection',
 '_client',
 '_client_settings',
 '_collection',
 '_collection_metadata',
 '_collection_name',
 '_cosine_relevance_score_fn',
 '_embedding_function',
 '_euclidean_relevance_score_fn',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_persist_directory',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 'aadd_documents',
 'aa

In [16]:
vectorstore.similarity_search("tell me about parrots")

[Document(id='14343dcb-ba76-4c0f-b0aa-98a170dd97e6', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(id='32a0d142-f2c6-4d6c-b805-a9b3a36c67d4', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyality and friendliness.'),
 Document(id='3770fb08-8864-445c-823c-7528e38ad606', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='82d620ec-23b1-4837-8954-21628c75aaeb', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]

Generate an embedding for the word "Dog" and perform a similarity search

In [17]:
embedding = embeddings.embed_query("Dog")
vectorstore.similarity_search_by_vector(embedding)

[Document(id='32a0d142-f2c6-4d6c-b805-a9b3a36c67d4', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyality and friendliness.'),
 Document(id='14343dcb-ba76-4c0f-b0aa-98a170dd97e6', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(id='3770fb08-8864-445c-823c-7528e38ad606', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='82d620ec-23b1-4837-8954-21628c75aaeb', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]

Retrievers

Set up a retriever to fetch the top result from the vector store

In [18]:
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda
retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)#select top result
retriever.batch(["water living"])

[[Document(id='82d620ec-23b1-4837-8954-21628c75aaeb', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]]

Set up the ChatGoogleGenerativeAI model for generating responses

In [19]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
                             api_key=userdata.get('GOOGLE_API_KEY')
)

Define a prompt template for the RAG (Retrieval-Augmented Generation) pipeline

In [20]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message =  """
Answer this question using the provided context only.
{question}:
Context:
{context}
"""

In [21]:
prompt = ChatPromptTemplate.from_messages([("human",message)])

RAG

Build the RAG pipeline by combining the retriever, prompt, and LLM

In [22]:
 rag_chain = {"context": retriever,"question":RunnablePassthrough()} | prompt | llm

Invoke the RAG pipeline to answer a question about Goldfish

In [23]:
response = rag_chain.invoke("Tell me about Goldfish")
print(response.content)

Based on the provided text, goldfish are popular pets for beginners because they require relatively simple care.
