# Installation
Install the Google Generative AI SDK. The Chroma DB and LangChain integration packages are installed further down, right before they are first used.

In [12]:
!pip install -q U google-generativeai

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-google-genai 2.0.11 requires google-ai-generativelanguage<0.7.0,>=0.6.16, but you have google-ai-generativelanguage 0.6.15 which is incompatible.[0m[31m
[0m

Import necessary libraries and configure the Google Generative AI API key

In [13]:
import google.generativeai as genai
from google.colab import userdata
# Look the key up under the name "GOOGLE_API_KEY" via Colab's `userdata` helper
# instead of hardcoding it in the notebook, then hand it to the SDK.
genai.configure(api_key = userdata.get("GOOGLE_API_KEY"))

 Embed a single string using the text-embedding-004 model

In [15]:
from typing import Dict

# Embed one string with the text-embedding-004 model.
# All statements sit at column 0: mixing an unindented import with indented
# statements in the same cell raises "IndentationError: unexpected indent".
result: Dict = genai.embed_content(
    model="models/text-embedding-004",
    content="What is the meaning of life?",
    task_type="retrieval_document",
    title="Embedding of single string",
)

# Last expression of the cell, so the embedding vector is shown as the output.
result['embedding']

[-0.02854543,
 0.044588115,
 -0.034197364,
 -0.0042663575,
 -0.04079577,
 0.012999958,
 0.018053582,
 0.06015144,
 -0.0028713925,
 0.009951648,
 0.024832657,
 -0.01683923,
 0.09940116,
 -0.031990346,
 0.018328529,
 -0.109134205,
 0.001190296,
 0.0014311911,
 -0.083155245,
 -0.010203233,
 0.019211812,
 0.0010217889,
 0.053874534,
 -0.0150861535,
 -0.003189089,
 0.019626662,
 -0.0074312133,
 -0.036586244,
 -0.008509182,
 -0.017352631,
 0.058202818,
 0.05446324,
 0.01571296,
 -0.021822602,
 0.048009068,
 0.022641798,
 -0.0069730366,
 0.054272633,
 0.025922865,
 -0.027334303,
 -0.07256842,
 0.028509492,
 -0.03564165,
 0.060492564,
 -0.022731686,
 -0.030770157,
 -0.006176277,
 -0.021891864,
 -0.019659325,
 0.0643669,
 0.03154234,
 0.017379418,
 -0.03679774,
 0.016511764,
 -0.02536976,
 -0.022270117,
 -0.012396498,
 -0.032805424,
 0.054154944,
 -0.04823156,
 -0.021759441,
 -0.03370158,
 -0.025460402,
 -0.017531719,
 -0.052902102,
 0.04005264,
 -0.022417234,
 0.023286799,
 -0.081740536,
 0.05

In [16]:
# Number of components in the embedding vector produced for the single string.
len(result['embedding'])

768

 Embed multiple strings using the text-embedding-004 model

In [17]:
from typing import Dict

# Passing a list as `content` embeds several strings in one call;
# result["embedding"] is then iterated as one vector per input string.
result: Dict = genai.embed_content(
    model="models/text-embedding-004",
    content=[
        "What is the meaning of life?",
        "What is the meaning of the universe?",
        "What is the meaning of everything?",
    ],
    task_type="retrieval_document",
    title="Embedding of multiple strings",
)

# Show only the first 50 characters of each vector's text form, plus its length.
for vector in result["embedding"]:
    print(str(vector)[:50], "...TRIMMED...", len(vector))


[-0.024917068, 0.05347744, -0.026206452, -0.004009 ...TRIMMED... 768
[-0.024336139, 0.095174074, -0.043020867, -0.01074 ...TRIMMED... 768
[-0.007933196, 0.04814023, -0.046043806, -0.020549 ...TRIMMED... 768


Install the LangChain Chroma library for working with Chroma DB

In [18]:
# Quietly install/upgrade the LangChain integration package for Chroma.
!pip install -Uq langchain-chroma

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.1/611.1 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00

Define a list of documents with metadata for use in Chroma DB

In [21]:
from langchain_core.documents import Document

# Four small pet facts, each tagged with the "source" it came from.
# The texts are kept exactly as originally written (including the spelling
# "loyality") because the recorded search outputs below echo them verbatim.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        (
            "Dogs are great companions, known for their loyality and friendliness.",
            "mammal-pets-doc",
        ),
        (
            "Goldfish are popular pets for beginners, requiring relatively simple care.",
            "fish-pets-doc",
        ),
        (
            "Parrots are intelligent birds capable of mimicking human speech.",
            "bird-pets-doc",
        ),
        (
            "Rabbits are social animals that need plenty of space to hop around.",
            "mammal-pets-doc",
        ),
    )
]

 Install the LangChain Google Generative AI library

In [22]:
# Quietly install/upgrade LangChain's Google Generative AI integration.
# NOTE(review): the recorded pip output for this cell reports a conflict:
# google-generativeai 0.8.4 requires google-ai-generativelanguage==0.6.15, but
# 0.6.16 ends up installed. Confirm both SDKs still work together, or pin versions.
!pip install -Uq langchain-google-genai

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-generativeai 0.8.4 requires google-ai-generativelanguage==0.6.15, but you have google-ai-generativelanguage 0.6.16 which is incompatible.[0m[31m
[0m

Create an embedding model using Google Generative AI

In [28]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# LangChain embedding wrapper; the key is read from Colab `userdata` again.
# NOTE(review): this uses "models/embedding-001", whereas the direct SDK cells
# above use "models/text-embedding-004" — confirm the difference is intentional.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=userdata.get("GOOGLE_API_KEY"),
)



In [29]:
# Generate an embedding for a sample query; as the cell's last expression,
# the resulting list of floats is displayed as the output.
embeddings.embed_query("What is the meaning of illusion")

[0.030332516878843307,
 -0.014788706786930561,
 -0.009227514266967773,
 -0.0411163792014122,
 0.04812999442219734,
 0.026690930128097534,
 0.00878135859966278,
 -0.015363362617790699,
 0.015509331598877907,
 0.004143483936786652,
 0.004600975662469864,
 -0.002882228698581457,
 -0.03829479217529297,
 -0.01267352793365717,
 -0.0014399164356291294,
 0.03385947644710541,
 0.014149145223200321,
 0.016055051237344742,
 0.0005985197494737804,
 -0.05514579266309738,
 -0.006029795855283737,
 0.04442472383379936,
 0.0014952645869925618,
 0.009597322903573513,
 0.02333139069378376,
 0.010683427564799786,
 -0.014801335521042347,
 -0.05706387013196945,
 -0.044279973953962326,
 0.06945030391216278,
 -0.07691431045532227,
 0.016370106488466263,
 -0.03601636737585068,
 0.036525290459394455,
 0.0005262622144073248,
 -0.03076775372028351,
 0.034348804503679276,
 0.03889456018805504,
 0.03130272030830383,
 0.03516501560807228,
 0.01154819130897522,
 -0.016912145540118217,
 -0.03649287670850754,
 -0.00735

Set up Chroma DB with the documents and embeddings

In [31]:
from langchain_chroma import Chroma

# Build a Chroma vector store from the pet documents, using `embeddings`
# to turn each document into a vector.
# No persist directory is passed, so the store presumably lives only in
# this kernel session — TODO confirm if persistence is needed.
vectorstore = Chroma.from_documents(
    documents,
    embedding=embeddings,
)

List available functions in the Chroma vector store

In [32]:
# Benefit of using a vector DB wrapper: its operations already exist as
# methods on the store, so we call them instead of writing them ourselves.
# dir() returns the attribute names as a list; the listing also includes
# private and dunder names, as the recorded output shows.
dir(vectorstore)

['_Chroma__ensure_collection',
 '_Chroma__query_collection',
 '_LANGCHAIN_DEFAULT_COLLECTION_NAME',
 '__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_asimilarity_search_with_relevance_scores',
 '_chroma_collection',
 '_client',
 '_client_settings',
 '_collection',
 '_collection_metadata',
 '_collection_name',
 '_cosine_relevance_score_fn',
 '_embedding_function',
 '_euclidean_relevance_score_fn',
 '_get_retriever_tags',
 '_max_inner_product_relevance_score_fn',
 '_persist_directory',
 '_select_relevance_score_fn',
 '_similarity_search_with_relevance_scores',
 'aadd_documents',
 'aa

In [35]:
# Search the store with a plain-text query; in the recorded output the
# parrot document is ranked first, followed by the other three documents.
vectorstore.similarity_search("tell me about parrots")

[Document(id='fb2ff2b5-81e3-4fd0-bd8a-5d4e07a6418e', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(id='e352e911-bd30-4f65-a5e2-fb503927c307', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyality and friendliness.'),
 Document(id='243869df-d5ee-4609-884f-0e772346e4c6', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='afa5b241-25ce-4c29-8769-b2e6f7424c2d', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]

Generate an embedding for the word "Dog" and perform a similarity search

In [36]:
# Embed the query text ourselves ...
embedding = embeddings.embed_query("Dog")
# ... then search with the raw vector instead of a text query.
vectorstore.similarity_search_by_vector(embedding)

[Document(id='e352e911-bd30-4f65-a5e2-fb503927c307', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyality and friendliness.'),
 Document(id='fb2ff2b5-81e3-4fd0-bd8a-5d4e07a6418e', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(id='243869df-d5ee-4609-884f-0e772346e4c6', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='afa5b241-25ce-4c29-8769-b2e6f7424c2d', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]

Retrievers

Set up a retriever to fetch the top result from the vector store

In [57]:
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda
# Wrap the store's similarity_search in a Runnable so it can be used in a chain.
retriever = (
    RunnableLambda(vectorstore.similarity_search)
    .bind(k=1)  # select top result
)
# batch() takes a list of queries; the recorded output holds one result list per query.
retriever.batch(["water living"])

[[Document(id='afa5b241-25ce-4c29-8769-b2e6f7424c2d', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.')]]

Set up the ChatGoogleGenerativeAI model for generating responses

In [64]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to generate the final answer; the API key is read from
# Colab `userdata`, as in the earlier cells.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    api_key=userdata.get("GOOGLE_API_KEY"),
)

Define a prompt template for the RAG (Retrieval-Augmented Generation) pipeline

In [65]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt template text with two placeholders, {question} and {context}.
# Built from adjacent string literals; the value (including the leading and
# trailing newline) is the same as the original triple-quoted string.
message = (
    "\n"
    "Answer this question using the provided context only.\n"
    "{question}:\n"
    "Context:\n"
    "{context}\n"
)

In [67]:
# Chat prompt consisting of a single "human" message built from the `message` template.
prompt = ChatPromptTemplate.from_messages([("human",message)])

RAG

Build the RAG pipeline by combining the retriever, prompt, and LLM

In [68]:
 # The dict supplies the prompt's two placeholders: "context" comes from the
 # retriever and "question" is the chain input passed through as-is.
 # The filled prompt is then piped into the chat model.
 rag_chain = (
     {"context": retriever, "question": RunnablePassthrough()}
     | prompt
     | llm
 )

Invoke the RAG pipeline to answer a question about Goldfish

In [70]:
# Run the whole chain on one question.
response = rag_chain.invoke("Tell me about Goldfish")
# Print only the reply text (`.content`) rather than the whole response object.
print(response.content)

Based on the provided text, goldfish are popular pets for beginners because they require relatively simple care.
