### Google Colab notebook: retrieval-augmented generation (RAG) with Google Gemini 1.5 Pro and Chroma

In [3]:
# Install necessary libraries
!pip install google-generativeai chromadb sentence_transformers -q

In [4]:


import hashlib
import os

import chromadb
import google.generativeai as genai
from google.colab import userdata
from sentence_transformers import SentenceTransformer

# Authenticate with the key stored under GOOGLE_API_KEY in Colab's Secrets
# panel, so the key itself never appears in the notebook.
genai.configure(api_key=userdata.get('GOOGLE_API_KEY'))

# Initialize Gemini model
model = genai.GenerativeModel('gemini-1.5-pro')

# Initialize Sentence Transformer for embedding
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize Chroma
chroma_client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable within one session;
# create_collection fails when "my_collection" already exists.
collection = chroma_client.get_or_create_collection("my_collection")

# Function to add documents to Chroma
def add_documents(docs):
    """Embed ``docs`` and store them in the module-level Chroma ``collection``.

    Each document's ID is derived from the SHA-256 hash of its text and rows
    are written with ``upsert``. Calling this function several times therefore
    appends new documents instead of colliding on positional IDs, and adding
    the same text again refreshes the existing row rather than duplicating it.

    Args:
        docs: A single string or an iterable of strings to index. Repeated
            strings are stored once; an empty iterable is a no-op.

    Returns:
        None. The collection is updated in place.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(docs, str):
        docs = [docs]

    # De-duplicate while preserving order: identical texts hash to the same
    # ID, and repeated IDs inside one batch are rejected by Chroma.
    unique_docs = list(dict.fromkeys(docs))
    if not unique_docs:
        return

    ids = [
        f"doc_{hashlib.sha256(text.encode('utf-8')).hexdigest()}"
        for text in unique_docs
    ]
    embeddings = embedder.encode(unique_docs).tolist()
    collection.upsert(
        ids=ids,
        embeddings=embeddings,
        documents=unique_docs,
    )

# Function to perform RAG
def rag_query(query, k=3):
    """Answer ``query`` with Gemini, using documents retrieved from Chroma as context.

    The query is embedded with the module-level ``embedder``, the closest
    documents are fetched from ``collection``, and the joined documents plus
    the query are sent to ``model`` as a single prompt.

    Args:
        query: The question to answer, as a string.
        k: Maximum number of documents to retrieve (default 3). Fewer are
            used when the collection holds fewer than ``k`` documents.

    Returns:
        The ``text`` attribute of the model's response.

    Raises:
        ValueError: If ``k`` is less than 1.

    NOTE(review): ``response.text`` is read directly; presumably it raises when
    the model returns no text (e.g. a blocked response) - confirm and handle
    if this is used beyond a demo.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Never request more neighbours than the collection actually stores.
    n_results = min(k, collection.count())

    if n_results > 0:
        # encode([query]) yields one embedding row, which is exactly the
        # list-of-embeddings shape that query_embeddings expects.
        results = collection.query(
            query_embeddings=embedder.encode([query]).tolist(),
            n_results=n_results,
        )
        retrieved_docs = results['documents'][0]
    else:
        # Nothing indexed yet: skip retrieval and send an empty context.
        retrieved_docs = []

    # Construct prompt with retrieved context
    context = "\n".join(retrieved_docs)
    prompt = f"Context:\n{context}\n\nQuery: {query}\n\nAnswer:"

    # Generate response using Gemini
    response = model.generate_content(prompt)

    return response.text

# Example usage: a tiny knowledge base to index
docs = [
    "The sky is blue because of Rayleigh scattering.",
    "Water boils at 100 degrees Celsius at sea level.",
    "The Earth orbits around the Sun.",
]

# Index the knowledge base in Chroma
add_documents(docs)

# Ask a question and show the answer grounded in the retrieved documents
query = "Why is the sky blue?"
answer = rag_query(query)
print("Query:", query)
print("Answer:", answer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Query: Why is the sky blue?
Answer: Rayleigh scattering.

