In [1]:
# Install dependencies into the kernel's environment: upgrade to a google-genai
# release >= 1.0.0 and install chromadb. `-q` keeps pip's output quiet.
%pip install -U -q "google-genai>=1.0.0"
%pip install -q chromadb

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m59.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.2/284.2 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m51.3 MB/s[0m eta [36m0:00:00[

In [2]:
import textwrap
import chromadb
import numpy as np
import pandas as pd

from IPython.display import Markdown
from chromadb import Documents, EmbeddingFunction, Embeddings

In [3]:
from google import genai
from google.colab import userdata


# Look up the API key through Colab's `userdata` helper (so it is never
# hardcoded in the notebook) and build the shared Gemini client from it.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
client = genai.Client(
    api_key=GOOGLE_API_KEY,
)

In [4]:
# Print the name of every model that lists 'embedContent' among its
# supported actions, i.e. the models usable for embeddings.
for model in client.models.list():
  if 'embedContent' not in model.supported_actions:
    continue
  print(model.name)

models/embedding-001
models/text-embedding-004
models/gemini-embedding-exp-03-07
models/gemini-embedding-exp


In [16]:
# Four short reference passages about the tectonics behind the Myanmar
# earthquake. Each one is stored below as a separate document.

# Tectonic setting of Myanmar.
DOCUMENT1 = """
Myanmar is considered to be one of the most geologically "active" areas in the world because it sits on top of the convergence of four of these tectonic plates - the Eurasian plate, the Indian plate, the Sunda plate and the Burma microplate.
"""

# The Sagaing fault and how stuck plates release energy.
DOCUMENT2 = """
There is a major fault called the Sagaing fault, which cuts right through Myanmar north to south and is more than 1,200km (746 miles) long.As the plates move past each other, they can become stuck, building friction until it is suddenly released and the earth shifts, causing an earthquake.
"""

# Why the 7.7-magnitude quake was felt as far away as Bangkok.
DOCUMENT3 = """
Early data suggests that the movement that caused Friday's 7.7-magnitude earthquake was a "strike-slip" - where two blocks move horizontally along each other.Because the fracture "unzipped" towards the south, it also directed this piled up energy towards the Thai capital, Bangkok, and this is why the earthquake had such an impact so far away.
"""

# Strike-slip quakes compared with subduction-zone quakes.
DOCUMENT4 = """
plates move past each other horizontally at different speeds. While this causes “strike slip” quakes that are normally less powerful than those seen in “subduction zones” like Sumatra, where one plate slides under another, they can still reach magnitudes of 7 to 8
"""

# The order of this list determines the IDs assigned when it is indexed.
documents = [
    DOCUMENT1,
    DOCUMENT2,
    DOCUMENT3,
    DOCUMENT4,
]


In [17]:
from google.genai import types

class GeminiEmbeddingFunction(EmbeddingFunction):
  """Chroma embedding function that embeds text with the Gemini API.

  Instances are called with a batch of texts and return one embedding
  vector per text. Requests go through the notebook-level ``client``.
  """

  def __call__(self, input: Documents) -> Embeddings:
    """Embed every text in ``input``.

    Args:
      input: The batch of texts to embed.

    Returns:
      A list with one embedding vector per embedding returned by the API,
      in the order the API returned them.
    """
    EMBEDDING_MODEL_ID = "models/embedding-001"  # @param ["models/embedding-001", "models/text-embedding-004", "models/gemini-embedding-exp-03-07", "models/gemini-embedding-exp"] {"allow-input": true, "isTemplate": true}
    title = "Custom query"
    response = client.models.embed_content(
        model=EMBEDDING_MODEL_ID,
        contents=input,
        config=types.EmbedContentConfig(
          task_type="retrieval_document",
          title=title
        )
    )

    # Return a vector for every input text. Returning only
    # `response.embeddings[0].values` would silently drop every embedding
    # after the first whenever more than one text is embedded in one call.
    return [embedding.values for embedding in response.embeddings]

In [18]:
def create_chroma_db(documents, name):
  """Create a Chroma collection called ``name`` and load ``documents`` into it.

  Each document is added on its own and is given its position in
  ``documents`` (converted to a string) as its ID. The collection embeds
  text with ``GeminiEmbeddingFunction``.

  Args:
    documents: Iterable of document texts to store.
    name: Name of the collection to create.

  Returns:
    The populated collection.
  """
  collection = chromadb.Client().create_collection(
      name=name,
      embedding_function=GeminiEmbeddingFunction(),
  )

  # One `add` call per document; the enumerate index doubles as the ID.
  for position, text in enumerate(documents):
    collection.add(documents=text, ids=str(position))

  return collection

In [20]:
# Build the vector store that the retrieval cells below query.
db = create_chroma_db(
    documents=documents,
    name="Recent_Earthquakes_Mynmar_Thailand",
)

  embedding_function=GeminiEmbeddingFunction()


In [25]:
# Fetch the stored records, asking for document text and embedding vectors.
sample_data = db.get(include=['documents', 'embeddings'])

# Preview at most the first four records. Each embedding is shown as the
# first 100 characters of its string form so the table stays readable.
preview_columns = {
    "IDs": sample_data['ids'][:4],
    "Documents": sample_data['documents'][:4],
    "Embeddings": [
        str(vector)[:100] + "..."  # Truncate embeddings
        for vector in sample_data['embeddings'][:4]
    ],
}
df = pd.DataFrame(preview_columns)

print(df)

  IDs                                          Documents  \
0   0  \nMyanmar is considered to be one of the most ...   
1   1  \nThere is a major fault called the Sagaing fa...   
2   2  \nEarly data suggests that the movement that c...   
3   3  \nplates move past each other horizontally at ...   

                                          Embeddings  
0  [ 4.58806120e-02 -6.54670596e-02 -6.98784217e-...  
1  [ 0.00818609 -0.04567146 -0.08648509 -0.026589...  
2  [ 0.00720571 -0.03110965 -0.08459836  0.001568...  
3  [ 1.99567946e-03 -5.69717437e-02 -6.67289943e-...  


In [26]:
def get_relevant_passage(query, db):
  """Return the text of the single best match for ``query`` in ``db``.

  Args:
    query: The search text.
    db: Object exposing ``query(query_texts=..., n_results=...)``.

  Returns:
    The first document of the first result list.
  """
  results = db.query(query_texts=[query], n_results=1)
  # Only one query text was sent, so its hits are the first entry under
  # 'documents'; with n_results=1 the wanted passage is the first hit.
  hits_for_query = results['documents'][0]
  return hits_for_query[0]

In [27]:
# Retrieve the stored passage closest to the search text, then render it.
passage = get_relevant_passage(query="major fault", db=db)
Markdown(passage)


There is a major fault called the Sagaing fault, which cuts right through Myanmar north to south and is more than 1,200km (746 miles) long.As the plates move past each other, they can become stuck, building friction until it is suddenly released and the earth shifts, causing an earthquake.


In [13]:
def make_prompt(query, relevant_passage):
  """Build the question-answering prompt for the generation model.

  Single and double quote characters are removed from ``relevant_passage``
  and its newlines are turned into spaces before it is placed in the
  template. ``query`` is inserted as given.

  Args:
    query: The user's question.
    relevant_passage: Reference text the answer should draw on.

  Returns:
    The fully formatted prompt string.
  """
  # Single pass over the passage: drop both quote characters, flatten newlines.
  cleanup_table = str.maketrans({"'": None, '"': None, "\n": " "})
  flattened_passage = relevant_passage.translate(cleanup_table)

  template = """
    You are a helpful and informative bot that answers questions using
    text from the reference passage included below.
    Be sure to respond in a complete sentence, being comprehensive,
    including all relevant background information.
    However, you are talking to a non-technical audience, so be sure to
    break down complicated concepts and strike a friendly
    and converstional tone. If the passage is irrelevant to the answer,
    you may ignore it.
    QUESTION: '{query}'
    PASSAGE: '{relevant_passage}'

    ANSWER:
  """
  return template.format(query=query, relevant_passage=flattened_passage)

In [28]:
# Combine the user's question with the retrieved passage and show the
# resulting prompt.
query = "why plates moving?"
prompt = make_prompt(query=query, relevant_passage=passage)
Markdown(prompt)


    You are a helpful and informative bot that answers questions using
    text from the reference passage included below.
    Be sure to respond in a complete sentence, being comprehensive,
    including all relevant background information.
    However, you are talking to a non-technical audience, so be sure to
    break down complicated concepts and strike a friendly
    and converstional tone. If the passage is irrelevant to the answer,
    you may ignore it.
    QUESTION: 'why plates moving?'
    PASSAGE: ' There is a major fault called the Sagaing fault, which cuts right through Myanmar north to south and is more than 1,200km (746 miles) long.As the plates move past each other, they can become stuck, building friction until it is suddenly released and the earth shifts, causing an earthquake. '

    ANSWER:
  

In [29]:
MODEL_ID = "gemini-2.0-flash"  # @param ["gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-pro-exp-03-25"] {"allow-input": true, "isTemplate": true}

# Send the grounded prompt to the selected model and render the reply text.
answer = client.models.generate_content(model=MODEL_ID, contents=prompt)
Markdown(answer.text)

Well, according to the passage, plates move past each other and sometimes get stuck. This causes friction to build up. Then suddenly all that built up friction is released, the earth shifts, and that's how you get an earthquake.
