##### Copyright 2025 Google LLC.

In [None]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Gemini API: Entity extraction

Use Gemini API to speed up some of your tasks, such as searching through text to extract needed information. Entity extraction with a Gemini model is a simple query, and you can ask it to retrieve its answer in the form that you prefer.

This notebook shows how to extract entities into a list.

<a target="_blank" href="https://colab.research.google.com/github/google-gemini/cookbook/blob/main/examples/Entity_Extraction.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=30/></a>

## Setup

In [1]:
%pip install -U -q "google-genai>=1.0.0"

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/159.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m159.7/159.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

## Configure your API key

To run the following cell, your API key must be stored it in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) for an example.

In [2]:
from google import genai
from google.colab import userdata

GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
client = genai.Client(api_key=GOOGLE_API_KEY)

# Select the model

Additionally, select the model you want to use from the available options below:

In [6]:
MODEL_ID = "gemini-2.0-flash"  # @param ["gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-2.5-flash-preview-04-17","gemini-2.5-pro-exp-03-25"] {"allow-input": true, "isTemplate": true}

In [19]:
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib gspread

Collecting google-api-python-client
  Downloading google_api_python_client-2.167.0-py2.py3-none-any.whl.metadata (6.7 kB)
Collecting google-auth-oauthlib
  Downloading google_auth_oauthlib-1.2.2-py3-none-any.whl.metadata (2.7 kB)
Downloading google_api_python_client-2.167.0-py2.py3-none-any.whl (13.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.2/13.2 MB[0m [31m62.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading google_auth_oauthlib-1.2.2-py3-none-any.whl (19 kB)
Installing collected packages: google-auth-oauthlib, google-api-python-client
  Attempting uninstall: google-auth-oauthlib
    Found existing installation: google-auth-oauthlib 1.2.1
    Uninstalling google-auth-oauthlib-1.2.1:
      Successfully uninstalled google-auth-oauthlib-1.2.1
  Attempting uninstall: google-api-python-client
    Found existing installation: google-api-python-client 2.164.0
    Uninstalling google-api-python-client-2.164.0:
      Successfully uninstalled google-api-python-clie

In [20]:
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
creds, _ = default()
gc = gspread.authorize(creds)

In [22]:
sh = gc.open_by_key('1dYMnP5IhTAsOqX-lZyOSIsn1U2FZO1yYOb6LHWhcY3c') # replace with your spreadsheet key
worksheet = sh.worksheet('Sheet3') #replace with your sheet name

In [25]:
cell_value = worksheet.acell('H15').value # extract cell H3 for the header
print(cell_value)

[Aplausos] [Música] [Aplausos] en las últimas dos semanas Se ha producido un aumento de la acción principalmente a través de campamentos de solidaridad en los campus entre la diáspora Palestina y sus comunidades de apoyo en el extranjero inspiradas por el ejemplo que los estudiantes de la Universidad de Columbia dieron con su campamento de solidaridad con gaza las organizaciones universitarias de todo el mundo se están sumando en menos de dos semanas hemos visto como los campamentos de solidaridad se han extendido rápidamente por todo Estados Unidos y otros países aliados con la entidad sionista los campamentos de solidaridad con gaza deben socavar su separación mediante el empleo de estrategias exitosas para su lucha a saber la unidad de los campus con el fin de escalar de simples demandas de desinversión a una realidad verdaderamente anticolonial que puede imponerse a las instituciones occidentales los campamentos de solidaridad se han formado a partir de la conciencia de personas ju

# Examples

### Extracting few entities at once

This block of text is about possible ways to travel from the airport to the Colosseum.  

Let's extract all street names and proposed forms of transportation from it.

In [10]:
direcciones = """

"""

In [18]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


You will use Gemini 2.0 Flash model for fast responses.

In [None]:
from IPython.display import Markdown

directions_prompt = f"""
  Del texto dado, extraiga las siguientes entidades y devuelva una lista de cuantas veces aparece cada una.
Escribe todas las variantes posibles de las entidades.
Agrega el contexto en el que aparece cada una.
Analiza y indica si la entidad fue descrita en un texto positivo o negativo.
Entidades a extraer: ucrania, rusia, niño, matar, israel, putin, zelenski, genocidio.
Texto: {cell_value}
ucrania = []
rusia = []
niño = []
matar = []
putin = []
zelenski  = []
genocidio = []
"""

response = client.models.generate_content(
    model=MODEL_ID,
    contents=directions_prompt
)

Markdown(response.text)

You can modify the form of the answer for your extracted entities even more:

In [None]:
directions_list_prompt = f"""
  From the given text, extract the following entities and
  return a list of them.
  Entities to extract: street name, form of transport.
  Text: {directions}
  Return your answer as two lists:
  Street = [street names]
  Transport = [forms of transport]
"""

response = client.models.generate_content(
    model=MODEL_ID,
    contents=directions_list_prompt
)

Markdown(response.text)

Here's the extracted information:

Street = ['Via dei Fori Imperiali', 'Via del Corso', 'Via dei Fori Imperiali', 'Via della Lungara']
Transport = ['train', 'metro', 'bus', 'shuttle', 'taxi', 'transfer service']


### Numbers

Try entity extraction of phone numbers

In [None]:
customer_service_email = """
  Hello,
  Thank you for reaching out to our customer support team regarding your
  recent purchase of our premium subscription service.
  Your activation code has been sent to +87 668 098 344
  Additionally, if you require immediate assistance, feel free to contact us
  directly at +1 (800) 555-1234.
  Our team is available Monday through Friday from 9:00 AM to 5:00 PM PST.
  For after-hours support, please call our
  dedicated emergency line at +87 455 555 678.
  Thanks for your business and look forward to resolving any issues
  you may encounter promptly.
  Thank you.
"""

In [None]:
phone_prompt = f"""
  From the given text, extract the following entities and return a list of them.
  Entities to extract: phone numbers.
  Text: {customer_service_email}
  Return your answer in a list:
"""

response = client.models.generate_content(
    model=MODEL_ID,
    contents=phone_prompt
)

Markdown(response.text)

```json
[
  "+87 668 098 344",
  "+1 (800) 555-1234",
  "+87 455 555 678"
]
```

### URLs


Try entity extraction of URLs and get response as a clickable link.

In [None]:
url_text = """
  Gemini API billing FAQs

  This page provides answers to frequently asked questions about billing
  for the Gemini API. For pricing information, see the pricing page
  https://ai.google.dev/pricing.
  For legal terms, see the terms of service
  https://ai.google.dev/gemini-api/terms#paid-services.

  What am I billed for?
  Gemini API pricing is based on total token count, with different prices
  for input tokens and output tokens. For pricing information,
  see the pricing page https://ai.google.dev/pricing.

  Where can I view my quota?
  You can view your quota and system limits in the Google Cloud console
  https://console.cloud.google.com/apis/api/generativelanguage.googleapis.com/quotas.

  Is GetTokens billed?
  Requests to the GetTokens API are not billed,
  and they don't count against inference quota.
"""

In [None]:
url_prompt = f"""
  From the given text, extract the following entities and return a list of them.
  Entities to extract: URLs.
  Text: {url_text}
  Do not duplicate entities.
  Return your answer in a markdown format:
"""

response = client.models.generate_content(
    model=MODEL_ID,
    contents=url_prompt
)

Markdown(response.text)

```
- https://ai.google.dev/pricing
- https://ai.google.dev/gemini-api/terms#paid-services
- https://console.cloud.google.com/apis/api/generativelanguage.googleapis.com/quotas
```