In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [8]:
# Autenticazione su Colab (solo se esegui in Google Colab)
import sys
if "google.colab" in sys.modules:
    from google.colab import auth
    auth.authenticate_user()

# Monta Google Drive (per salvare l'output)
from google.colab import drive
drive.mount('/content/drive')

# Installa le librerie necessarie
!pip install --upgrade --quiet google-genai gspread pandas

import os
import pandas as pd
import xml.etree.ElementTree as ET
import gspread
from google.auth import default
from google import genai
from google.genai.types import GenerateContentConfig

# Imposta il progetto Google Cloud e la regione
PROJECT_ID = "friendly-legacy-450400-q8"  # Sostituisci con il tuo ID progetto
LOCATION = "us-central1"

# Crea il client per Vertex AI (Gen AI SDK)
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

# Definisci il percorso base su Google Drive
BASE_PATH = "/content/drive/MyDrive"

# Funzione per leggere i prompt dal foglio Google o, in alternativa, da un CSV
def leggi_prompt():
    # Prova a leggere da Google Sheets
    try:
        creds, _ = default()
        gc = gspread.authorize(creds)
        spreadsheet = gc.open("prompts")
        worksheet = spreadsheet.sheet1  # usa il primo foglio
        data = worksheet.get_all_records()
        df = pd.DataFrame(data)
        print("Lettura da fogli Google: 'prompts'")
        return df
    except Exception as e:
        print("Impossibile leggere da Google Sheets: ", e)
        # Se fallisce, prova a leggere dal CSV
        CSV_PATH = os.path.join(BASE_PATH, "prompts.csv")
        if os.path.exists(CSV_PATH):
            df = pd.read_csv(CSV_PATH)
            print("Lettura da CSV:", CSV_PATH)
            return df
        else:
            raise FileNotFoundError("Nessun file di input trovato: né Google Sheets 'prompts' né 'prompts.csv'.")

# Leggi i prompt (il DataFrame deve contenere una colonna "Prompt")
df = leggi_prompt()
print("Colonne lette:", df.columns.tolist())
if "Prompt" not in df.columns:
    raise ValueError("Il file di input deve contenere una colonna chiamata 'Prompt'.")

# Definisci il percorso di output per il file XML
XML_OUTPUT_PATH = os.path.join(BASE_PATH, "prompts.xml")

# Crea la radice dell'XML
root = ET.Element("articles")

# Funzione per salvare l'XML corrente
def salva_xml_corrente(percorso):
    tree = ET.ElementTree(root)
    tree.write(percorso, encoding="utf-8", xml_declaration=True)
    print(f"Salvataggio temporaneo eseguito: {percorso}")

# Itera sui prompt e genera il contenuto
for index, row in df.iterrows():
    prompt_text = row["Prompt"]
    print(f"Generazione contenuto per prompt #{index+1}: {prompt_text}")

    # Genera il testo tramite il modello Gemini
    response = client.models.generate_content(
        model="gemini-2.0-flash-exp",  # Modello scelto
        contents=prompt_text,
        config=GenerateContentConfig(
            temperature=0.5,
            top_p=0.8,
            top_k=10,
            candidate_count=1,
            max_output_tokens=8192,
        )
    )
    generated_text = response.text.strip()
    print("Contenuto generato:", generated_text)
    print("-" * 40)

    # Crea un elemento <article> per la risposta generata
    article_elem = ET.SubElement(root, "article")
    article_elem.text = generated_text

    # Salva temporaneamente il file XML dopo ogni articolo
    salva_xml_corrente(XML_OUTPUT_PATH)

print(f"Salvataggio definitivo eseguito: {XML_OUTPUT_PATH}")


[1;30;43mOutput streaming troncato alle ultime 5000 righe.[0m
<h4>Perché la Colazione è Così Importante?</h4>

Ma perché la colazione ha un impatto così forte sulla nostra dieta? Ci sono diverse ragioni. Innanzitutto, una colazione nutriente può promuovere il senso di sazietà e regolare l'appetito. Questo significa che saremo meno propensi a cedere a spuntini poco salutari durante la giornata. Inoltre, una colazione equilibrata può migliorare la concentrazione e le prestazioni cognitive, aiutandoci a fare scelte alimentari migliori.

<h4>Il Ruolo delle Fibre</h4>

Le fibre, in particolare quelle solubili come la pectina, aumentano il senso di sazietà. Consumare alimenti ricchi di fibre, che sono una parte importante della dieta mediterranea, può farci sentire più pieni e ridurre il consumo di cibi ad alto contenuto calorico, grassi e zuccheri. <img src="https://smartlike.it/wp-content/uploads/2025/02/" alt="A bowl of oatmeal topped with fresh berries, nuts, and seeds, showcasing a hi

In [14]:
import re

xml_file_path = "/content/drive/MyDrive/prompts.xml"

# Leggi il contenuto del file XML
with open(xml_file_path, "r", encoding="utf-8") as f:
    xml_content = f.read()

# Pattern per eliminare la sequenza di apertura:
# Cerca "```html" seguito da eventuali newline e spazi, poi "&lt;article&gt;"
pattern_open = r"```html\s*&lt;article&gt;\s*"

# Pattern per eliminare la sequenza di chiusura:
# Cerca "&lt;/article&gt;" seguito da eventuali spazi e newline, poi "```"
pattern_close = r"&lt;/article&gt;\s*```"

# Applica le sostituzioni in modo da rimuovere le stringhe indesiderate
xml_clean = re.sub(pattern_open, "", xml_content, flags=re.MULTILINE)
xml_clean = re.sub(pattern_close, "", xml_clean, flags=re.MULTILINE)

# Scrivi il contenuto pulito sovrascrivendo il file
with open(xml_file_path, "w", encoding="utf-8") as f:
    f.write(xml_clean)

print("Il file XML è stato pulito e salvato in:", xml_file_path)


Il file XML è stato pulito e salvato in: /content/drive/MyDrive/prompts.xml


In [2]:
import re

def convert_to_html_list(text):
    lines = text.splitlines()
    new_lines = []
    in_list = False
    for line in lines:
        # Se la linea inizia con un asterisco seguito da uno spazio
        if re.match(r"^\•\s+", line):
            if not in_list:
                new_lines.append("<ul>")
                in_list = True
            # Rimuovi l'asterisco e lo spazio iniziale e crea un <li>
            item = re.sub(r"^\*\s+", "", line)
            new_lines.append(f"<li>{item}</li>")
        else:
            if in_list:
                new_lines.append("</ul>")
                in_list = False
            new_lines.append(line)
    if in_list:
        new_lines.append("</ul>")
    return "\n".join(new_lines)

# Esempio di testo generato:
text = """<h4>Riassunto dei punti chiave</h4>
* Gli infortuni gravi all'adduttore lungo nel calcio sono spesso causati da azioni di catena cinetica chiusa (CKC), in particolare quando il giocatore si allunga per raggiungere la palla con la gamba non infortunata.
* Questi infortuni sono caratterizzati da una combinazione di estensione, abduzione e rotazione esterna dell'anca.
* L'azione muscolare eccentrica, ovvero l'allungamento rapido del muscolo durante la contrazione, gioca un ruolo cruciale nell'infortunio.
* La prevenzione dovrebbe concentrarsi sul rafforzamento eccentrico dell'adduttore lungo, sul miglioramento del controllo del corpo e sulla forza dei muscoli sinergici.
Spero che questa analisi vi sia stata utile e interessante. Alla prossima!"""

converted_text = convert_to_html_list(text)
print(converted_text)

# Leggi il contenuto del file XML generato
with open(XML_OUTPUT_PATH, "r", encoding="utf-8") as f:
    xml_content = f.read()

# Visualizza un estratto del file per il debug (facoltativo)
snippet = xml_content[:1000]
print("Estratto del file XML prima della pulizia:\n", repr(snippet))

# Definisci i pattern da rimuovere.
# Pattern di apertura: rimuove sia "```html <article>" sia "```xml <article>"
pattern_open = r"(?:```html|```xml)\s*&lt;article&gt;\s*"
# Pattern di chiusura: rimuove "</article> ```" (assumendo che non varino)
pattern_close = r"&lt;/article&gt;\s*```"

# Applica le sostituzioni usando regex in modalità MULTILINE
xml_clean = re.sub(pattern_open, "", xml_content, flags=re.MULTILINE)
xml_clean = re.sub(pattern_close, "", xml_clean, flags=re.MULTILINE)

# Scrivi il contenuto pulito sovrascrivendo il file originale
with open(XML_OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.write(xml_clean)

print("Il file XML è stato pulito e salvato in:", XML_OUTPUT_PATH)

<h4>Riassunto dei punti chiave</h4>
* Gli infortuni gravi all'adduttore lungo nel calcio sono spesso causati da azioni di catena cinetica chiusa (CKC), in particolare quando il giocatore si allunga per raggiungere la palla con la gamba non infortunata.
* Questi infortuni sono caratterizzati da una combinazione di estensione, abduzione e rotazione esterna dell'anca.
* L'azione muscolare eccentrica, ovvero l'allungamento rapido del muscolo durante la contrazione, gioca un ruolo cruciale nell'infortunio.
* La prevenzione dovrebbe concentrarsi sul rafforzamento eccentrico dell'adduttore lungo, sul miglioramento del controllo del corpo e sulla forza dei muscoli sinergici.
Spero che questa analisi vi sia stata utile e interessante. Alla prossima!


NameError: name 'XML_OUTPUT_PATH' is not defined

# Intro to Gemini 2.0 Flash


<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fgetting-started%2Fintro_gemini_2_0_flash.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb">
      <img width="32px" src="https://upload.wikimedia.org/wikipedia/commons/9/91/Octicons-mark-github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://goo.gle/40JXy6g">
      <img width="32px" src="https://cdn.qwiklabs.com/assets/gcp_cloud-e3a77215f0b8bfa9b3f611c0d2208c7e8708ed31.svg" alt="Google Cloud logo"><br> Open in Cloud Skills Boost
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/53/X_logo_2023_original.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

| | |
|-|-|
| Author(s) |  [Eric Dong](https://github.com/gericdong), [Holt Skinner](https://github.com/holtskinner) |

## Overview

**YouTube Video: Introduction to Gemini on Vertex AI**

<a href="https://www.youtube.com/watch?v=YfiLUpNejpE&list=PLIivdWyY5sqJio2yeg1dlfILOUO2FoFRx" target="_blank">
  <img src="https://img.youtube.com/vi/YfiLUpNejpE/maxresdefault.jpg" alt="Introduction to Gemini on Vertex AI" width="500">
</a>

[Gemini 2.0 Flash](https://cloud.google.com/vertex-ai/generative-ai/docs/gemini-v2) is a new multimodal generative ai model from the Gemini family developed by [Google DeepMind](https://deepmind.google/). It is available through the Gemini API in Vertex AI and Vertex AI Studio. The model introduces new features and enhanced core capabilities:

- Multimodal Live API: This new API helps you create real-time vision and audio streaming applications with tool use.
- Speed and performance: Gemini 2.0 Flash is the fastest model in the industry, with a 3x improvement in time to first token (TTFT) over 1.5 Flash.
- Quality: The model maintains quality comparable to larger models like Gemini 1.5 Pro and GPT-4o.
- Improved agentic experiences: Gemini 2.0 delivers improvements to multimodal understanding, coding, complex instruction following, and function calling.
- New Modalities: Gemini 2.0 introduces native image generation and controllable text-to-speech capabilities, enabling image editing, localized artwork creation, and expressive storytelling.
- To support the new model, we're also shipping an all new SDK that supports simple migration between the Gemini Developer API and the Gemini API in Vertex AI.

In [3]:
import os
import re

# Definisci il percorso del file XML (assicurati che sia corretto)
XML_OUTPUT_PATH = "/content/drive/MyDrive/prompts.xml"

# Funzione per convertire le linee con asterisco in una lista HTML
def convert_to_html_list(text):
    lines = text.splitlines()
    new_lines = []
    in_list = False
    for line in lines:
        # Se la linea inizia con un asterisco seguito da uno spazio
        if re.match(r"^\*\s+", line):
            if not in_list:
                new_lines.append("<ul>")
                in_list = True
            # Rimuovi l'asterisco e lo spazio iniziale e crea un <li>
            item = re.sub(r"^\•\s+", "", line)
            new_lines.append(f"<li>{item}</li>")
        else:
            if in_list:
                new_lines.append("</ul>")
                in_list = False
            new_lines.append(line)
    if in_list:
        new_lines.append("</ul>")
    return "\n".join(new_lines)

# Esempio di utilizzo della funzione (puoi applicarla al testo generato prima di inserirlo nell'XML)
example_text = """<h4>Riassunto dei punti chiave</h4>
* Gli infortuni gravi all'adduttore lungo nel calcio sono spesso causati da azioni di catena cinetica chiusa (CKC), in particolare quando il giocatore si allunga per raggiungere la palla con la gamba non infortunata.
* Questi infortuni sono caratterizzati da una combinazione di estensione, abduzione e rotazione esterna dell'anca.
* L'azione muscolare eccentrica, ovvero l'allungamento rapido del muscolo durante la contrazione, gioca un ruolo cruciale nell'infortunio.
* La prevenzione dovrebbe concentrarsi sul rafforzamento eccentrico dell'adduttore lungo, sul miglioramento del controllo del corpo e sulla forza dei muscoli sinergici.
Spero che questa analisi vi sia stata utile e interessante. Alla prossima!"""

converted_text = convert_to_html_list(example_text)
print("Testo convertito in lista HTML:\n", converted_text)

# --- Parte 2: Pulizia del file XML già generato ---

# Leggi il contenuto del file XML
with open(XML_OUTPUT_PATH, "r", encoding="utf-8") as f:
    xml_content = f.read()

# Visualizza un estratto del file per il debug (facoltativo)
snippet = xml_content[:1000]
print("Estratto del file XML prima della pulizia:\n", repr(snippet))

# Definisci i pattern da rimuovere:
# Rimuove sia sequenze che iniziano con "```html" sia quelle con "```xml", seguite da "&lt;article&gt;"
pattern_open = r"(?:```html|```xml|```)\s*&lt;article&gt;\s*"
# Rimuove la sequenza di chiusura: "&lt;/article&gt;" seguito da eventuali spazi e poi "```"
pattern_close = r"&lt;/article&gt;\s*```"

# Applica le sostituzioni (in modalità MULTILINE)
xml_clean = re.sub(pattern_open, "", xml_content, flags=re.MULTILINE)
xml_clean = re.sub(pattern_close, "", xml_clean, flags=re.MULTILINE)

# Scrivi il contenuto pulito sovrascrivendo il file originale
with open(XML_OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.write(xml_clean)

print("Il file XML è stato pulito e salvato in:", XML_OUTPUT_PATH)


Testo convertito in lista HTML:
 <h4>Riassunto dei punti chiave</h4>
<ul>
<li>* Gli infortuni gravi all'adduttore lungo nel calcio sono spesso causati da azioni di catena cinetica chiusa (CKC), in particolare quando il giocatore si allunga per raggiungere la palla con la gamba non infortunata.</li>
<li>* Questi infortuni sono caratterizzati da una combinazione di estensione, abduzione e rotazione esterna dell'anca.</li>
<li>* L'azione muscolare eccentrica, ovvero l'allungamento rapido del muscolo durante la contrazione, gioca un ruolo cruciale nell'infortunio.</li>
<li>* La prevenzione dovrebbe concentrarsi sul rafforzamento eccentrico dell'adduttore lungo, sul miglioramento del controllo del corpo e sulla forza dei muscoli sinergici.</li>
</ul>
Spero che questa analisi vi sia stata utile e interessante. Alla prossima!


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/prompts.xml'

### Objectives

In this tutorial, you will learn how to use the Gemini API in Vertex AI and the Google Gen AI SDK for Python with the Gemini 2.0 Flash model.

You will complete the following tasks:

- Generate text from text prompts
  - Generate streaming text
  - Start multi-turn chats
  - Use asynchronous methods
- Configure model parameters
- Set system instructions
- Use safety filters
- Use controlled generation
- Count tokens
- Process multimodal (audio, code, documents, images, video) data
- Use automatic and manual function calling
- Code execution

## Getting Started

### Install Google Gen AI SDK for Python


In [None]:
%pip install --upgrade --quiet google-genai

### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Connect to a generative AI API service

Google Gen AI APIs and models including Gemini are available in the following two API services:

- **[Google AI for Developers](https://ai.google.dev/gemini-api/docs)**: Experiment, prototype, and deploy small projects.
- **[Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/overview)**: Build enterprise-ready projects on Google Cloud.

The Google Gen AI SDK provides a unified interface to these two API services.

This notebook shows how to use the Google Gen AI SDK with the Gemini API in Vertex AI.

### Import libraries


In [None]:
from IPython.display import HTML, Markdown, display
from google import genai
from google.genai.types import (
    FunctionDeclaration,
    GenerateContentConfig,
    GoogleSearch,
    HarmBlockThreshold,
    HarmCategory,
    MediaResolution,
    Part,
    Retrieval,
    SafetySetting,
    Tool,
    ToolCodeExecution,
    VertexAISearch,
)

### Set up Google Cloud Project or API Key for Vertex AI

You'll need to set up authentication by choosing **one** of the following methods:

1.  **Use a Google Cloud Project:** Recommended for most users, this requires enabling the Vertex AI API in your Google Cloud project.
    [Enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)
    *   Run the cell below to set your project ID.
2.  **Use a Vertex AI API Key (Express Mode):** For quick experimentation.
    [Get an API Key](https://cloud.google.com/vertex-ai/generative-ai/docs/start/express-mode/overview)
    *   Run the cell further below to use your API key.

#### Option 1. Use a Google Cloud Project


In [None]:
import os

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = str(os.environ.get("GOOGLE_CLOUD_PROJECT"))

LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

#### Option 2. Use a Vertex AI API Key (Express Mode)

In [None]:
API_KEY = "[your-api-key]"  # @param {type: "string", placeholder: "[your-api-key]", isTemplate: true}

if not API_KEY or API_KEY == "[your-api-key]":
    raise Exception("You must provide an API key to use Vertex AI in express mode.")

client = genai.Client(vertexai=True, api_key=API_KEY)

Verify which mode you are using.

In [None]:
if not client._api_client.vertexai:
    print(f"Using Gemini Developer API.")
elif client._api_client.project:
    print(
        f"Using Vertex AI with project: {client._api_client.project} in location: {client._api_client.location}"
    )
elif client._api_client.api_key:
    print(
        f"Using Vertex AI in express mode with API key: {client._api_client.api_key[:5]}...{client._api_client.api_key[-5:]}"
    )

## Use the Gemini 2.0 Flash model

### Load the Gemini 2.0 Flash model

Learn more about all [Gemini models on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models).

In [None]:
MODEL_ID = "gemini-2.0-flash-001"  # @param {type: "string"}

### Generate text from text prompts

Use the `generate_content()` method to generate responses to your prompts.

You can pass text to `generate_content()`, and use the `.text` property to get the text content of the response.

By default, Gemini outputs formatted text using [Markdown](https://daringfireball.net/projects/markdown/) syntax.

In [None]:
response = client.models.generate_content(
    model=MODEL_ID, contents="What's the largest planet in our solar system?"
)

display(Markdown(response.text))

#### Example prompts

- What are the biggest challenges facing the healthcare industry?
- What are the latest developments in the automotive industry?
- What are the biggest opportunities in retail industry?
- (Try your own prompts!)

For more examples of prompt engineering, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/prompts/intro_prompt_design.ipynb).

### Generate content stream

By default, the model returns a response after completing the entire generation process. You can also use the `generate_content_stream` method to stream the response as it is being generated, and the model will return chunks of the response as soon as they are generated.

In [None]:
for chunk in client.models.generate_content_stream(
    model=MODEL_ID,
    contents="Tell me a story about a lonely robot who finds friendship in a most unexpected place.",
):
    display(Markdown(chunk.text))
    display(Markdown("---"))

### Start a multi-turn chat

The Gemini API supports freeform multi-turn conversations across multiple turns with back-and-forth interactions.

The context of the conversation is preserved between messages.

In [None]:
chat = client.chats.create(model=MODEL_ID)

In [None]:
response = chat.send_message("Write a function that checks if a year is a leap year.")

display(Markdown(response.text))

This follow-up prompt shows how the model responds based on the previous prompt:

In [None]:
response = chat.send_message("Write a unit test of the generated function.")

display(Markdown(response.text))

### Send asynchronous requests

`client.aio` exposes all analogous [async](https://docs.python.org/3/library/asyncio.html) methods that are available on `client`.

For example, `client.aio.models.generate_content` is the async version of `client.models.generate_content`.

In [None]:
response = await client.aio.models.generate_content(
    model=MODEL_ID,
    contents="Compose a song about the adventures of a time-traveling squirrel.",
)

display(Markdown(response.text))

## Configure model parameters

You can include parameter values in each call that you send to a model to control how the model generates a response. The model can generate different results for different parameter values. You can experiment with different model parameters to see how the results change.

- Learn more about [experimenting with parameter values](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/adjust-parameter-values).

- See a list of all [Gemini API parameters](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#parameters).


In [None]:
response = client.models.generate_content(
    model=MODEL_ID,
    contents="Tell me how the internet works, but pretend I'm a puppy who only understands squeaky toys.",
    config=GenerateContentConfig(
        temperature=0.4,
        top_p=0.95,
        top_k=20,
        candidate_count=1,
        seed=5,
        max_output_tokens=100,
        stop_sequences=["STOP!"],
        presence_penalty=0.0,
        frequency_penalty=0.0,
    ),
)

display(Markdown(response.text))

## Set system instructions

[System instructions](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/prompts/system-instruction-introduction) allow you to steer the behavior of the model. By setting the system instruction, you are giving the model additional context to understand the task, provide more customized responses, and adhere to guidelines over the user interaction.

In [None]:
system_instruction = """
  You are a helpful language translator.
  Your mission is to translate text in English to Spanish.
"""

prompt = """
  User input: I like bagels.
  Answer:
"""

response = client.models.generate_content(
    model=MODEL_ID,
    contents=prompt,
    config=GenerateContentConfig(
        system_instruction=system_instruction,
    ),
)

display(Markdown(response.text))

## Safety filters

The Gemini API provides safety filters that you can adjust across multiple filter categories to restrict or allow certain types of content. You can use these filters to adjust what's appropriate for your use case. See the [Configure safety filters](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/configure-safety-filters) page for details.

When you make a request to Gemini, the content is analyzed and assigned a safety rating. You can inspect the safety ratings of the generated content by printing out the model responses.

The safety settings are `OFF` by default and the default block thresholds are `BLOCK_NONE`.

For more examples of safety filters, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/responsible-ai/gemini_safety_ratings.ipynb).

You can use `safety_settings` to adjust the safety settings for each request you make to the API. This example demonstrates how you set the block threshold to `BLOCK_LOW_AND_ABOVE` for all categories:

In [None]:
system_instruction = "Be as mean as possible."

prompt = """
    Write a list of 5 disrespectful things that I might say to the universe after stubbing my toe in the dark.
"""

safety_settings = [
    SafetySetting(
        category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    ),
    SafetySetting(
        category=HarmCategory.HARM_CATEGORY_HARASSMENT,
        threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    ),
    SafetySetting(
        category=HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    ),
    SafetySetting(
        category=HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        threshold=HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
    ),
]

response = client.models.generate_content(
    model=MODEL_ID,
    contents=prompt,
    config=GenerateContentConfig(
        system_instruction=system_instruction,
        safety_settings=safety_settings,
    ),
)

# Response will be `None` if it is blocked.
print(response.text)
# Finish Reason will be `SAFETY` if it is blocked.
print(response.candidates[0].finish_reason)
# Safety Ratings show the levels for each filter.
for safety_rating in response.candidates[0].safety_ratings:
    print(safety_rating)

## Send multimodal prompts

Gemini is a multimodal model that supports multimodal prompts.

You can include any of the following data types from various sources.

<table>
  <thead>
    <tr>
      <th>Data type</th>
      <th>Source(s)</th>
      <th>MIME Type(s)</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Text</td>
      <td>Inline, Local File, General URL, Google Cloud Storage</td>
      <td><code>text/plain</code></td>
    </tr>
    <tr>
      <td>Code</td>
      <td>Inline, Local File, General URL, Google Cloud Storage</td>
      <td><code>text/plain</code></td>
    </tr>
    <tr>
      <td>Document</td>
      <td>Local File, General URL, Google Cloud Storage</td>
      <td><code>application/pdf</code></td>
    </tr>
    <tr>
      <td>Image</td>
      <td>Local File, General URL, Google Cloud Storage</td>
      <td><code>image/jpeg</code> <code>image/png</code> <code>image/webp</code></td>
    </tr>
    <tr>
      <td>Audio</td>
      <td>Local File, General URL, Google Cloud Storage</td>
      <td>
        <code>audio/aac</code> <code>audio/flac</code> <code>audio/mp3</code>
        <code>audio/m4a</code> <code>audio/mpeg</code> <code>audio/mpga</code>
        <code>audio/mp4</code> <code>audio/opus</code> <code>audio/pcm</code>
        <code>audio/wav</code> <code>audio/webm</code>
      </td>
    </tr>
    <tr>
      <td>Video</td>
      <td>Local File, General URL, Google Cloud Storage, YouTube</td>
      <td>
        <code>video/mp4</code> <code>video/mpeg</code> <code>video/x-flv</code>
        <code>video/quicktime</code> <code>video/mpegps</code> <code>video/mpg</code>
        <code>video/webm</code> <code>video/wmv</code> <code>video/3gpp</code>
      </td>
    </tr>
  </tbody>
</table>

Set `config.media_resolution` to optimize for speed or quality. Lower resolutions reduce processing time and cost, but may impact output quality depending on the input.

For more examples of multimodal use cases, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/getting-started/intro_gemini_2_0_flash.ipynb).

### Send local image

Download an image to local storage from Google Cloud Storage.

For this example, we'll use this image of a meal.

<img src="https://storage.googleapis.com/cloud-samples-data/generative-ai/image/meal.png" alt="Meal" width="500">

In [None]:
!gsutil cp gs://cloud-samples-data/generative-ai/image/meal.png .

In [None]:
with open("meal.png", "rb") as f:
    image = f.read()

response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        Part.from_bytes(data=image, mime_type="image/png"),
        "Write a short and engaging blog post based on this picture.",
    ],
    # Optional: Use the `media_resolution` parameter to specify the resolution of the input media.
    config=GenerateContentConfig(
        media_resolution=MediaResolution.MEDIA_RESOLUTION_LOW,
    ),
)

display(Markdown(response.text))

### Send document from Google Cloud Storage

This example document is the paper ["Attention is All You Need"](https://arxiv.org/abs/1706.03762), created by researchers from Google and the University of Toronto.

Check out this notebook for more examples of document understanding with Gemini:

- [Document Processing with Gemini](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/document-processing/document_processing.ipynb)

In [None]:
response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        Part.from_uri(
            file_uri="gs://cloud-samples-data/generative-ai/pdf/1706.03762v7.pdf",
            mime_type="application/pdf",
        ),
        "Summarize the document.",
    ],
)

display(Markdown(response.text))

### Send audio from General URL

This example is audio from an episode of the [Kubernetes Podcast](https://kubernetespodcast.com/).

In [None]:
response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        Part.from_uri(
            file_uri="https://traffic.libsyn.com/secure/e780d51f-f115-44a6-8252-aed9216bb521/KPOD242.mp3",
            mime_type="audio/mpeg",
        ),
        "Write a summary of this podcast episode.",
    ],
    config=GenerateContentConfig(audio_timestamp=True),
)

display(Markdown(response.text))

### Send video from YouTube URL

This example is the YouTube video [Google — 25 Years in Search: The Most Searched](https://www.youtube.com/watch?v=3KtWfp0UopM).


In [None]:
video = Part.from_uri(
    file_uri="https://www.youtube.com/watch?v=3KtWfp0UopM",
    mime_type="video/mp4",
)

response = client.models.generate_content(
    model=MODEL_ID,
    contents=[
        video,
        "At what point in the video is Harry Potter shown?",
    ],
)

display(Markdown(response.text))

## Multimodal Live API

The Multimodal Live API enables low-latency bidirectional voice and video interactions with Gemini. Using the Multimodal Live API, you can provide end users with the experience of natural, human-like voice conversations, and with the ability to interrupt the model's responses using voice commands. The model can process text, audio, and video input, and it can provide text and audio output.

The Multimodal Live API is built on [WebSockets](https://developer.mozilla.org/en-US/docs/Web/API/WebSockets_API).

For more examples with the Multimodal Live API, refer to the [documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live) or this notebook: [Getting Started with the Multimodal Live API using Gen AI SDK
](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/multimodal-live-api/intro_multimodal_live_api_genai_sdk.ipynb).

## Control generated output

[Controlled generation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output) allows you to define a response schema to specify the structure of a model's output, the field names, and the expected data type for each field.

The response schema is specified in the `response_schema` parameter in `config`, and the model output will strictly follow that schema.

You can provide the schemas as [Pydantic](https://docs.pydantic.dev/) models or a [JSON](https://www.json.org/json-en.html) string and the model will respond as JSON or an [Enum](https://docs.python.org/3/library/enum.html) depending on the value set in `response_mime_type`.

For more examples of controlled generation, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/controlled-generation/intro_controlled_generation.ipynb).

In [None]:
from pydantic import BaseModel


class Recipe(BaseModel):
    name: str
    description: str
    ingredients: list[str]


response = client.models.generate_content(
    model=MODEL_ID,
    contents="List a few popular cookie recipes and their ingredients.",
    config=GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=Recipe,
    ),
)

print(response.text)

You can either parse the response string as JSON, or use the `parsed` field to get the response as an object or dictionary.

In [None]:
parsed_response: Recipe = response.parsed
print(parsed_response)

You also can define a response schema in a Python dictionary. You can only use the supported fields as listed below. All other fields are ignored.

- `enum`
- `items`
- `maxItems`
- `nullable`
- `properties`
- `required`

In this example, you instruct the model to analyze product review data, extract key entities, perform sentiment classification (multiple choices), provide additional explanation, and output the results in JSON format.


In [None]:
response_schema = {
    "type": "ARRAY",
    "items": {
        "type": "ARRAY",
        "items": {
            "type": "OBJECT",
            "properties": {
                "rating": {"type": "INTEGER"},
                "flavor": {"type": "STRING"},
                "sentiment": {
                    "type": "STRING",
                    "enum": ["POSITIVE", "NEGATIVE", "NEUTRAL"],
                },
                "explanation": {"type": "STRING"},
            },
            "required": ["rating", "flavor", "sentiment", "explanation"],
        },
    },
}

prompt = """
  Analyze the following product reviews, output the sentiment classification, and give an explanation.

  - "Absolutely loved it! Best ice cream I've ever had." Rating: 4, Flavor: Strawberry Cheesecake
  - "Quite good, but a bit too sweet for my taste." Rating: 1, Flavor: Mango Tango
"""

response = client.models.generate_content(
    model=MODEL_ID,
    contents=prompt,
    config=GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=response_schema,
    ),
)

response_dict = response.parsed
print(response_dict)

## Count tokens and compute tokens

You can use the `count_tokens()` method to calculate the number of input tokens before sending a request to the Gemini API.

For more information, refer to [list and count tokens](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/list-token)


### Count tokens

In [None]:
response = client.models.count_tokens(
    model=MODEL_ID,
    contents="What's the highest mountain in Africa?",
)

print(response)

### Compute tokens

The `compute_tokens()` method runs a local tokenizer instead of making an API call. It also provides more detailed token information such as the `token_ids` and the `tokens` themselves

<div class="alert alert-block alert-info">
<b>NOTE: This method is only supported in Vertex AI.</b>
</div>

In [None]:
response = client.models.compute_tokens(
    model=MODEL_ID,
    contents="What's the longest word in the English language?",
)

print(response)

## Search as a tool (Grounding)

[Grounding](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-gemini) lets you connect real-world data to the Gemini model.

By grounding model responses in Google Search results, the model can access information at runtime that goes beyond its training data which can produce more accurate, up-to-date, and relevant responses.

Using Grounding with Google Search, you can improve the accuracy and recency of responses from the model. Starting with Gemini 2.0, Google Search is available as a tool. This means that the model can decide when to use Google Search.

For more examples of Grounding, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/grounding/intro-grounding-gemini.ipynb).

### Google Search

You can add the `tools` keyword argument with a `Tool` including `GoogleSearch` to instruct Gemini to first perform a Google Search with the prompt, then construct an answer based on the web search results.

[Dynamic Retrieval](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/ground-gemini#dynamic-retrieval) lets you set a threshold for when grounding is used for model responses. This is useful when the prompt doesn't require an answer grounded in Google Search and the supported models can provide an answer based on their knowledge without grounding. This helps you manage latency, quality, and cost more effectively.

In [None]:
google_search_tool = Tool(google_search=GoogleSearch())

response = client.models.generate_content(
    model=MODEL_ID,
    contents="When is the next total solar eclipse in the United States?",
    config=GenerateContentConfig(tools=[google_search_tool]),
)

display(Markdown(response.text))

print(response.candidates[0].grounding_metadata)

HTML(response.candidates[0].grounding_metadata.search_entry_point.rendered_content)

### Vertex AI Search

You can use a [Vertex AI Search data store](https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es) to connect Gemini to your own custom data.

Follow the [get started guide for Vertex AI Search](https://cloud.google.com/generative-ai-app-builder/docs/try-enterprise-search) to create a data store and app, then add the data store ID in the following code cell.

In [None]:
data_store_id = "YOUR_DATA_STORE_ID"  # @param {type: "string"}

vertex_ai_search_tool = Tool(
    retrieval=Retrieval(
        vertex_ai_search=VertexAISearch(
            datastore=f"projects/{PROJECT_ID}/locations/us/collections/default_collection/dataStores/{data_store_id}"
        )
    )
)

response = client.models.generate_content(
    model=MODEL_ID,
    contents="What is the company culture like?",
    config=GenerateContentConfig(tools=[vertex_ai_search_tool]),
)

display(Markdown(response.text))

print(response.candidates[0].grounding_metadata)

## Function calling

[Function Calling](https://cloud.google.com/vertex-ai/docs/generative-ai/multimodal/function-calling) in Gemini lets developers create a description of a function in their code, then pass that description to a language model in a request.

You can submit a Python function for automatic function calling, which will run the function and return the output in natural language generated by Gemini.

You can also submit an [OpenAPI Specification](https://www.openapis.org/) which will respond with the name of a function that matches the description and the arguments to call it with.

For more examples of Function Calling, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/function-calling/intro_function_calling.ipynb).

### Python Function (Automatic Function Calling)

In [None]:
def get_current_weather(location: str) -> str:
    """Example method. Returns the current weather.

    Args:
        location: The city and state, e.g. San Francisco, CA
    """
    weather_map: dict[str, str] = {
        "Boston, MA": "snowing",
        "San Francisco, CA": "foggy",
        "Seattle, WA": "raining",
        "Austin, TX": "hot",
        "Chicago, IL": "windy",
    }
    return weather_map.get(location, "unknown")


response = client.models.generate_content(
    model=MODEL_ID,
    contents="What is the weather like in Austin?",
    config=GenerateContentConfig(
        tools=[get_current_weather],
        temperature=0,
    ),
)

display(Markdown(response.text))

### OpenAPI Specification (Manual Function Calling)

In [None]:
get_destination = FunctionDeclaration(
    name="get_destination",
    description="Get the destination that the user wants to go to",
    parameters={
        "type": "OBJECT",
        "properties": {
            "destination": {
                "type": "STRING",
                "description": "Destination that the user wants to go to",
            },
        },
    },
)

destination_tool = Tool(
    function_declarations=[get_destination],
)

response = client.models.generate_content(
    model=MODEL_ID,
    contents="I'd like to travel to Paris.",
    config=GenerateContentConfig(
        tools=[destination_tool],
        temperature=0,
    ),
)

print(response.function_calls[0])

## Code Execution

The Gemini API [code execution](https://ai.google.dev/gemini-api/docs/code-execution?lang=python) feature enables the model to generate and run Python code and learn iteratively from the results until it arrives at a final output. You can use this code execution capability to build applications that benefit from code-based reasoning and that produce text output. For example, you could use code execution in an application that solves equations or processes text.

The Gemini API provides code execution as a tool, similar to function calling.
After you add code execution as a tool, the model decides when to use it.

For more examples of Code Execution, refer to [this notebook](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/code-execution/intro_code_execution.ipynb).

In [None]:
code_execution_tool = Tool(code_execution=ToolCodeExecution())

response = client.models.generate_content(
    model=MODEL_ID,
    contents="Calculate 20th fibonacci number. Then find the nearest palindrome to it.",
    config=GenerateContentConfig(
        tools=[code_execution_tool],
        temperature=0,
    ),
)
for part in response.candidates[0].content.parts:
    if part.executable_code:
        print("Language:", part.executable_code.language)
        display(
            Markdown(
                f"""
```
{part.executable_code.code}
```
"""
            )
        )
    if part.code_execution_result:
        print("\nOutcome:", part.code_execution_result.outcome)
        display(Markdown(f"`{part.code_execution_result.output}`"))

## Spatial Understanding

Gemini 2.0 includes improved spatial understanding and object detection capabilities. Check out this notebook for examples:

- [2D spatial understanding with Gemini 2.0](https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/spatial-understanding/spatial_understanding.ipynb)

## What's next

- See the [Google Gen AI SDK reference docs](https://googleapis.github.io/python-genai/).
- Explore other notebooks in the [Google Cloud Generative AI GitHub repository](https://github.com/GoogleCloudPlatform/generative-ai).
- Explore AI models in [Model Garden](https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models).