In [None]:
# INSTALAMOS VERTEX AI SDK PARA PYTHON

!pip3 install --upgrade --user google-cloud-aiplatform
!pip3 install --upgrade --user google-cloud-aiplatform pymupdf

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.91.0-py2.py3-none-any.whl.metadata (35 kB)
Downloading google_cloud_aiplatform-1.91.0-py2.py3-none-any.whl (7.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m44.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-cloud-aiplatform
[0mSuccessfully installed google-cloud-aiplatform-1.91.0
Collecting pymupdf
  Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.5-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m59.7 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: pymupdf
[0mSuccessfully installed pymupdf-1.25.5


In [None]:
# REESTABLECEMOS EL KERNEL

import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)


{'status': 'ok', 'restart': True}

### Define Google Cloud project information

In [None]:
#INFORMACIÓN DEL PROYECTO

import sys

PROJECT_ID = "qwiklabs-gcp-01-4b4df9e68e91" 
LOCATION = "us-east4" 


if "google.colab" not in sys.modules:
    import subprocess

    PROJECT_ID = subprocess.check_output(
        ["gcloud", "config", "get-value", "project"], text=True
    ).strip()

print(f"Your project ID is: {PROJECT_ID}")


Your project ID is: qwiklabs-gcp-01-4b4df9e68e91


### Initialize Vertex AI

Initialize the Vertex AI SDK for Python for your project:

In [None]:
# INICIAMOS VERTEX AI

import vertexai
vertexai.init(project=PROJECT_ID, location=LOCATION)


## Task 1. Generating Multimodal Insights with Gemini

Gemini is multimodal model that supports multimodal prompts. You can include text, image(s), and video in your prompt requests and get text or code responses.

To complete Task 1, follow the instructions at the top of each notebook cell:
* Run the cells with the comment "RUN THIS CELL AS IS".
* Complete and run the cells with the comment "COMPLETE THE MISSING PART AND RUN THIS CELL".

__Note__: Ensure you can see the weather related data in the response that is printed.


### Setup and requirements for Task 1

#### Import libraries

In [None]:
# IMPORTAMOS LAS LIBRERIAS

from vertexai.generative_models import (
    GenerationConfig,
    GenerativeModel,
    Image,
    Part,
)

#### Load Gemini 2.0 Flash Model

In [None]:
# CARGAMOS EL MODELO

multimodal_model = GenerativeModel("gemini-2.0-flash-001")

#### Define helper functions

In [None]:
# DEFINIMOS LAS FUNCIONES

import http.client
import typing
import urllib.request

import IPython.display
from PIL import Image as PIL_Image
from PIL import ImageOps as PIL_ImageOps


def display_images(
    images: typing.Iterable[Image],
    max_width: int = 600,
    max_height: int = 350,
) -> None:
    for image in images:
        pil_image = typing.cast(PIL_Image.Image, image._pil_image)
        if pil_image.mode != "RGB":
            
            pil_image = pil_image.convert("RGB")
        image_width, image_height = pil_image.size
        if max_width < image_width or max_height < image_height:
            
            pil_image = PIL_ImageOps.contain(pil_image, (max_width, max_height))
        IPython.display.display(pil_image)


def get_image_bytes_from_url(image_url: str) -> bytes:
    with urllib.request.urlopen(image_url) as response:
        response = typing.cast(http.client.HTTPResponse, response)
        image_bytes = response.read()
    return image_bytes


def load_image_from_url(image_url: str) -> Image:
    image_bytes = get_image_bytes_from_url(image_url)
    return Image.from_bytes(image_bytes)


def display_content_as_image(content: str | Image | Part) -> bool:
    if not isinstance(content, Image):
        return False
    display_images([content])
    return True


def display_content_as_video(content: str | Image | Part) -> bool:
    if not isinstance(content, Part):
        return False
    part = typing.cast(Part, content)
    file_path = part.file_data.file_uri.removeprefix("gs://")
    video_url = f"https://storage.googleapis.com/{file_path}"
    IPython.display.display(IPython.display.Video(video_url, width=600))
    return True


def print_multimodal_prompt(contents: list[str | Image | Part]):
    """
    Given contents that would be sent to Gemini,
    output the full multimodal prompt for ease of readability.
    """
    for content in contents:
        if display_content_as_image(content):
            continue
        if display_content_as_video(content):
            continue
        print(content)

### Task 1.1. Image understanding across multiple images

In [None]:

image_ask_first_1_url = "https://storage.googleapis.com/spls/gsp520/Google_Branding/Ask_first_1.png"
image_dont_do_this_1_url = "https://storage.googleapis.com/spls/gsp520/Google_Branding/Dont_do_this_1.png"
image_ask_first_1 = load_image_from_url(image_ask_first_1_url)
image_dont_do_this_1 = load_image_from_url(image_dont_do_this_1_url)

instructions = "Instructions: Consider the following image that contains text:"
prompt1 = "What is the title of this image"
prompt2 = """
Answer the question through these steps:
Step 1: Identify the title of each image by using the filename of each image.
Step 2: Describe the image.
Step 3: For each image, describe the actions that a user is expected to take.
Step 4: Extract the text from each image as a full sentence.
Step 5: Describe the sentiment for each image with an explanation.

Answer and describe the steps taken:
"""

#### Create an input for the multimodal model

In [None]:

contents = [ 
    Part.from_text("Analiza estas imágenes brevemente en castellano:"),
    Part.from_image(image_ask_first_1),
    Part.from_image(image_dont_do_this_1)]

#### Generate responses from the multimodal model

In [None]:

responses = multimodal_model.generate_content(contents)
print(f"responses = '{responses.text}'")

responses = 'Aquí tienes un análisis breve de las imágenes:

**Imagen 1:**

*   **Contenido visual:** La imagen muestra un logo que combina dos "X" azules entrelazadas, el nombre "SYSTEMERGER" debajo, y una frase que indica que el producto/servicio facilita la integración de flujos de trabajo. Además, muestra iconos de aplicaciones como Gmail, Google Calendar y Google Drive, insinuando compatibilidad.
*   **Texto complementario:** Advierte sobre el uso de iconos de productos, remitiendo a una guía de uso de iconos para saber si se pueden usar ciertos iconos de productos en asociación con el negocio.

**Imagen 2:**

*   **Contenido visual:** Muestra el logo de un restaurante llamado "Frank's Crab Shack" con una imagen de un ancla dentro de un círculo y el eslogan "#1 Crab Restaurant in Northeastern Maine" mostrado con el logo de Google al lado.
*   **Texto complementario:** Advierte sobre la implicación de respaldo. Indica que no se use el logo de Google ni ningún elemento de su marca d

#### Display the prompt and responses


In [None]:

print("--- Contenido (Prompt) ---")
for item in contents:
    if hasattr(item, 'part_data') and hasattr(item.part_data, 'inline_data'):
        if item.part_data.inline_data.mime_type.startswith('image'):
            print("- Contenido: Imagen (tipo MIME: {})".format(item.part_data.inline_data.mime_type))
        elif item.part_data.inline_data.mime_type == 'text/plain':
            print("- Contenido: Texto - '{}'".format(item.text))
        else:
            print("- Contenido: Otro tipo de dato")
    elif hasattr(item, 'text'):
        print("- Contenido: Texto - '{}'".format(item.text))
    else:
        print("- Contenido: Desconocido")

print("\n--- Respuesta del Modelo ---")
if hasattr(responses, 'text'):
    print(responses.text)
else:
    print("La respuesta del modelo no contiene texto.")
    print("Respuesta completa del modelo:", responses)



--- Contenido (Prompt) ---
- Contenido: Texto - 'Analiza estas imágenes brevemente en castellano:'
- Contenido: Desconocido
- Contenido: Desconocido

--- Respuesta del Modelo ---
Aquí tienes un análisis breve de las imágenes:

**Imagen 1:**

*   **Contenido visual:** La imagen muestra un logo que combina dos "X" azules entrelazadas, el nombre "SYSTEMERGER" debajo, y una frase que indica que el producto/servicio facilita la integración de flujos de trabajo. Además, muestra iconos de aplicaciones como Gmail, Google Calendar y Google Drive, insinuando compatibilidad.
*   **Texto complementario:** Advierte sobre el uso de iconos de productos, remitiendo a una guía de uso de iconos para saber si se pueden usar ciertos iconos de productos en asociación con el negocio.

**Imagen 2:**

*   **Contenido visual:** Muestra el logo de un restaurante llamado "Frank's Crab Shack" con una imagen de un ancla dentro de un círculo y el eslogan "#1 Crab Restaurant in Northeastern Maine" mostrado con el lo

### Task 1.2. Similarity/Differences between images

#### Explore the variables of the task

In [None]:
image_ask_first_3_url = "https://storage.googleapis.com/spls/gsp520/Google_Branding/Ask_first_3.png"
image_dont_do_this_3_url =  "https://storage.googleapis.com/spls/gsp520/Google_Branding/Dont_do_this_3.png"
image_ask_first_3 = load_image_from_url(image_ask_first_3_url)
image_dont_do_this_3 = load_image_from_url(image_dont_do_this_3_url)

prompt1 = """
Consider the following two images:
Image 1:
"""
prompt2 = """
Image 2:
"""
prompt3 = """
1. What is shown in Image 1 and Image 2?
2. What is similar between the two images?
3. What is difference between Image 1 and Image 2 in terms of the text ?
"""



#### Create an input for the multimodal model

In [None]:
contents = [
    Part.from_text(prompt1),
    Part.from_image(image_ask_first_3),
    Part.from_text(prompt2),
    Part.from_image(image_dont_do_this_3),
    Part.from_text(prompt3)
]

#### Set configuration parameters

In [None]:
generation_config =  {
    "temperature": 0.5,
    "top_p": 0.8,
    "top_k": 20
}


#### Generate responses from the multimodal model


In [None]:
responses =  multimodal_model.generate_content(
        contents,
        generation_config=generation_config
    )
print(f"responses = '{responses.text}'")


responses = 'Here's a breakdown of the images:

**1. What is shown in Image 1 and Image 2?**



**2. What is similar between the two images?**

*   **Connection to Google:** Both images relate to Google. Image 1 shows a partnership with "Grow with Google," and Image 2 warns against imitating Google's visual identity.

**3. What is the difference between Image 1 and Image 2 in terms of the text?**

*   **Purpose:** The text in Image 1 is informative and advisory, providing guidance on how to handle existing Google sponsorships. The text in Image 2 is cautionary, aiming to prevent the misuse or imitation of Google's brand elements.
*   **Tone:** The tone in Image 1 is professional and helpful. The tone in Image 2 is more assertive, emphasizing the importance of not imitating Google's brand.'


#### Display the prompt and responses

In [None]:
for item in contents:
    if hasattr(item, 'part_data') and hasattr(item.part_data, 'inline_data'):
        if item.part_data.inline_data.mime_type.startswith('image'):
            print("- Contenido: Imagen (tipo MIME: {})".format(item.part_data.inline_data.mime_type))
        elif item.part_data.inline_data.mime_type == 'text/plain':
            print("- Contenido: Texto - '{}'".format(item.text))
        else:
            print("- Contenido: Otro tipo de dato")
    elif hasattr(item, 'text'):
        print("- Contenido: Texto - '{}'".format(item.text))
    else:
        print("- Contenido: Desconocido")

print("\n--- Respuesta del Modelo ---")
if hasattr(responses, 'text'):
    print(responses.text)
else:
    print("La respuesta del modelo no contiene texto.")
    print("Respuesta completa del modelo:", responses)

- Contenido: Texto - '
Consider the following two images:
Image 1:
'
- Contenido: Desconocido
- Contenido: Texto - '
Image 2:
'
- Contenido: Desconocido
- Contenido: Texto - '
1. What is shown in Image 1 and Image 2?
2. What is similar between the two images?
3. What is difference between Image 1 and Image 2 in terms of the text ?
'

--- Respuesta del Modelo ---
Here's a breakdown of the images:

**1. What is shown in Image 1 and Image 2?**



**2. What is similar between the two images?**

*   **Connection to Google:** Both images relate to Google. Image 1 shows a partnership with "Grow with Google," and Image 2 warns against imitating Google's visual identity.

**3. What is the difference between Image 1 and Image 2 in terms of the text?**

*   **Purpose:** The text in Image 1 is informative and advisory, providing guidance on how to handle existing Google sponsorships. The text in Image 2 is cautionary, aiming to prevent the misuse or imitation of Google's brand elements.
*   **Tone


### Task 1.6. Retrieve extra information beyond the video

#### Explore the variables of the task

In [None]:
prompt = """
Answer the following questions using the video only:

How does the advertisement appeal to its target audience through its messaging and imagery?
What overall message or takeaway does the advertisement convey about the brand and its products?
Are there any symbolic elements or motifs used throughout the advertisement to reinforce its central themes?
What is the best hashtag for this video based on the description ?

"""

#CONFIGURAMOS EL VIDEO

video = Part.from_uri(
    uri="gs://spls/gsp520/google-pixel-8-pro.mp4",
    mime_type="video/mp4",
)

#### Create an input for the multimodal model

In [None]:
contents = [Part.from_text(prompt), video]

#### Generate responses from the multimodal model

In [None]:
responses =  multimodal_model.generate_content(contents)
print(f"responses = '{responses.text}'")

responses = 'Okay, here are the answers based solely on the information presented in the video:

*   **How does the advertisement appeal to its target audience through its messaging and imagery?**

    The advertisement appeals to the target audience by showcasing various situations and scenarios where people use the phone's features. It uses vibrant visuals, catchy music, and relatable situations to engage the viewer. It highlights features such as background noise reduction, object eraser, and ensuring everyone in a group photo is smiling to entice the target audience. The fast-paced editing and modern aesthetic appeal to a younger demographic.

*   **What overall message or takeaway does the advertisement convey about the brand and its products?**

    The advertisement conveys that the Google Pixel 8 Pro is an innovative and versatile phone that utilizes AI and editing features to improve photography and user experience. The overall message is that with Pixel, anyone can make momen

#### Display the prompt and responses

In [None]:
print("--- Prompt (Preguntas y Video) ---")
for item in contents:
    if isinstance(item, str):
        print("- Texto del Prompt:")
        print(item)
    elif hasattr(item, 'part_data') and hasattr(item.part_data, 'inline_data') and item.part_data.inline_data.mime_type.startswith('video'):
        print("- Contenido: Video (tipo MIME: {})".format(item.part_data.inline_data.mime_type))
        print("- URI del Video:", item.uri)
    elif hasattr(item, 'text'):
        print("- Texto del Prompt:")
        print(item.text)
    elif hasattr(item, 'uri') and hasattr(item, 'mime_type') and item.mime_type.startswith('video'):
        print("- Contenido: Video (tipo MIME: {})".format(item.mime_type))
        print("- URI del Video:", item.uri)
    else:
        print("- Contenido: Otro tipo de dato")

print("\n--- Respuesta del Modelo ---")
if hasattr(responses, 'text'):
    print(responses.text)
else:
    print("La respuesta del modelo no contiene texto.")
    print("Respuesta completa del modelo:", responses)

--- Prompt (Preguntas y Video) ---
- Texto del Prompt:

Answer the following questions using the video only:

How does the advertisement appeal to its target audience through its messaging and imagery?
What overall message or takeaway does the advertisement convey about the brand and its products?
Are there any symbolic elements or motifs used throughout the advertisement to reinforce its central themes?
What is the best hashtag for this video based on the description ?


- Contenido: Otro tipo de dato

--- Respuesta del Modelo ---
Okay, here are the answers based solely on the information presented in the video:

*   **How does the advertisement appeal to its target audience through its messaging and imagery?**

    The advertisement appeals to the target audience by showcasing various situations and scenarios where people use the phone's features. It uses vibrant visuals, catchy music, and relatable situations to engage the viewer. It highlights features such as background noise redu

## Task 2. Retrieving and integrating knowledge with multimodal retrieval augmented generation (RAG)

To complete Task 2, follow the instructions at the top of each notebook cell:
* Run the cells with the comment "RUN THIS CELL AS IS".
* Complete and run the cells with the comment "COMPLETE THE MISSING PART AND RUN THIS CELL".

For additional information about the available data and helper functions for Task 2, review the section named __Available data and helper functions for Task 2__ in the lab instructions.

### Setup and requirements for Task 2

#### Import libraries

In [None]:
from IPython.display import Markdown, display
from vertexai.generative_models import (
    Content,
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold,
    Image,
    Part,
)


#### Load the Gemini 2.0 Flash model

In [None]:
multimodal_model = GenerativeModel("gemini-2.0-flash-001")

#### Download custom Python modules and utilities 



In [None]:
import os
import urllib.request
import sys

if not os.path.exists("utils"):
    os.makedirs("utils")



url_prefix = "https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/use-cases/retrieval-augmented-generation/utils/"
files = ["intro_multimodal_rag_utils.py"]

for fname in files:
    urllib.request.urlretrieve(f"{url_prefix}/{fname}", filename=f"utils/{fname}")


#### Get documents and images from Cloud Storage

In [None]:
!gsutil -m rsync -r gs://spls/gsp520 .
print("Download completed")

Building synchronization state...
Starting synchronization...
Download completed


### Task 2.1. Build metadata of documents containing text and images


#### Import helper functions to build metadata

In [None]:
from utils.intro_multimodal_rag_utils import get_document_metadata

#### Explore the variables of the task

In [None]:


pdf_folder_path = "Google_Branding/"  

image_description_prompt = """Explain what is going on in the image.
If it's a table, extract all elements of the table.
If it's a graph, explain the findings in the graph.
Do not include any numbers that are not mentioned in the image.
"""


#### Extract and store metadata of text and images from a document

In [None]:

generative_multimodal_model = GenerativeModel("gemini-pro-vision")
image_save_dir = "extracted_images"

text_metadata_df, image_metadata_df = get_document_metadata(
    pdf_folder_path=pdf_folder_path,
    generative_multimodal_model=generative_multimodal_model,
    image_save_dir=image_save_dir,
    image_description_prompt=image_description_prompt
)



print("\n\n --- Completed processing. ---")



 Processing the file: --------------------------------- Google_Branding/Google_terms_of_service_en_us.pdf 


Processing page: 1
Processing page: 2
Processing page: 3
Processing page: 4
Processing page: 5
Processing page: 6
Processing page: 7
Processing page: 8
Processing page: 9
Processing page: 10
Processing page: 11
Processing page: 12
Processing page: 13
Processing page: 14
Processing page: 15
Processing page: 16


 --- Completed processing. ---


#### Inspect the processed text metadata

In [None]:

print("--- Primeras filas de text_metadata_df ---")
print(text_metadata_df.head())

--- Primeras filas de text_metadata_df ---
                           file_name  page_num  \
0  Google_terms_of_service_en_us.pdf         1   
1  Google_terms_of_service_en_us.pdf         1   
2  Google_terms_of_service_en_us.pdf         2   
3  Google_terms_of_service_en_us.pdf         2   
4  Google_terms_of_service_en_us.pdf         3   

                                                text  \
0  GOOGLE TERMS OF SERVICE\nEffective January 5, ...   
1  GOOGLE TERMS OF SERVICE\nEffective January 5, ...   
2  Google services are provided by, and youre con...   
3  Google services are provided by, and youre con...   
4  apps and sites (like Search and Maps)\nplatfor...   

                                 text_embedding_page  chunk_number  \
0  [-0.012991457246243954, 0.00233408878557384, 0...             1   
1  [-0.012991457246243954, 0.00233408878557384, 0...             2   
2  [-0.027467481791973114, -0.03269978240132332, ...             1   
3  [-0.027467481791973114, -0.032699782

#### Import the helper functions to implement RAG

In [None]:
from utils.intro_multimodal_rag_utils import (
    get_similar_text_from_query,
    print_text_to_text_citation,
    get_similar_image_from_query,
    print_text_to_image_citation,
    get_gemini_response,
    display_images,
)



### Task 2.2. Create a user query

#### Explore the variables of the task

In [None]:
# CREAMOS LA CONSULTA

query = """Questions:
 - What are the key expectations that users can have from Google regarding the provision and development of its services?
- What specific rules and guidelines are established for users when using Google services?
- How does Google handle intellectual property rights related to the content found within its services, including content owned by users, Google, and third parties? 
- What legal rights and remedies are available to users in case of problems or disagreements with Google?
- How do the service-specific additional terms interact with these general Terms of Service, and which terms take precedence in case of any conflicts?
 """



### Task 2.3. Get all relevant text chunks

#### Retrieve relevant chunks of text based on the query

In [None]:

matching_results_chunks_data = get_similar_text_from_query(
    query=query,
    text_metadata_df=text_metadata_df,
    column_name="text_embedding_page"  # Reemplaza con el nombre real de la columna
)



#### Display the first item of the text chunk dictionary

In [None]:

print("--- Primer ítem de matching_results_chunks_data ---")
if matching_results_chunks_data:
    first_item = list(matching_results_chunks_data.items())[0]
    print(first_item)
else:
    print("El diccionario matching_results_chunks_data está vacío.")

--- Primer ítem de matching_results_chunks_data ---
(0, {'file_name': 'Google_terms_of_service_en_us.pdf', 'page_num': np.int64(1), 'cosine_score': 0.87, 'chunk_number': np.int64(1), 'chunk_text': 'GOOGLE TERMS OF SERVICE\nEffective January 5, 2022\nArchived versions\nWhats covered in these terms\nWe know its tempting to skip these Terms of Service, but\nits important to establish what you can expect from us as\nyou use Google services, and what we expect from you.\nThese Terms of Service reflect the way Googles business works, the laws that apply to our company, and\ncertain things weve always believed to be true. As a result, these Terms of Service help define Googles\nrelationship with you as you interact with our services. For example, these terms include the following topic\nheadings:\nWhat you can expect from us, which describes how we provide and develop our services\nWhat we expect from you, which establishes certain rules for using our services\nContent in Google services, whi



### Task 2.4. Create context_text

#### Create a list to store the combined chunks of text

In [None]:
# LISTA VACIA
context_text = []


#### Iterate through each item in the text chunks dictionary

In [None]:
combined_text = ""
for key, value in matching_results_chunks_data.items():
     if isinstance(value, dict) and 'chunk' in value:
       combined_text += value['chunk'] + " " 


print("Texto combinado:")
print(combined_text)


Texto combinado:



#### Join all the text chunks and store in a list

In [None]:

final_context_text = "\n".join(context_text)


print("Texto final combinado:")
print(final_context_text)

Texto final combinado:




### Task 2.5. Pass context to Gemini

#### Explore the variables of the task


In [None]:
# PROVEEMOS LAS VARIABLES

final_context_text = "\n".join(context_text)

prompt = f""" Instructions: Answer in Markdown format, including bullet points, headings, and any other relevant formatting for readability. Compare the images and the text provided as Context: to answer multiple Question:
Make sure to think thoroughly before answering the question and put the necessary steps to arrive at the answer in bullet points for easy explainability.
If unsure, respond, "Not enough context to answer".

Context:
 - Text Context:
 {final_context_text}


{query}

Answer:
"""

print("--- Prompt Generado ---")
print(prompt)
print("--- Prompt Generado ---")
print(prompt)

--- Prompt Generado ---
 Instructions: Answer in Markdown format, including bullet points, headings, and any other relevant formatting for readability. Compare the images and the text provided as Context: to answer multiple Question:
Make sure to think thoroughly before answering the question and put the necessary steps to arrive at the answer in bullet points for easy explainability.
If unsure, respond, "Not enough context to answer".

Context:
 - Text Context:
 


Questions:
 - What are the key expectations that users can have from Google regarding the provision and development of its services?
- What specific rules and guidelines are established for users when using Google services?
- How does Google handle intellectual property rights related to the content found within its services, including content owned by users, Google, and third parties? 
- What legal rights and remedies are available to users in case of problems or disagreements with Google?
- How do the service-specific add

In [102]:

!pip install markdown


Collecting markdown
  Downloading markdown-3.8-py3-none-any.whl.metadata (5.1 kB)
Downloading markdown-3.8-py3-none-any.whl (106 kB)
Installing collected packages: markdown
Successfully installed markdown-3.8


#### Generate Gemini response with streaming output

In [None]:
from IPython.display import HTML, display
from utils.intro_multimodal_rag_utils import get_gemini_response
from vertexai.generative_models import GenerativeModel, GenerationConfig
import markdown 

if not 'final_context_text' in globals() or not final_context_text.strip():
    final_context_text = " No context provided." 

if not 'query' in globals():
    query = "What is this about?"

multimodal_model = GenerativeModel(model_name="gemini-2.0-flash-001")

generation_config = GenerationConfig(
    temperature=0.3,
    top_p=0.8,
    top_k=20,
    max_output_tokens=1024,

prompt = f"""Instructions: Compare the images and the text provided as Context: to answer multiple Questions:
Make sure to think thoroughly before answering the question and put the necessary steps to arrive at the answer in bullet points for easy explainability.
If unsure, respond, "Not enough context to answer".

Context:
- Text Context:
{final_context_text}

{query}

Answer in Markdown format, including bullet points, headings, and any other relevant formatting for readability.
"""

responses = get_gemini_response(
    multimodal_model,
    prompt,
    generation_config=generation_config,
    stream=True
)

def mostrar_contenido_markdown(responses):
    """
    Junta todos los chunks del modelo, los convierte de Markdown a HTML, y los muestra.
    """
    print("\n--- Recibiendo y formateando respuesta ---\n")
    full_response = ""

    for chunk in responses:
        
        text = ""
        if isinstance(chunk, str):
            text = chunk
        elif hasattr(chunk, "text"):
            text = chunk.text
        else:
            text = str(chunk)

        print(f"{text}", end="")  
        full_response += text


    html_output = markdown.markdown(full_response)
    display(HTML(html_output))

mostrar_contenido_markdown(responses)




--- Recibiendo y formateando respuesta ---

Since no images or text context were provided, I cannot answer the questions.

**Answer:**

Not enough context to answer.
