In [1]:

import os
from dotenv import load_dotenv
load_dotenv(override=True)
import pprint
import base64
from google import genai
from google.genai import types
from google.genai.types import GenerateContentConfig

# importing Image module from PIL package  
from PIL import Image  

### Instantiate the API

In [2]:
# Vertex AI client; the project and location are read from environment
# variables (populated by load_dotenv above).
vertex_settings = {
    "vertexai": True,
    "project": os.getenv("PROJECT_ID"),
    "location": os.getenv("LOCATION"),
}
client = genai.Client(**vertex_settings)

In [3]:
# Model name is taken from the MODEL environment variable.
model = os.getenv("MODEL")

# Read the advertisement image as raw bytes. The buffer has its own name so
# the built-in `bytes` type is not shadowed for the rest of the kernel session.
with open("./inputs/veuve-ad.jpg", "rb") as f:
    image_bytes = f.read()

# Wrap the raw bytes in a Part. "image/jpeg" is the registered MIME type for
# JPEG files ("image/jpg" is not a registered type).
image_file = types.Part.from_bytes(
    data=image_bytes, mime_type="image/jpeg"
)

# Query the model with the image followed by a text question.
response = client.models.generate_content(model=model, contents=[image_file, "what is this image?"])
print(response.text)



Certainly! 

This image is an advertisement for **Clicquot Champagne**. It emphasizes its French origin and luxurious appeal, depicting a celebratory scene with people enjoying the champagne. The ad also highlights the brand's history, mentioning that it has been "prominently served since 1772."


### Read unsafe image

In [4]:
# Generation settings with the same BLOCK_LOW_AND_ABOVE threshold applied to
# each of the four harm categories listed below (order preserved).
harm_categories = (
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_HARASSMENT",
)

generate_content_config = GenerateContentConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=8192,
    safety_settings=[
        types.SafetySetting(category=harm_category, threshold="BLOCK_LOW_AND_ABOVE")
        for harm_category in harm_categories
    ],
)

In [5]:
   
# Read the second test image as raw bytes. The buffer has its own name so the
# built-in `bytes` type is not shadowed for the rest of the kernel session.
with open("./inputs/ukrainian-war.png", "rb") as f:
    image_bytes = f.read()

# Wrap the raw bytes in a Part tagged with the PNG MIME type.
image_file = types.Part.from_bytes(
    data=image_bytes, mime_type="image/png"
)

# Query the model, this time passing the generation config that carries the
# safety settings defined in the previous cell.
response = client.models.generate_content(
    model=model,
    contents=[image_file, "what is this image?"],
    config=generate_content_config,
)
print(response.text)

Certainly! This image depicts a group of people in what appears to be a conflict or post-conflict zone. 

Here's a breakdown of what's happening:

*   **Carrying a Woman:** Two soldiers in camouflage uniforms are carrying a woman in their arms. She appears to be injured or incapacitated and is being moved to safety. Her face shows a mixture of stress and weariness. The fact that the woman is being carried suggests she may be hurt or unable to walk on her own.
*   **Soldiers' Appearance:** The soldiers are wearing full military gear including helmets, neck gaiters, and camouflage clothing. One is also carrying a rifle. The patches on their arms suggest these are Ukrainian soldiers, as the yellow armbands are widely recognized as a symbol of the Ukrainian military.
*   **Other Figures:** There are two other men behind them. One of them appears to be taking photographs or video with a camera, while the other seems to be observing the situation. These men are likely civilians or perhaps me

### Add output parser

In [6]:

# Schema for the structured response: an array of objects, each carrying a
# string "image_nature" and a "colors" array of strings (both required).
response_schema = dict(
    type="array",
    items=dict(
        type="object",
        properties=dict(
            image_nature=dict(type="string"),
            colors=dict(type="array", items=dict(type="string")),
        ),
        required=["image_nature", "colors"],
    ),
)


In [13]:

# Ask for a JSON response constrained by `response_schema`.
structured_output_config = GenerateContentConfig(
    response_schema=response_schema,
    response_mime_type="application/json",
)

# `image_file` is whatever image was loaded most recently in an earlier cell.
response = client.models.generate_content(
    config=structured_output_config,
    contents=[image_file, "what is this image?"],
    model=os.getenv("MODEL"),
)

print(response.text)

[
  {
    "colors": ["brown", "gray", "green", "pink", "black"],
    "image_nature": "photograph"
  }
]


### Using LangChain

In [None]:
from langchain_google_vertexai import ChatVertexAI, SafetySetting
from langchain_core.output_parsers.json import JsonOutputParser
from langchain_core.runnables import chain
from langchain_core.messages import HumanMessage
from typing import Literal
from pydantic import Field, BaseModel

from langchain_google_vertexai import (
    HarmBlockThreshold,
    HarmCategory,
)


# Chat model settings, gathered in one mapping and expanded into the
# constructor. No safety_settings are passed here.
chat_model_options = dict(
    model="gemini-1.5-pro",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    project=os.getenv("PROJECT_ID"),
    location=os.getenv("LOCATION"),
)
llm = ChatVertexAI(**chat_model_options)


# Pydantic model listing the attributes to extract from an advertisement image.
# Each Field description is part of the schema handed to the output parser, so
# the wording must agree with the values the type annotation allows.
# The class docstring is intentionally left unchanged.
class ImageInformation(BaseModel):
    """Information about an image."""
    # Call to action
    call_to_action: Literal["yes", "no"] = Field(description="Indicates whether a call to action is present in the image")
    call_to_action_size: int = Field(description="If a call to action is present, specifies the percentage of the image area covered by the call to action")
    car_model: str = Field(description="If applicable, specifies the model of the car shown in the image")
    # Price and purpose
    price: Literal["yes", "no"] = Field(description="Indicates whether a price is displayed in the image")
    ad_purpose: str = Field(description="If applicable, describes the main action or objective encouraged by the ad, such as 'buy', 'sign up', 'learn more', etc.")
    price_size: int = Field(description="If a price is displayed, specifies the percentage of the image area covered by the price")
    discount_display: Literal["yes", "no"] = Field(description="Indicates whether a discount message is present in the ad")
    # The fallback value named in the description must be one the Literal
    # accepts: 'unknown' (the previous text asked for 'None', which is not allowed).
    promotion_wording_type: Literal["gain", "unknown", "loss_aversion"] = Field(description="Specifies the type of promotion wording used in the ad: 'gain' for emphasizing benefits, 'loss_aversion' for emphasizing avoidance of loss, or 'unknown' if there is no promotion wording")
    call_to_action_verb: str = Field(description="Specifies the verb used in the call to action, such as 'buy', 'subscribe', 'discover', if present")
    # Promotion details
    promotion_deadline: Literal["yes", "no"] = Field(description="Indicates whether a promotion deadline is displayed in the image")
    promotion_theme: str = Field(description="Specifies if the promotion is linked to a special calendar event, e.g., Black Friday, Christmas, New Year, etc.")
    promotion_tone: Literal["unknown", "informative", "persuasive", "emotional", "humorous", "inspirational", "authoritative", "urgent", "relatable"] = Field(description="Indicates the tone of voice used in the advertisement, if applicable")
    new_old_vehicle: Literal["new", "old", "second_hand", "unknown"] = Field(description="Indicates whether the advertised vehicle is new or old")
    # Visual characteristics
    brand_logo: Literal["yes", "no"] = Field(description="Indicates whether the brand logo is visible in the image")
    color_scheme: str = Field(description="Describes the dominant color scheme used in the advertisement, such as 'bright', 'pastel', 'monochrome', etc.")
    text_density: int = Field(description="Specifies the percentage of the image area covered by text")
    visual_style: str = Field(description="Describes the visual style of the advertisement, such as 'minimalistic', 'busy', 'vintage', 'modern', etc.")
    target_audience: str = Field(description="Specifies the primary target audience for the advertisement, such as 'young adults', 'families', 'business professionals', etc.")


# JSON output parser tied to the ImageInformation model; its format
# instructions are included in the prompt sent with the image further down.
parser = JsonOutputParser(pydantic_object=ImageInformation)



In [17]:
# Quick text-only call to the chat model; as the last expression of the cell,
# the returned message is displayed as the cell output.
llm.invoke("hello")



AIMessage(content='Hello! 👋  How can I help you today? 😊 \n', additional_kwargs={}, response_metadata={'is_blocked': False, 'safety_ratings': [{'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability_label': 'NEGLIGIBLE', 'blocked': False, 'severity': 'HARM_SEVERITY_NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability_label': 'NEGLIGIBLE', 'blocked': False, 'severity': 'HARM_SEVERITY_NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability_label': 'NEGLIGIBLE', 'blocked': False, 'severity': 'HARM_SEVERITY_NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability_label': 'NEGLIGIBLE', 'blocked': False, 'severity': 'HARM_SEVERITY_NEGLIGIBLE'}], 'usage_metadata': {'prompt_token_count': 1, 'candidates_token_count': 14, 'total_token_count': 15, 'cached_content_token_count': 0}, 'finish_reason': 'STOP', 'avg_logprobs': -0.05891899125916617}, id='run-c7bb8ed4-f84a-4e16-b1d1-dd53c3e617c9-0', usage_metadata={'input_tokens': 1, 'output_tokens': 14,

In [24]:
import base64
# Read and base64-encode the image. Only the read happens inside the `with`
# block, so the file handle is released before the model call rather than
# being held open for its whole duration.
with open("./inputs/veuve-ad.jpg", "rb") as f:
    encoded_string = base64.b64encode(f.read()).decode(encoding="utf-8")

prompt = "retrieve information about the image"

# One multimodal human message: the task prompt, the parser's format
# instructions, and the image embedded as a base64 data URL.
image_message = HumanMessage(
    content=[
        {"type": "text", "text": prompt},
        {"type": "text", "text": parser.get_format_instructions()},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_string}"}},
    ]
)
msg = llm.invoke([image_message])

print(msg)

content='```json\n{"call_to_action": "no", "call_to_action_size": 0, "car_model": "unknown", "price": "no", "ad_purpose": "unknown", "price_size": 0, "discount_display": "no", "promotion_wording_type": "unknown", "call_to_action_verb": "unknown", "promotion_deadline": "no", "promotion_theme": "unknown", "promotion_tone": "unknown", "new_old_vehicle": "unknown", "brand_logo": "yes", "color_scheme": "unknown", "text_density": 10, "visual_style": "unknown", "target_audience": "unknown"}\n```' additional_kwargs={} response_metadata={'is_blocked': False, 'safety_ratings': [{'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability_label': 'LOW', 'blocked': False, 'severity': 'HARM_SEVERITY_NEGLIGIBLE'}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability_label': 'NEGLIGIBLE', 'blocked': False, 'severity': 'HARM_SEVERITY_LOW'}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability_label': 'NEGLIGIBLE', 'blocked': False, 'severity': 'HARM_SEVERITY_NEGLIGIBLE'}, {'category': 'HARM_CATEGOR