In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import base64

load_dotenv(".\.env")

image_path = r"D:\Learning Materials\Data Science\Projects\ai-classifier-app\bus.jpg"
# Open the file in binary mode and encode
with open(image_path, "rb") as f:
    image_bytes = f.read()
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")

model = ChatGoogleGenerativeAI(
            model="models/gemini-2.0-flash-exp-image-generation",
            temperature=0,
            max_tokens=None,
            timeout=None)

message = {
    "role": "user",
    "content": [
        {
            "type": "text",
            "text": "What is the main object of this image? just give concise statement "
            "which I'll use in next agent for generating someusful information",
        },
   {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
        },
    ],
}

response = model.invoke([message],
    generation_config=dict(response_modalities=["IMAGE", "TEXT"]),
) 

response.content

'A blue electric minibus with "cero emisiones" (zero emissions) written on its side.'

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
import base64

load_dotenv(".\.env")

def short_info_of_image(image_path):
    path = image_path
    # Open the file in binary mode and encode
    with open(path, "rb") as f:
        image_bytes = f.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

    model = ChatGoogleGenerativeAI(
                model="models/gemini-2.0-flash-exp-image-generation",
                temperature=0,
                max_tokens=None,
                timeout=None)

    message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Just detect all the 5 objects in the image "
                "which I'll use in next agent for generating someusful information",
            },
    {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_base64}"},
            },
        ],
    }

    response = model.invoke([message],
        generation_config=dict(response_modalities=["IMAGE", "TEXT"]),
    ) 
    return response.content

pa = r"D:\Learning Materials\Data Science\Projects\ai-classifier-app\bus.jpg"
short_info_of_image(pa)

'Here are the 5 objects detected in the image:\n\n1. **Bus**\n2. **Man with sunglasses and black coat**\n3. **Man with sunglasses and beige coat**\n4. **Man walking away (partially visible)**\n5. **Street/Sidewalk**'

In [46]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import HumanMessage


text_model = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-lite",
    temperature=0,
    max_tokens=None,
    timeout=None)

def generate_funny_message(description: str) -> str:
    prompt = f"Make a short, caption based on this image description: '{description} give one of the best capation out from the image'."
    response = text_model.invoke([HumanMessage(content=prompt)])
    return response.content


generate_funny_message(short_info_of_image(pa))

"Here are a few caption options, playing on different aspects of the image description:\n\n**Short & Sweet:**\n\n*   City life: Bus, coats, and a disappearing act.\n\n**More Descriptive:**\n\n*   Sunglasses, coats, and the rhythm of the city. A snapshot of everyday moments.\n\n**Intriguing:**\n\n*   Who's watching whom? A glimpse of urban anonymity.\n\n**My Choice (Best):**\n\n*   **A stylish street scene: Bus, coats, and the quiet drama of a city sidewalk.**"

In [None]:
from langgraph.graph import StateGraph, START, END


# Step 1: Define state schema
class GraphState(TypedDict):
    image_base64: str
    description: str
    funny_message: str

def build_image_graph():
    graph = StateGraph()

    # Agent 01
    def image_reader_node(state):
        image_bytes = state['image']
        description = short_info_of_image(image_bytes)
        return description

    # Agent 02
    def funny_meme_maker(state):
        description = state["description"]
        message = generate_funny_message(description)
        return {"funny_message": message}

# Add node
    graph.add_node("describe_image", image_reader_node)
    graph.add_node("info_generator", funny_meme_maker)

# Define flow
    graph.set_entry_point("describe_image")
    graph.add_edge("describe_image", "make_joke")
    graph.set_finish_point("make_joke")

    return graph.compile()

In [4]:
from langgraph.graph import StateGraph
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import TypedDict
import base64
from dotenv import load_dotenv
import os

load_dotenv()

# Step 1: Define state schema
class GraphState(TypedDict):
    image_base64: str
    description: str
    funny_message: str

# Step 2: Node to describe the image
def image_reader_node(state: GraphState) -> dict:
    model = ChatGoogleGenerativeAI(model="models/gemini-2.0-flash-exp-image-generation", temperature=0.2)

    message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What is the main object in this image? Respond with just a noun like 'cat' or 'bus'.",
            },
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{state['image_base64']}"},
            },
        ],
    }

    response = model.invoke([message])
    return {"description": response.content.strip()}

# Step 3: Node to generate a funny message
def funny_maker_node(state: GraphState) -> dict:
    model = ChatGoogleGenerativeAI(model="models/gemini-1.5-pro-latest", temperature=0.8)
    
    prompt = f"Create a short, funny caption involving a {state['description']}, only 1 caption give without saying 'Here' or something."
    response = model.invoke(prompt)
    
    return {"funny_message": response.content.strip()}

# Step 4: Define and compile graph
def build_graph():
    graph = StateGraph(GraphState)

    graph.add_node("describe_image", image_reader_node)
    graph.add_node("make_joke", funny_maker_node)

    graph.set_entry_point("describe_image")
    graph.add_edge("describe_image", "make_joke")
    graph.set_finish_point("make_joke")

    return graph.compile()


import base64

# Load image and convert to base64
with open(pa, "rb") as f:
    image_base64 = base64.b64encode(f.read()).decode("utf-8")

# Run the graph
graph = build_graph()
final_state = graph.invoke({"image_base64": image_base64})

print("Description:", final_state["description"])
print("Funny Message:", final_state["funny_message"])


Description: Bus
Funny Message: My therapist told me to embrace my mistakes... so I hopped on the wrong bus.


In [1]:
import base64

# Load image and convert to base64
with open(pa, "rb") as f:
    image_base64 = base64.b64encode(f.read()).decode("utf-8")

# Run the graph
graph = build_graph()
final_state = graph.invoke({"image_base64": image_base64})

print("Description:", final_state["description"])
print("Funny Message:", final_state["funny_message"])


NameError: name 'pa' is not defined