# Gemini Image Generation 

### Imports and environment variable loading

In [5]:
from google import genai
from google.genai import types
from PIL import Image
from io import BytesIO
import base64
import dotenv

# Load variables from a local .env file into the process environment.
dotenv.load_dotenv()

# Shared Gemini client used by the generation functions in this notebook.
# NOTE(review): constructed with no arguments, so it presumably reads its API
# key from the environment loaded above -- confirm.
client = genai.Client()

In [15]:
def generate_image_with_streaming(prompt: str):
    """
    Stream a text description for ``prompt``, then generate and save an image.

    A text model first streams a detailed description of the requested image
    (echoed to stdout as it arrives). The description is appended to the
    original prompt and sent to the image model. The first image part in the
    response is saved as 'gemini-streamed-image.png', opened with
    ``Image.show()``, and returned.

    Args:
        prompt: Natural-language request describing the image to create.

    Returns:
        A ``(image, description)`` tuple. ``image`` is the PIL image, or
        ``None`` when the response contains no usable image part (including
        when the response has no candidates or no content). ``description``
        is the full streamed text.
    """
    print("🎨 Starting image generation...")
    print("📝 Generating description...")

    # First stream the text description using the regular Gemini text model.
    description_prompt = f"Describe in detail what a 3D rendered image should look like for this request: {prompt}"

    stream = client.models.generate_content_stream(
        model="gemini-2.0-flash",  # Use regular model for text streaming
        contents=description_prompt
    )

    print("\n💭 AI Description:")
    pieces = []
    for chunk in stream:
        piece = chunk.text  # read once; chunks may carry no text
        if piece:
            pieces.append(piece)
            print(piece, end="", flush=True)
    text_response = "".join(pieces)

    print("\n\n🖼️ Now generating image based on description...")

    # Then generate the actual image using the enhanced prompt.
    enhanced_prompt = f"{prompt}. Detailed vision: {text_response}"

    image_response = client.models.generate_content(
        model="gemini-2.0-flash-preview-image-generation",
        contents=enhanced_prompt,
        config=types.GenerateContentConfig(
            response_modalities=['IMAGE', 'TEXT']  # Both required for image model
        )
    )

    # The response may have no candidates, no content, or no parts; treat all
    # of these as "no image" instead of crashing on the attribute chain.
    candidates = image_response.candidates or []
    content = candidates[0].content if candidates else None
    parts = (content.parts if content is not None else None) or []

    # Extract and save the first image part.
    for part in parts:
        inline = part.inline_data
        if inline is None or not inline.data:
            continue
        image = Image.open(BytesIO(inline.data))
        filename = 'gemini-streamed-image.png'
        image.save(filename)
        print(f"✅ Image saved as {filename}")
        image.show()
        return image, text_response

    print("⚠️ No image was returned by the model.")
    return None, text_response

# Example usage: the prompt passed to the generation demos in the cells below
prompt = 'Hi, can you create a 3d rendered image of Madurai Meenakshi Amman with Lord Shiva to the right'

In [16]:
# Run the streaming image generation; the call returns the PIL image
# (or None when no image part is found) and the streamed description text.
image, description = generate_image_with_streaming(prompt)

🎨 Starting image generation...
📝 Generating description...

💭 AI Description:


ClientError: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': 'The requested combination of response modalities is not supported by the model. models/gemini-2.0-flash-preview-image-generation accepts the following combination of response modalities:\n* IMAGE, TEXT\n', 'status': 'INVALID_ARGUMENT'}}

In [19]:
def generate_image_for_ui(prompt: str):
    """
    Generate an image for ``prompt`` and report progress as a stream of events.

    This is a generator intended for UI integration. It yields dictionaries
    that all carry a ``"type"`` and a ``"message"`` key, in this order:

    * ``{"type": "status", ...}`` -- emitted before the image request is sent.
    * ``{"type": "image_complete", "image_b64": ..., "filename": ..., ...}``
      -- emitted for the first image part in the response. ``image_b64`` is
      the base64-encoded image bytes; the image is also saved locally as
      ``gemini-ui-image-<unix-seconds>.png``.
    * ``{"type": "error", ...}`` -- emitted instead of ``image_complete``
      when the response contains no usable image part.
    * ``{"type": "complete", ...}`` -- always emitted last.

    Args:
        prompt: Natural-language request, sent to the image model unchanged.
    """
    import time

    yield {"type": "status", "message": "🖼️ Now generating image..."}

    # Generate the image directly from the caller's prompt.
    image_response = client.models.generate_content(
        model="gemini-2.0-flash-preview-image-generation",
        contents=prompt,
        config=types.GenerateContentConfig(
            response_modalities=['IMAGE', 'TEXT']  # Both required for image model
        )
    )

    # The response may have no candidates, no content, or no parts; treat all
    # of these as "no image" instead of crashing on the attribute chain.
    candidates = image_response.candidates or []
    content = candidates[0].content if candidates else None
    parts = (content.parts if content is not None else None) or []

    # Extract and yield the first image part.
    image_found = False
    for part in parts:
        inline = part.inline_data
        if inline is None or not inline.data:
            continue

        # Convert to base64 for web transfer.
        image_data = inline.data
        image_b64 = base64.b64encode(image_data).decode('utf-8')

        # Also save locally.
        image = Image.open(BytesIO(image_data))
        filename = f'gemini-ui-image-{int(time.time())}.png'
        image.save(filename)

        image_found = True
        yield {
            "type": "image_complete",
            "image_b64": image_b64,
            "filename": filename,
            "message": f"✅ Image generated and saved as {filename}"
        }
        break

    if not image_found:
        # Surface the failure so a UI does not report success with no image.
        yield {"type": "error", "message": "⚠️ No image was returned by the model."}

    yield {"type": "complete", "message": "🎉 Generation complete!"}


In [21]:
# Drive the UI-oriented generator and render its events in the notebook.
print("=== UI-Friendly Streaming Demo ===")

for update in generate_image_for_ui(prompt):
    event_type = update["type"]

    # Only the image and completion events are reported; others are skipped.
    if event_type not in ("image_complete", "complete"):
        continue

    print(f"\n{update['message']}")

    if event_type == "image_complete":
        encoded = update['image_b64']
        print(f"📊 Image size: {len(encoded)} base64 chars")
        # Decode the transferred payload back into an image and display it.
        image_data = base64.b64decode(encoded)
        image = Image.open(BytesIO(image_data))
        image.show()


=== UI-Friendly Streaming Demo ===

✅ Image generated and saved as gemini-ui-image-1749421819.png
📊 Image size: 1304616 base64 chars

🎉 Generation complete!
