In [1]:
# Google GenAI (for Chat)
import google.generativeai as genai
from google.generativeai.types import FunctionDeclaration, Tool
from google import genai as g_genai
from google.genai import types
# Vertex AI (for Image Generation)
#import vertexai
#from vertexai.preview.vision_models import ImageGenerationModel

import json
import nbimporter
import utility_fncs, global_vars, week2_novel
from IPython.display import Image as IPythonImage
import base64
from io import BytesIO
from PIL import Image as PILImage

In [2]:
def create_gemini_declaration_from_function(func):
    """Adapt a Python callable into a Gemini ``FunctionDeclaration``.

    The callable is first described by ``utility_fncs.create_tool_from_function``;
    the ``'function'`` entry of that description supplies the name, description
    and parameter schema that are handed to ``FunctionDeclaration``.

    Args:
        func: The callable to expose to Gemini as a tool.

    Returns:
        A ``FunctionDeclaration`` built from the helper's ``name``,
        ``description`` and ``parameters`` fields.
    """
    # NOTE(review): the helper's output is presumably an OpenAI-style tool dict
    # ({'type': 'function', 'function': {...}}) -- confirm in utility_fncs.
    spec = utility_fncs.create_tool_from_function(func)['function']
    return FunctionDeclaration(
        name=spec['name'],
        description=spec['description'],
        parameters=spec['parameters'],
    )


In [3]:
from google import genai as g_genai
from google.genai import types
from PIL import Image as PILImage
def create_image_with_text_model(prompt: str):
    """
    Generates a high-quality image using Google's Gemini model gemini-2.0-flash-preview-image-generation.
    The generated image is returned in a dictionary by saving in a key content.

    Args:
        prompt (str): A detailed text description of the image to create.
                      For best results, be specific about the subject,
                      setting, and artistic style.
    """
    # NOTE(review): the docstring above is left untouched on purpose -- this
    # function is registered as a Gemini tool, and the declaration's description
    # presumably comes from this docstring (confirm in
    # utility_fncs.create_tool_from_function). The model actually used is
    # whatever global_vars.gemini_image_model names.

    # Result envelope: 'content' stays '' when the response carries no image.
    return_dict = {
        'type': 'image',
        'content': '',
    }
    client = g_genai.Client()

    response = client.models.generate_content(
        model=global_vars.gemini_image_model,
        contents=prompt,
        config=types.GenerateContentConfig(
            response_modalities=['TEXT', 'IMAGE']
        )
    )

    # A response may come back without candidates, without content, or with
    # parts=None; treat all of those as "no parts" instead of crashing.
    candidates = response.candidates or []
    content = candidates[0].content if candidates else None
    parts = (content.parts or []) if content is not None else []

    for part in parts:
        if part.text is not None:
            # Text that accompanies the image is only echoed, not returned.
            print(part.text)
        elif part.inline_data is not None:
            # Decode the inline bytes into a PIL image; if several image parts
            # are present, the last one wins (same as before).
            return_dict['content'] = PILImage.open(BytesIO(part.inline_data.data))
    return return_dict

In [None]:
#a = create_image_with_text_model('can you create a image of a cycle for me. it should be underneath a tree during a sunset. the sun light must be falling on the cycle')

In [None]:
def create_image_with_vertex_ai(prompt: str):
    """
    Generates a high-quality image using Google's Imagen model on Vertex AI.
    The generated image is returned in a dictionary by saving in a key content.

    Args:
        prompt (str): A detailed text description of the image to create.
                      For best results, be specific about the subject,
                      setting, and artistic style.

    Returns:
        dict: ``{'type': 'image', 'content': <PIL image>}``; ``content`` is the
        empty string when the API returns no images.
    """
    # Imported here rather than at the top of the notebook: the top-level
    # Vertex AI imports are commented out there, so without these lines the
    # names below are undefined. A local import also keeps the notebook usable
    # when the Vertex AI SDK is not installed and this function is never called.
    import vertexai
    from vertexai.preview.vision_models import ImageGenerationModel

    print(f"--- Initializing Vertex AI to generate image for prompt: '{prompt}' ---")

    return_dict = {
        'type': 'image',
        'content': '',
    }

    # --- Configuration ---
    # Replace with your project ID and location
    PROJECT_ID = "gen-lang-client-0023157158"
    LOCATION = "us-central1"  # Or any other supported location

    vertexai.init(project=PROJECT_ID, location=LOCATION)

    # --- Model ---
    # "imagegeneration@006" was noted as the latest version as of mid-2024.
    model = ImageGenerationModel.from_pretrained("imagegeneration@006")

    print(f"Generating image with prompt: {prompt}")

    # --- Generate the image ---
    # Note: the method is `generate_images`, not `generate_image`.
    response = model.generate_images(
        prompt=prompt,
        number_of_images=1,
        # Optional parameters:
        # negative_prompt="text, watermark, blurry",
        # aspect_ratio="1:1", # "1:1", "9:16", "16:9"
        # seed=42
    )

    # --- Decode the first image, if any ---
    if response.images:
        generated_image = response.images[0]
        # NOTE(review): `_image_bytes` is a private attribute of the SDK's image
        # object; kept because the original relied on it -- confirm whether a
        # public accessor is available in the installed SDK version.
        return_dict['content'] = PILImage.open(BytesIO(generated_image._image_bytes))
    return return_dict

In [4]:
# Registry of local tools, keyed by the name Gemini uses when it requests a call.
available_functions = {"create_image_with_text_model": create_image_with_text_model}

# One Gemini declaration per registered function, bundled into a single Tool.
all_declarations = list(map(create_gemini_declaration_from_function, available_functions.values()))
gemini_tools = Tool(function_declarations=all_declarations)

# Chat model configured with the tool bundle above.
model_gemini = genai.GenerativeModel(
    model_name=global_vars.gemini_text_model,
    tools=[gemini_tools],
)

In [7]:
# Sanity check: show the name carried by the first declaration registered on the tool.
gemini_tools.function_declarations[0].name

'create_image_with_text_model'

In [None]:
#prompt = "Please create an image of a majestic lion in a african forest, in a photorealistic style."
#print(f"User: {prompt}\n")
# Module-level conversation history shared by every gemini_tool_call invocation.
# Each entry is a {"role": ..., "parts": [...]} dict; gemini_tool_call appends the
# user, model and tool turns to it and sends the whole list on its first API call.
prompt_google_gemini = []


def gemini_tool_call(prompt, file=None, model=None):
    """Run one chat turn against Gemini, executing at most one local tool call.

    The user's prompt is appended to the module-level ``prompt_google_gemini``
    history and sent to ``model_gemini``. If the reply contains a function call,
    the matching entry of ``available_functions`` is executed, its result is sent
    back to the model, and the model's follow-up text becomes the answer.
    Otherwise the text of the first reply is the answer. Either way the final
    text is appended to the history as a model turn.

    Args:
        prompt: The user's message text.
        file: Accepted for interface compatibility; not used by this function.
        model: Accepted for interface compatibility; not used by this function.

    Returns:
        tuple: ``(final_output, image)`` where ``final_output`` is the model's
        text and ``image`` is the tool-produced image, or ``None`` when no image
        was produced.

    Raises:
        KeyError: If the model requests a function that is not registered in
            ``available_functions``.
    """
    # --- 1. First API call: the running history plus the new user message ---
    user_turn = {'role': 'user', 'parts': [{'text': prompt}]}
    prompt_google_gemini.append(user_turn)
    response = model_gemini.generate_content(prompt_google_gemini)
    image = None

    # --- 2. Look for a function call and execute it ---
    # Scan every part: the call is not guaranteed to be the first part when the
    # model also emits text alongside it.
    response_parts = response.candidates[0].content.parts
    function_call = next(
        (part.function_call for part in response_parts if part.function_call),
        None,
    )
    if function_call:
        print(f"Gemini wants to call function: '{function_call.name}'")
        function_to_call = available_functions[function_call.name]
        function_args = dict(function_call.args)

        # Execute the local function
        function_response_data = function_to_call(**function_args)
        print(f"Function result: {function_response_data}\n")

        # Tool results may be wrapped as {'type': ..., 'content': ...}. Images
        # are kept aside for the caller and replaced by a short status string,
        # since the image object itself is not sent back to the model.
        if isinstance(function_response_data, dict):
            if function_response_data.get('type') == 'image':
                image = function_response_data.get('content')
                if image is None or (isinstance(image, str) and not image):
                    # The tool ran but produced nothing: do not claim success.
                    image = None
                    function_response_data = "Image generation did not return an image"
                else:
                    function_response_data = "Image created as per user's request"
            else:
                # Previously read from an undefined name (`function_response`),
                # which raised NameError for every non-image dict result.
                function_response_data = function_response_data.get('content', '')

        model_turn = {'role': 'model', 'parts': response_parts}
        # The result from our tool.
        tool_turn = {
            'role': 'tool',
            'parts': [
                {'function_response': {
                    "name": function_call.name,
                    "response": {"result": function_response_data}
                }}
            ]
        }
        prompt_google_gemini.append(model_turn)
        prompt_google_gemini.append(tool_turn)

        # --- 3. Second API call ---
        # Only the current exchange (user prompt, model function call, tool
        # result) is sent to obtain the final text, as before.
        final_response = model_gemini.generate_content([user_turn, model_turn, tool_turn])
        final_output = final_response.text
    else:
        # No function call: the first reply already is the answer.
        final_output = response.text

    prompt_google_gemini.append({"role": "model", "parts": [{"text": final_output}]})
    return final_output, image

In [None]:
#a=gemini_tool_call('old age with nostalgic mood')
#IPythonImage('gemini_generated_image.png')

In [None]:
# Hand the chat handler to the project helper that builds the UI launcher.
# NOTE(review): presumably a Gradio multimodal chat interface, judging by the
# helper's name -- confirm in utility_fncs.
launcher = utility_fncs.get_gradio_multi_modal_launcher(gemini_tool_call)

In [None]:
# Start the launcher object created from gemini_tool_call.
launcher.launch()