In [2]:
import os
import google.generativeai as genai
import PIL.Image
import dotenv

In [3]:
# --- API credentials -------------------------------------------------------
# Pull variables from the local "key.env" file into the process environment,
# then hand the Google key to the SDK.
dotenv.load_dotenv("key.env")
api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

# --- Model setup -----------------------------------------------------------
# Sampling / output settings passed to the model on construction.
generation_config = dict(
    temperature=0.7,
    top_p=0.95,
    top_k=64,
    max_output_tokens=65536,
    response_mime_type="text/plain",
)

# The system prompt is written as adjacent string literals (one sentence or
# line per literal); Python concatenates them into a single string.
# NOTE(review): the prompt wording is kept verbatim, including "slow than
# humans" and the unclosed "(Don't write ..." parenthesis, because changing
# it would change what the model is told.
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-thinking-exp-01-21",
    generation_config=generation_config,
    system_instruction=(
        "You will be given an input as a word and an image. "
        "You need to warn the user that this is a dangerous animal (if it is).\n"
        "Here's how you need to structure your output:\n"
        "What the animal is, what to do right now,\n"
        "How dangerous is it (0-10),\n"
        "How fast is it (human speed, slow than humans, or higher than humans.). "
        "Add some more details after this, like what it hunts, etc.\n"
        "\n"
        "Remember, the user currently might be in an emergency. "
        "Answer appropriately. Do not waste ANY characters. "
        "But also make sure to make clear what you are writing. "
        "(Don't write \"faster\", write \"this is faster than humans\". "
        "Tell the user what to do appropriately. "
        "If you say only \"slowly back away\" for everything, it is incorrect. "
        "There are different counters to different animals.\n"
    ),
)

In [4]:
def process_input(text_input: str, image_path) -> str:
    """Send a text label and an image to the model and return its reply.

    Args:
        text_input: Text sent alongside the image (e.g. an animal name).
        image_path: Location of the image; passed directly to
            ``PIL.Image.open``.

    Returns:
        The ``text`` attribute of the response returned by
        ``chat_session.send_message``.

    Raises:
        Whatever ``PIL.Image.open`` or the chat session raise (for example
        when the image file cannot be opened); nothing is caught here.
    """
    # Open the image as a context manager so the underlying file handle is
    # released even if the request fails; the message is sent while the image
    # is still open.
    with PIL.Image.open(image_path) as image:
        # Each call uses a fresh chat session with no prior history.
        chat_session = model.start_chat(history=[])

        # Send the message with both text and image
        response = chat_session.send_message([text_input, image])

    return response.text

In [7]:
# Example query: a grizzly bear photo together with its label.
image_path = "Images/GrizzlyBear.png"
text_input = "Grizzly Bear"

# Ask the model and print the plain-text reply.
output = process_input(text_input=text_input, image_path=image_path)
print(output)

Grizzly Bear. Do not run. Back away slowly. Be big. If attacked, play dead.
Danger: 9/10.
Speed: Faster than humans. Hunts large animals.
