# Generate image descriptions using Gemini

In [1]:
!pip install streamlit
!pip install pyngrok
!pip install google-generativeai
!pip install Pillow

Collecting streamlit
  Downloading streamlit-1.41.1-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m820.1 kB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.41.1-py2.py3-none-any.whl (9.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m


In [None]:
%%writefile test2.py

import streamlit as st
import random
import os
import requests
from io import BytesIO
import google.generativeai as genai

# Configure Gemini API.
# The key is read from the environment so the secret never lives in the
# notebook or in the generated script. Any key that was previously committed
# in plain text should be treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    st.error("GOOGLE_API_KEY environment variable is not set.")
    st.stop()
genai.configure(api_key=GOOGLE_API_KEY)

# Sampling settings shared by every request made through `gemini_model`.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

gemini_model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

def describe_image(image_path):
    """Ask Gemini for a detailed description of an image and render it.

    Args:
        image_path: Path of the image file to send to the model.

    Returns:
        None. The description is written to the Streamlit page; any failure
        (unreadable file, API error) is reported with ``st.error``.
    """
    import mimetypes  # local import keeps this block self-contained

    try:
        with open(image_path, "rb") as img_file:
            image_bytes = img_file.read()

        # The SDK does not accept bare ``bytes`` as a content part; inline
        # image data has to be wrapped in a blob dict carrying its MIME type.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
        image_part = {"mime_type": mime_type, "data": image_bytes}

        prompt = "You are given an image. Analyze the image and provide a detailed description."
        chat_session = gemini_model.start_chat(history=[])
        reply = chat_session.send_message([prompt, image_part])
        st.write(reply.text)
    except Exception as e:
        st.error(f"Error retrieving description: {e}")

def get_image_description(image_path, response):
    """Classify the user's answers about an image into a one-word label.

    Args:
        image_path: Path of the image the questions refer to.
        response: Text containing the user's answers, embedded in the prompt.

    Returns:
        The model's reply, stripped and lower-cased, or ``""`` if anything
        fails (the error is shown with ``st.error``).
    """
    import mimetypes  # local import keeps this block self-contained

    try:
        chat_session = gemini_model.start_chat(history=[])
        prompt = f"""You are given an image related. Analyze the image and detailed description. Also consider the questions and their answers related to that image. These are as follows : {response}. Now based on the Image description and the answers to the questions tell me whether these responses indicate normal or depression or ptsd or anxiety or bipolar. I need only one word answer. I need no explanation. Please give the exactly correct one word answer about what you think."""

        with open(image_path, "rb") as img_file:
            image_bytes = img_file.read()

        # The SDK does not accept bare ``bytes`` as a content part; inline
        # image data has to be wrapped in a blob dict carrying its MIME type.
        mime_type = mimetypes.guess_type(image_path)[0] or "image/png"
        image_part = {"mime_type": mime_type, "data": image_bytes}

        # Use a separate name so the ``response`` argument is not shadowed.
        reply = chat_session.send_message([prompt, image_part])
        return reply.text.strip().lower()
    except Exception as e:
        st.error(f"Error: {e}")
        return ""

# Define the image directory
IMAGE_DIR = "images"

# Streamlit re-executes the whole script on every widget interaction, so a
# plain random pick would swap the image while the user is still answering.
# Keeping the pick in session state makes it stable for the whole session.
if "random_number" not in st.session_state:
    st.session_state["random_number"] = random.randint(0, 9)
random_number = st.session_state["random_number"]
selected_image = f"{random_number}.png"  # Change the extension if needed (e.g., .jpg)

# Define Rorschach test questions
questions = [
    "What do you see in this image?",
    "What emotions does this image evoke in you?",
    "Does this image remind you of anything from your past?",
    "If this image had a story, what would it be?",
    "Do you see anything changing in the image over time?"
]

# Streamlit App
st.title("Describe Image and Classify Responses")

# Display the selected image
image_path = os.path.join(IMAGE_DIR, selected_image)
if os.path.exists(image_path):
    st.image(image_path, caption="Look at the image and answer the questions.", use_container_width=True)
else:
    st.error(f"Image {selected_image} not found in {IMAGE_DIR}.")
    # Without an image the questionnaire is meaningless and the model calls
    # below would only fail, so end this script run here.
    st.stop()

# Collect responses (keyed "Q1".."Q5"; widget keys are "q1".."q5")
responses = {}
st.subheader("Answer the following questions:")
for i, question in enumerate(questions, start=1):
    responses[f"Q{i}"] = st.text_area(question, key=f"q{i}")

# Submit button
combined_response = ""
if st.button("Submit Responses"):
    st.success("Responses submitted successfully!")
    st.write("Here are your responses:")

    # One "Qn: answer" line per question; this is the text sent to the model.
    combined_response = "".join(f"{q}: {ans}\n" for q, ans in responses.items())

    with st.expander("Your Responses", expanded=False):
        for q, ans in responses.items():
            st.write(f"**{q}**: {ans}")

    describe_image(image_path)

    # get_image_description returns "" on failure (and reports the error
    # itself), so only render a result box when there is a result.
    label = get_image_description(image_path, combined_response)
    if label:
        st.success(label)


Writing test2.py


# Storing the randomly selected image locally so it does not change on every rerun

In [2]:
%%writefile test3.py

import streamlit as st
import random
import os
import requests
from io import BytesIO
import google.generativeai as genai
from PIL import Image

# Configure Gemini API.
# The key is read from the environment so the secret never lives in the
# notebook or in the generated script. Any key that was previously committed
# in plain text should be treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    st.error("GOOGLE_API_KEY environment variable is not set.")
    st.stop()
genai.configure(api_key=GOOGLE_API_KEY)

# Sampling settings shared by every request made through `gemini_model`.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

gemini_model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=generation_config,
)

def get_image_description(image_path, response):
    """Classify the user's answers about an image into a one-word label.

    Args:
        image_path: Path of the image the questions refer to.
        response: Text containing the user's answers, embedded in the prompt
            together with the module-level ``questions``.

    Returns:
        The model's reply, stripped and lower-cased, or ``""`` if anything
        fails (the error is shown with ``st.error``).
    """
    try:
        questions_text = ' '.join(questions)
        prompt = f"""You are given an image. Analyze the image and consider the questions and their answers related to that image. The Questions that are asked as follows: {questions_text}. The responses are as follows : {response}. Compare the image description that you think with the answers. Now based on the {response} to the {questions_text} tell me whether these {response} indicate normal or depression or ptsd or anxiety or bipolar. I need only one word answer. I need no explanation. Please give the exactly correct one word answer about what you think."""

        # Open the image that was actually passed in (not a global path) and
        # close the file handle as soon as the request has been sent.
        with Image.open(image_path) as image_data:
            # Use a separate name so the ``response`` argument is not shadowed.
            reply = gemini_model.generate_content([prompt, image_data])
        return reply.text.strip().lower()
    except Exception as e:
        st.error(f"Error: {e}")
        return ""

# Sentinel index reported when an already-persisted image is reused
# (freshly drawn indices are 0-9, so 10 can never be a real pick).
randnum = 10

# Folder that holds the numbered source images.
IMAGE_DIR = "images"

# Location of the persisted copy of the chosen image.
RANDOM_IMAGE_PATH = os.path.join("./", "randim.png")
# Function to generate or retrieve the random image
def get_random_image():
    """Return the persisted random image, creating it on first use.

    If ``RANDOM_IMAGE_PATH`` does not exist yet, a number from 0 to 9 is drawn
    and ``IMAGE_DIR/<number>.png`` is copied to ``RANDOM_IMAGE_PATH``. If that
    source image is missing, an error is shown with ``st.error`` and nothing
    is copied.

    Returns:
        A ``(path, number)`` tuple. ``path`` is always ``RANDOM_IMAGE_PATH``.
        ``number`` is the freshly drawn index, or the module-level ``randnum``
        value when an existing persisted image is reused.
    """
    import shutil  # local import keeps this block self-contained

    if os.path.exists(RANDOM_IMAGE_PATH):
        # An image was already chosen on an earlier run; keep using it.
        return RANDOM_IMAGE_PATH, randnum

    random_number = random.randint(0, 9)
    selected_image = os.path.join(IMAGE_DIR, f"{random_number}.png")

    if os.path.exists(selected_image):
        # Persist the pick so later script reruns show the same image.
        shutil.copyfile(selected_image, RANDOM_IMAGE_PATH)
    else:
        st.error(f"Image {selected_image} not found in {IMAGE_DIR}.")
    return RANDOM_IMAGE_PATH, random_number


# Define Rorschach test questions
questions = [
    "What do you see in this image?",
    "What emotions does this image evoke in you?",
    "Does this image remind you of anything from your past?",
    "If this image had a story, what would it be?",
    "Do you see anything changing in the image over time?"
]

# Streamlit App
st.title("Describe Image and Classify Responses")

# Display the selected image
rand_num = 10
image_path, randnum = get_random_image()
if randnum != 10:
    # 10 means "reused an existing image"; anything else is a fresh pick.
    rand_num = randnum

if os.path.exists(image_path):
    st.image(image_path, caption="Look at the image and answer the questions.", use_container_width=True)
else:
    st.error(f"Image not found in {IMAGE_DIR}.")
    # Without an image the questionnaire is meaningless and the model call
    # below would only fail, so end this script run here.
    st.stop()

# Collect responses (keyed "Q1".."Q5"; widget keys are "q1".."q5")
responses = {}
st.subheader("Answer the following questions:")
for i, question in enumerate(questions, start=1):
    responses[f"Q{i}"] = st.text_area(question, key=f"q{i}")

# Submit button
combined_response = ""
if st.button("Submit Responses"):
    st.success("Responses submitted successfully!")
    st.write("Here are your responses:")

    # One answer per line, in question order; this is the text sent to the model.
    combined_response = "".join(f"{ans}\n" for ans in responses.values())

    with st.expander("Your Responses", expanded=False):
        for q, ans in responses.items():
            st.write(f"**{q}** ANS : {ans}")

    # get_image_description returns "" on failure (and reports the error
    # itself), so only render a result box when there is a result.
    label = get_image_description(RANDOM_IMAGE_PATH, combined_response)
    if label:
        st.success(label)

    # Clean up the temporary "randim.png" file at the end of execution
    if os.path.exists(RANDOM_IMAGE_PATH):
        os.remove(RANDOM_IMAGE_PATH)

Writing test3.py


In [3]:
# Import ngrok
from pyngrok import ngrok

# Set your authtoken
ngrok.set_auth_token("2ohUKqk37HcGbvwN0s8Y1E2WNxE_39z1gVF3bYq9vFSEm7Wzq") # Replace YOUR_AUTHTOKEN with your actual authtoken

# Kill any existing ngrok processes
ngrok.kill()

# Start Streamlit with nohup
!nohup streamlit run test3.py &

# Create a public URL with ngrok to access the app
public_url = ngrok.connect(addr='8501')
print(f"Public URL: {public_url}")

nohup: appending output to 'nohup.out'
Public URL: NgrokTunnel: "https://34ff-104-196-61-155.ngrok-free.app" -> "http://localhost:8501"
