**User Input and Image Generation**

Steps with Gradio:
1. Ask User for Image Concepts: Use Gradio to create a text input interface where users can enter their image concepts.
2. Clean User Input: Ensure the prompt is clean of unnecessary spaces or characters.
3. Prompt the Image Generation Agent: Call APIs to generate images using Stability AI's Stable Diffusion 3 and DALL-E based on user input.

In [1]:
# Import Dependencies

# For environment variables
import os
from dotenv import load_dotenv

#For dataframes
import pandas as pd

# For API calls
import requests
import openai

# For image processing and viewing
import base64
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt

# For UI
import gradio as gr

# For caption generation
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load dependencies to measure text output
import nltk
from rouge_score import rouge_scorer

# BLEU Score
from nltk.translate.bleu_score import sentence_bleu

import numpy as np
from sklearn.cluster import KMeans



In [2]:
# Load environment variables (via python-dotenv) before reading any keys.
load_dotenv()

# API credentials for the two image providers, read from the environment.
STABILITY_AI_API_KEY = os.environ.get("STABILITY_AI_API_KEY")
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Model name for our LLMs.
OPENAI_MODEL = "gpt-3.5-turbo"

In [3]:
# Utility to view images for debugging
def view_base64_image(base64_string):
    """Decode a base64 encoded image and display it using matplotlib.

    Args:
        base64_string: Base64 data (str or bytes) encoding an image file.

    Returns:
        None. The image is rendered with ``plt.show()``.
    """
    # This file imports ``BytesIO`` directly (``from io import BytesIO``);
    # the ``io`` module name itself is not in scope, so ``io.BytesIO`` would
    # raise NameError.
    image_data = base64.b64decode(base64_string)
    image = Image.open(BytesIO(image_data))

    # Show the picture without axis ticks or frame.
    plt.imshow(image)
    plt.axis('off')
    plt.show()

In [4]:
# Utility to clean user input by removing extra spaces
def clean_user_input(user_query):
    """Return ``user_query`` with its whitespace normalised.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace is collapsed to a single space.
    """
    words = user_query.split()
    return ' '.join(words)

In [5]:
# Call Stability AI API and generate an image
def generate_stability_ai_image(prompt):
    """Generate an image for ``prompt`` with the Stability AI SD3 endpoint.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        bytes: The raw response body (a JPEG is requested), base64-encoded.

    Raises:
        Exception: If the API answers with a non-success status; the message
            carries the HTTP status code and the response text.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure or timeout).

    Example:
        response = generate_stability_ai_image("cute shiba inu")
    """
    host = 'https://api.stability.ai/v2beta/stable-image/generate/sd3'
    params = {
        "prompt" : prompt,
        "negative_prompt" : '',
        "aspect_ratio" : '1:1',
        "seed" : 0,
        "output_format" : 'jpeg',
        "model" : "sd3",
        "mode" : "text-to-image"
    }
    headers = {
        "Accept": "image/*",
        "Authorization": f"Bearer {STABILITY_AI_API_KEY}"
    }

    # Text-to-image sends no input image or mask, so only the placeholder
    # part is needed. Passing ``files`` makes requests encode the body as
    # multipart/form-data.
    files = {"none": ''}

    # Send request
    print(f"Sending REST request to {host}...")
    response = requests.post(
        host,
        headers=headers,
        files=files,
        data=params,
        # Without a timeout a stalled connection would hang the UI forever.
        timeout=120
    )
    if not response.ok:
        raise Exception(f"HTTP {response.status_code}: {response.text}")

    return base64.b64encode(response.content)


In [6]:
# Call Dall-E Open AI API and generate an image
def call_dalle_api(prompt):
    """Request one image for ``prompt`` from the OpenAI images API.

    Returns:
        The ``b64_json`` field of the first image in the response.

    Example:
        response = call_dalle_api("A realistic image of a shiba inu with a birthday hat on the street")
    """
    request_options = {
        "model": "dall-e-2",
        "prompt": prompt,
        "size": "512x512",
        "quality": "standard",
        "n": 1,
        "response_format": "b64_json",
    }
    result = openai.OpenAI().images.generate(**request_options)
    first_image = result.data[0]
    return first_image.b64_json

In [7]:
# Convert base64 string to a PIL Image object
def base64_to_pil_image(base64_string):
    """Decode ``base64_string`` and open the resulting bytes as a PIL image."""
    raw_bytes = base64.b64decode(base64_string)
    # PIL reads from a file-like object, so wrap the bytes in a buffer.
    return Image.open(BytesIO(raw_bytes))

In [40]:
import time

# Generate images with both providers and save them to disk
def generate_images_with_quality(prompt):
    """Generate one image per provider for ``prompt`` and save the results.

    The prompt is whitespace-normalised, sent to both the Stability AI and
    Dall-E helpers, and the two images plus the prompt text are written to
    ``generated_images/`` under a shared ``time.time()`` prefix.

    Args:
        prompt: The raw text entered by the user.

    Returns:
        tuple: ``(stability_ai_image, dalle_image)`` as PIL images.
    """
    cleaned_prompt = clean_user_input(prompt)
    stability_ai_image = base64_to_pil_image(generate_stability_ai_image(cleaned_prompt))
    dalle_image = base64_to_pil_image(call_dalle_api(cleaned_prompt))

    # Make sure the output folder exists; otherwise the first run fails with
    # FileNotFoundError.
    output_dir = "generated_images"
    os.makedirs(output_dir, exist_ok=True)

    timestamp = time.time()

    # Stored as "<raw prompt>, <cleaned prompt>".
    with open(f"{output_dir}/{timestamp}_prompt.txt", "w") as f:
        f.write(f'{prompt}, {cleaned_prompt}')

    # JPEG cannot store alpha or palette data, and Pillow refuses to write
    # such images as .jpg, so save RGB copies. The returned images are left
    # untouched.
    stability_ai_image.convert("RGB").save(f'{output_dir}/{timestamp}_stability_ai.jpg')
    dalle_image.convert("RGB").save(f'{output_dir}/{timestamp}_dalle.jpg')

    # TODO: image-quality assessment is not implemented yet; only the images
    # are returned.
    return stability_ai_image, dalle_image

In [41]:
# Gradio UI: one text box in, one image out per provider.
image_outputs = [
    gr.Image(type="pil", label="Stability AI Image"),
    gr.Image(type="pil", label="Dall-E Image"),
]

iface = gr.Interface(
    fn=generate_images_with_quality,
    inputs="text",
    outputs=image_outputs,
    title="Text-to-Image Generation",
    description="Input a concept to generate images using Stability AI and Dall-E.",
)

# share=True also requests a temporary public URL in addition to the local one.
iface.launch(share=True)

Running on local URL:  http://127.0.0.1:7866
Running on public URL: https://9ec2ee4c85ff9c85cc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Sending REST request to https://api.stability.ai/v2beta/stable-image/generate/sd3...


**Image Evaluation, Relevance Check, and Description Generation**

Steps:
1. Evaluate Image Relevance: Use an LLM to assess whether the generated image is relevant to the prompt.
2. Generate Image Descriptions: Use an image-captioning model (BLIP) to create descriptive captions of the images.
3. Compare Models: Analyze how each model's output aligns with the user query.

In [None]:
# Load the pre-trained BLIP captioning processor and model from the same checkpoint
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

In [None]:
# Function for caption generation
def generate_caption(image):
    """Caption ``image`` with the module-level BLIP ``processor`` and ``model``.

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.
    """
    model_inputs = processor(images=image, return_tensors="pt")
    generated_ids = model.generate(**model_inputs)
    first_sequence = generated_ids[0]
    return processor.decode(first_sequence, skip_special_tokens=True)

In [None]:
# Load prompt for images
# The prompt file is written as "<raw prompt>, <cleaned prompt>".
with open('generated_images/1730005832.2413912_prompt.txt') as prompt_file:
    prompt_file_text = prompt_file.read()

# The prompt may itself contain commas, so a plain split(',') can return the
# wrong piece. Try each ", " as the separator and accept the one where the
# right-hand side is the whitespace-normalised form of the left-hand side.
prompt = None
separator = ', '
split_at = prompt_file_text.find(separator)
while split_at != -1:
    raw_part = prompt_file_text[:split_at]
    cleaned_part = prompt_file_text[split_at + len(separator):]
    if ' '.join(raw_part.split()) == cleaned_part:
        # Cleaned prompt, without the leading space the old split left behind.
        prompt = cleaned_part
        break
    split_at = prompt_file_text.find(separator, split_at + 1)

if prompt is None:
    # File does not follow the expected layout: keep the previous behaviour.
    prompt = prompt_file_text.split(',')[1]

In [None]:
# Caption the saved Stability AI image
stability_ai_image_path = "generated_images/1730005832.2413912_stability_ai.jpg"
img = Image.open(stability_ai_image_path)
stability_ai_caption = generate_caption(img)

In [None]:
# Caption the saved Dall-E image
dalle_image_path = "generated_images/1730005832.2413912_dalle.jpg"
img = Image.open(dalle_image_path)
dalle_caption = generate_caption(img)

In [None]:
# BLEU for the Stability AI caption, with the prompt as the single reference
reference_captions = [prompt]
generated_captions = [stability_ai_caption]

# Tokenise on whitespace: each hypothesis is scored against a one-item
# list of reference token lists.
tokenized_pairs = [
    ([ref.split()], gen.split())
    for ref, gen in zip(reference_captions, generated_captions)
]
# NOTE(review): no smoothing function is passed to sentence_bleu; for short
# captions with no higher-order n-gram overlap the score is likely ~0 - confirm
# this is intended.
bleu_scores = [sentence_bleu(refs, hypothesis) for refs, hypothesis in tokenized_pairs]

# Mean over all pairs (a single pair here)
stability_ai_bleu = sum(bleu_scores) / len(bleu_scores)

# Report the result
print(f"Average BLEU Score: {stability_ai_bleu:.4f}")

In [None]:
# BLEU for the Dall-E caption, with the prompt as the single reference
reference_captions = [prompt]
generated_captions = [dalle_caption]

# Tokenise on whitespace: each hypothesis is scored against a one-item
# list of reference token lists.
tokenized_pairs = [
    ([ref.split()], gen.split())
    for ref, gen in zip(reference_captions, generated_captions)
]
# NOTE(review): no smoothing function is passed to sentence_bleu; for short
# captions with no higher-order n-gram overlap the score is likely ~0 - confirm
# this is intended.
bleu_scores = [sentence_bleu(refs, hypothesis) for refs, hypothesis in tokenized_pairs]

# Mean over all pairs (a single pair here)
dalle_bleu = sum(bleu_scores) / len(bleu_scores)

# Report the result
print(f"Average BLEU Score: {dalle_bleu:.4f}")

In [None]:
# Function to calculate Rouge score
def calculate_rouge(reference, generated):
    """Score ``generated`` against ``reference`` with ROUGE-1, ROUGE-2 and ROUGE-L.

    A new ``RougeScorer`` (stemming enabled) is built on every call and the
    result of its ``score`` method is returned unchanged.
    """
    metrics = ['rouge1', 'rouge2', 'rougeL']
    return rouge_scorer.RougeScorer(metrics, use_stemmer=True).score(reference, generated)

In [None]:
# ROUGE for the Stability AI caption, scored against the prompt
reference_captions = [prompt]
generated_captions = [stability_ai_caption]
for ref, gen in zip(reference_captions, generated_captions):
    stability_ai_rouge_score = calculate_rouge(ref, gen)
    rouge_report = (
        f"ROUGE scores for reference: '{ref}' "
        f"and generated: '{gen}': {stability_ai_rouge_score}"
    )
    print(rouge_report)

In [None]:
# ROUGE for the Dall-E caption, scored against the prompt
reference_captions = [prompt]
generated_captions = [dalle_caption]
for ref, gen in zip(reference_captions, generated_captions):
    dalle_rouge_score = calculate_rouge(ref, gen)
    rouge_report = (
        f"ROUGE scores for reference: '{ref}' "
        f"and generated: '{gen}': {dalle_rouge_score}"
    )
    print(rouge_report)

In [None]:
# TODO(Tunji): clean up graph; the scores may not need to be recalculated here (?)

# Plot results
# One (reference, generated) pair per model, in the same order as model_names
model_names = ['Stability AI', 'Dall-E']
reference_captions = [prompt, prompt]
generated_captions = [stability_ai_caption, dalle_caption]

# Calculate BLEU scores
bleu_scores = [sentence_bleu([ref.split()], gen.split()) for ref, gen in zip(reference_captions, generated_captions)]

# Calculate ROUGE scores (F-measure of each variant)
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
rouge_scores = [scorer.score(ref, gen) for ref, gen in zip(reference_captions, generated_captions)]
rouge1_scores = [score['rouge1'].fmeasure for score in rouge_scores]
rouge2_scores = [score['rouge2'].fmeasure for score in rouge_scores]
rougeL_scores = [score['rougeL'].fmeasure for score in rouge_scores]

# Prepare data for K-means: one row per model, columns = (BLEU, ROUGE-1)
X = np.array(list(zip(bleu_scores, rouge1_scores)))

# Apply K-means clustering
# n_init and random_state are set explicitly so the result does not depend on
# library defaults or random initialisation.
kmeans = KMeans(n_clusters=1, n_init=10, random_state=0)  # Choose number of clusters
kmeans.fit(X)
labels = kmeans.labels_

# Plotting
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
# Tag every point with its model name; without this the two points cannot be
# told apart on the figure.
for model_name, (bleu_value, rouge1_value) in zip(model_names, X):
    plt.annotate(model_name, (bleu_value, rouge1_value),
                 textcoords='offset points', xytext=(5, 5))
plt.title('K-means Clustering of BLEU and ROUGE Scores')
plt.xlabel('BLEU Score')
plt.ylabel('ROUGE-1 Score')
plt.grid(True)
plt.show()

Output results to file

In [None]:
# Produce output
# Columns: prompt, both captions, both BLEU scores, both ROUGE results
import csv

results_path = 'image_generation_results.csv'
result_columns = [
    'prompt',
    'stability_ai_caption',
    'dalle_caption',
    'stability_ai_bleu',
    'dalle_bleu',
    'stability_ai_rouge_score',
    'dalle_rouge_score',
]

# Write the header only when starting a new (or empty) file, so that
# pd.read_csv does not consume the first data row as column names.
needs_header = not os.path.exists(results_path) or os.path.getsize(results_path) == 0

# csv.writer terminates every row and escapes embedded quotes, so repeated
# runs append one well-formed line each. QUOTE_ALL keeps every field quoted
# as before.
with open(results_path, 'a', newline='') as file_output:
    results_writer = csv.writer(file_output, quoting=csv.QUOTE_ALL, lineterminator='\n')
    if needs_header:
        results_writer.writerow(result_columns)
    results_writer.writerow([
        prompt,
        stability_ai_caption,
        dalle_caption,
        stability_ai_bleu,
        dalle_bleu,
        stability_ai_rouge_score,
        dalle_rouge_score,
    ])

In [None]:
# Read the accumulated results back and display them as the cell output
results_csv = 'image_generation_results.csv'
df = pd.read_csv(results_csv)
df