<a href="https://colab.research.google.com/github/davidtkeane/Google_Colab/blob/main/image_description.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CLIP Interrogator 2.4 by [@pharmapsychotic](https://twitter.com/pharmapsychotic)

Want to figure out what a good prompt might be to create new images like an existing one? The CLIP Interrogator is here to get you answers!

<br>

For Stable Diffusion 1.X choose the **ViT-L** model and for Stable Diffusion 2.0+ choose the **ViT-H** CLIP Model.

This version is specialized for producing nice prompts for use with Stable Diffusion and achieves higher alignment between the generated text prompt and the source image. You can try out the old [version 1](https://colab.research.google.com/github/pharmapsychotic/clip-interrogator/blob/v1/clip_interrogator.ipynb) to see how different CLIP models rank terms.

You can also run this on HuggingFace and Replicate<br>
[![Generic badge](https://img.shields.io/badge/🤗-Open%20in%20Spaces-blue.svg)](https://huggingface.co/spaces/pharma/CLIP-Interrogator) [![Replicate](https://replicate.com/pharmapsychotic/clip-interrogator/badge)](https://replicate.com/pharmapsychotic/clip-interrogator)

<br>

If this notebook is helpful to you please consider buying me a coffee via [ko-fi](https://ko-fi.com/pharmapsychotic) or following me on [twitter](https://twitter.com/pharmapsychotic) for more cool Ai stuff. 🙂

And if you're looking for more Ai art tools check out my [Ai generative art tools list](https://pharmapsychotic.com/tools.html).


In [1]:
#@title Check GPU
!nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-272ce678-8605-2da6-3018-4da49c25fda7)


In [2]:
#!/usr/bin/python3

# Ranger made this code snippet.
# This script is used to mount Google Drive in Google Colab and sync files between the two locations.
# It uses the `rsync` command to copy files between the Colab VM and Google Drive.
# Loads Google Drive so you can save the model and use it again later.

# Install the required libraries

!pip install colorama

# Import the required libraries
import os
import shutil
from google.colab import drive
from colorama import Fore, Style, init
from IPython.display import display, HTML

# Initialize colorama
init()

# Access the API key from Colab Secrets
from google.colab import userdata

# Read the 'GOOGLE_DRIVE_API' secret from Colab Secrets and export it as an
# environment variable. Any exception raised while reading or exporting it is
# reported and leaves api_key as None instead of aborting the cell.
try:
    api_key = userdata.get('GOOGLE_DRIVE_API')
    print(f"{Fore.GREEN}✅ API Key loaded successfully.{Style.RESET_ALL}")

    # Set the API key as an environment variable
    os.environ['GOOGLE_DRIVE_API'] = api_key
except Exception as e:
    print(f"{Fore.RED}❌ Error loading API Key: {e}{Style.RESET_ALL}")
    api_key = None

# Mount Google Drive at /content/drive.
# NOTE(review): api_key is never passed to drive.mount(); both branches below
# issue the same mount call. The key only decides which status message is
# printed and whether a mount failure is caught.
if api_key:
    try:
        # Same mount call as the fallback branch; the key itself is not used here
        drive.mount('/content/drive', force_remount=True)
        print(f"{Fore.GREEN}✅ Google Drive mounted successfully using API Key.{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}❌ Error mounting Google Drive: {e}{Style.RESET_ALL}")
else:
    print(f"{Fore.YELLOW}⚠️ No API Key found. Please log in manually.{Style.RESET_ALL}")
    # Unlike the branch above, a failure here is not caught and propagates
    drive.mount('/content/drive', force_remount=True)

# Define the Google Drive folder path (lives under the mount point used above)
drive_folder = "/content/drive/MyDrive/Colab_Projects/Image-Description"

# Define the Colab working directory.
# Note that the Drive mount point (/content/drive) is inside this directory.
colab_folder = "/content/"

# Ensure the Colab folder exists
os.makedirs(colab_folder, exist_ok=True)

# Function to count files in a directory
def count_files(directory):
    """Return how many files live under *directory*, including subdirectories.

    A path that does not exist yields 0 because os.walk produces nothing for it.
    """
    total = 0
    for _root, _subdirs, filenames in os.walk(directory):
        total += len(filenames)
    return total

# Function to sync files from Colab to Google Drive
def sync_to_drive():
    """Copy the contents of the Colab working directory into the Drive folder.

    Runs ``rsync -av --progress`` from ``colab_folder`` to ``drive_folder``.
    The outcome is printed rather than returned or raised, because the
    function is invoked as a button callback.

    Differences from a plain ``os.system`` call:

    * rsync's exit status is checked, so a failed transfer is reported as an
      error instead of being followed by a success message.
    * Arguments are passed as a list (no shell), so paths containing spaces
      or shell metacharacters are handled correctly.
    * When the Drive folder lives inside the Colab folder, the top-level
      directory leading to it is excluded from the transfer. Otherwise rsync
      would copy the Drive mount back into itself.
    """
    # Local import: the cell defining this function only imports os/shutil.
    import subprocess

    try:
        print(f"{Fore.CYAN}🔄 Syncing files from Colab to Google Drive...{Style.RESET_ALL}")

        # rsync only creates the last path component of the destination.
        os.makedirs(drive_folder, exist_ok=True)

        command = ["rsync", "-av", "--progress"]

        src_root = os.path.abspath(colab_folder)
        dst_root = os.path.abspath(drive_folder)
        nested = (
            dst_root != src_root
            and os.path.commonpath([src_root, dst_root]) == src_root
        )
        if nested:
            # A leading slash anchors the pattern at the root of the transfer.
            top_level = os.path.relpath(dst_root, src_root).split(os.sep)[0]
            command.append(f"--exclude=/{top_level}/")

        # Trailing slash on the source: copy its contents, not the folder itself.
        command += [colab_folder.rstrip("/") + "/", drive_folder.rstrip("/") + "/"]

        subprocess.run(command, check=True)

        print(f"{Fore.GREEN}✅ Files synced from Colab to Google Drive.{Style.RESET_ALL}")
    except Exception as e:
        print(f"{Fore.RED}❌ Error syncing to Google Drive: {e}{Style.RESET_ALL}")

# Function to sync files from Google Drive to Colab
def sync_from_drive():
    """Copy the Google Drive project folder into the Colab working directory.

    Runs ``rsync -av --progress`` with ``drive_folder`` as the source (no
    trailing slash, so the folder itself is recreated inside
    ``colab_folder``). It then prints how many files were added and lists the
    files of the synced folder. The outcome is printed rather than returned
    or raised, because the function is invoked as a button callback.

    Differences from a plain ``os.system`` call:

    * rsync's exit status is checked, so a failed transfer is reported as an
      error instead of a success.
    * Arguments are passed as a list (no shell), so paths with spaces work.
    * Counting and listing are limited to the synced folder. Walking all of
      ``colab_folder`` would also traverse the mounted Drive, which lives
      inside it.
    """
    # Local import: the cell defining this function only imports os/shutil.
    import subprocess

    try:
        print(f"{Fore.CYAN}🔄 Syncing files from Google Drive to Colab...{Style.RESET_ALL}")

        source = drive_folder.rstrip("/")
        # Where rsync will place the copied folder inside the Colab directory.
        destination = os.path.join(colab_folder, os.path.basename(source))

        # Count files before sync (0 if the folder does not exist yet)
        initial_count = count_files(destination)

        # Use rsync to copy files from Google Drive to Colab
        # -a: Archive mode (recursive, preserves permissions, symlinks, etc.)
        # -v: Verbose output
        # --progress: Show progress during transfer
        # Existing files are NOT skipped: --ignore-existing is not passed.
        subprocess.run(
            ["rsync", "-av", "--progress", source, colab_folder.rstrip("/") + "/"],
            check=True,
        )

        # Count files after sync
        final_count = count_files(destination)
        files_transferred = final_count - initial_count

        print(f"{Fore.GREEN}✅ Sync completed!{Style.RESET_ALL}")
        print(f"{Fore.GREEN}📂 Files transferred: {files_transferred}{Style.RESET_ALL}")

        # List the synced files to confirm the transfer
        print(f"{Fore.BLUE}📁 Files in Colab folder:{Style.RESET_ALL}")
        for root, _dirs, files in os.walk(destination):
            for file in files:
                print(os.path.join(root, file))
    except Exception as e:
        print(f"{Fore.RED}❌ Error syncing from Google Drive: {e}{Style.RESET_ALL}")

# Function to display a sync button
def display_sync_button():
    """Render the sync controls in the cell output.

    Shows three buttons. Two call google.colab.kernel.invokeFunction with the
    names 'sync_from_drive' and 'sync_to_drive'; the third reloads the page.
    """
    controls_markup = '''
        <div>
            <button onclick="syncFromDrive()">Sync from Google Drive to Colab</button>
            <button onclick="syncToDrive()">Sync from Colab to Google Drive</button>
            <button onclick="refreshPage()">Refresh</button>
        </div>
        <script>
            function syncFromDrive() {
                google.colab.kernel.invokeFunction('sync_from_drive', [], {});
            }
            function syncToDrive() {
                google.colab.kernel.invokeFunction('sync_to_drive', [], {});
            }
            function refreshPage() {
                window.location.reload();
            }
        </script>
    '''
    widget = HTML(controls_markup)
    display(widget)

# Register the sync functions with Colab.
# The names given here must match the ones the button JavaScript passes to
# google.colab.kernel.invokeFunction ('sync_from_drive' / 'sync_to_drive').
from google.colab import output
output.register_callback('sync_from_drive', sync_from_drive)
output.register_callback('sync_to_drive', sync_to_drive)


# Display the sync buttons (done after registration so the callbacks exist
# by the time a button can be clicked)
display_sync_button()

Collecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6
✅ API Key loaded successfully.
Mounted at /content/drive
✅ Google Drive mounted successfully using API Key.


In [4]:
#@title Setup

# Install the required libraries
import os, subprocess

def setup():
    """Install the Python packages this notebook needs.

    Installs gradio, open_clip_torch and clip-interrogator one at a time and
    prints pip's output for each. pip's stderr is merged into the printed
    output, and a warning is printed when an install exits with a non-zero
    status, so a failed install is visible here rather than surfacing later
    as an ImportError.
    """
    # Local import: the surrounding cell only imports os and subprocess.
    import sys

    packages = ['gradio', 'open_clip_torch', 'clip-interrogator']
    for package in packages:
        # `python -m pip` installs into the interpreter running this kernel.
        result = subprocess.run(
            [sys.executable, '-m', 'pip', 'install', package],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        print(result.stdout.decode('utf-8', errors='replace'))
        if result.returncode != 0:
            print(f"Warning: installing {package} failed with exit code {result.returncode}")

# Run the pip installs first: the gradio / clip_interrogator imports below
# depend on the packages that setup() installs.
setup()

import os

# Define the save directory (located under the Google Drive mount point)
save_directory = "/content/drive/MyDrive/Colab_Projects/Image-Description/model"

# Create the save directory if it doesn't exist
if not os.path.exists(save_directory):
    os.makedirs(save_directory)

# Define the model names (the #@param markers expose them as Colab form fields)
caption_model_name = 'blip-large' #@param ["blip-base", "blip-large", "git-large-coco"]
clip_model_name = 'ViT-L-14/openai' #@param ["ViT-L-14/openai", "ViT-H-14/laion2b_s32b_b79k"]

# Import the required libraries
import gradio as gr
from clip_interrogator import Config, Interrogator

# Create an Interrogator object configured with the models chosen above.
# `ci` is the module-level instance used by image_analysis, image_to_prompt
# and the batch-processing cell.
config = Config()
config.clip_model_name = clip_model_name
config.caption_model_name = caption_model_name
ci = Interrogator(config)

# Try to save the model to save_directory.
# NOTE(review): this assumes Interrogator exposes a `save` method — TODO confirm.
# If it does not, the AttributeError branch prints a warning and nothing is saved.
# Only AttributeError is handled; any other error raised by save() propagates.
try:
    ci.save(save_directory)
    print(f"Model saved to {save_directory}")
except AttributeError:
    print("Warning: The Interrogator object does not have a 'save' method. Model saving skipped.")

# Define the image analysis function
def image_analysis(image):
    """Score an image against the interrogator's five label tables.

    The image is converted to RGB and turned into features with
    ``ci.image_to_features``. For each of the mediums, artists, movements,
    trendings and flavors tables, the top 5 labels are ranked and then paired
    with the values returned by ``ci.similarities``.

    Returns:
        A 5-tuple of dicts (medium, artist, movement, trending, flavor), each
        mapping a ranked label to its similarity value.
    """
    rgb_image = image.convert('RGB')
    features = ci.image_to_features(rgb_image)

    label_tables = (ci.mediums, ci.artists, ci.movements, ci.trendings, ci.flavors)

    # First pass: collect the top-5 labels of every table.
    top_labels = [table.rank(features, 5) for table in label_tables]

    # Second pass: pair each label with its similarity, in the same table order.
    return tuple(
        dict(zip(labels, ci.similarities(features, labels)))
        for labels in top_labels
    )

# Define the image to prompt function
def image_to_prompt(image, mode):
    """Generate a text prompt for *image* using the requested interrogation mode.

    Args:
        image: The image to describe; it must provide ``convert('RGB')``
            (the callers in this notebook pass PIL images).
        mode: One of 'best', 'classic', 'fast' or 'negative'. Each selects the
            matching ``ci.interrogate*`` method.

    Returns:
        Whatever the selected ``ci.interrogate*`` method returns for the image.

    Raises:
        ValueError: If *mode* is not one of the supported modes, or if *image*
            is None. An unknown mode would otherwise yield None silently, and
            a missing image would fail with an opaque AttributeError.
    """
    method_names = {
        'best': 'interrogate',
        'classic': 'interrogate_classic',
        'fast': 'interrogate_fast',
        'negative': 'interrogate_negative',
    }
    # Validate the arguments before touching the shared interrogator config.
    if mode not in method_names:
        raise ValueError(
            f"Unknown mode {mode!r}; expected one of {sorted(method_names)}"
        )
    if image is None:
        raise ValueError("No image provided")

    # Both settings use 2048 for ViT-L-14/openai and 1024 for any other CLIP model.
    batch_size = 2048 if ci.config.clip_model_name == "ViT-L-14/openai" else 1024
    ci.config.chunk_size = batch_size
    ci.config.flavor_intermediate_count = batch_size

    image = image.convert('RGB')
    return getattr(ci, method_names[mode])(image)




Loading caption model blip-large...


config.json:   0%|          | 0.00/4.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/527 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Loading CLIP model ViT-L-14/openai...


open_clip_model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

ViT-L-14_openai_artists.safetensors: 100%|██████████| 16.2M/16.2M [00:00<00:00, 55.2MB/s]
ViT-L-14_openai_flavors.safetensors: 100%|██████████| 155M/155M [00:00<00:00, 192MB/s]
ViT-L-14_openai_mediums.safetensors: 100%|██████████| 146k/146k [00:00<00:00, 5.35MB/s]
ViT-L-14_openai_movements.safetensors: 100%|██████████| 307k/307k [00:00<00:00, 8.17MB/s]
ViT-L-14_openai_trendings.safetensors: 100%|██████████| 111k/111k [00:00<00:00, 5.71MB/s]
ViT-L-14_openai_negative.safetensors: 100%|██████████| 63.2k/63.2k [00:00<00:00, 6.04MB/s]


Loaded CLIP model and data in 21.55 seconds.


In [5]:
#@title Image to prompt! 🖼️ -> 📝

# Define the prompt tab
def prompt_tab():
    """Build the "Prompt" tab: image input, mode selector, prompt output, button.

    Clicking the button calls image_to_prompt with the image and the selected
    mode, and writes the result into the prompt textbox.
    """
    mode_choices = ['best', 'fast', 'classic', 'negative']

    with gr.Column():
        with gr.Row():
            image_input = gr.Image(type='pil', label="Image")
            with gr.Column():
                mode_selector = gr.Radio(mode_choices, label='Mode', value='best')
        prompt_output = gr.Textbox(label="Prompt")

    generate_button = gr.Button("Generate prompt")
    generate_button.click(
        image_to_prompt,
        inputs=[image_input, mode_selector],
        outputs=prompt_output,
    )

# Define the analyze tab
def analyze_tab():
    """Build the "Analyze" tab: image input, five label outputs, and a button.

    Clicking the button calls image_analysis with the image and feeds its five
    results into the Medium, Artist, Movement, Trending and Flavor labels, in
    that order.
    """
    category_titles = ("Medium", "Artist", "Movement", "Trending", "Flavor")

    with gr.Column():
        with gr.Row():
            image_input = gr.Image(type='pil', label="Image")
        with gr.Row():
            # One label per category, created in the order image_analysis returns them.
            category_labels = [
                gr.Label(label=title, num_top_classes=5)
                for title in category_titles
            ]

    analyze_button = gr.Button("Analyze")
    analyze_button.click(image_analysis, inputs=image_input, outputs=category_labels)

# Create the UI
# Create the UI: one Blocks app with two tabs, each filled in by its builder.
# The builders must be called inside the `with gr.Tab(...)` context so their
# components are attached to that tab.
with gr.Blocks() as ui:
    with gr.Tab("Prompt"):
        prompt_tab()
    with gr.Tab("Analyze"):
        analyze_tab()

# Launch the UI.
# Per the captured cell output, Gradio detects the Colab notebook, sets
# share=True automatically and prints a public URL; it also states that
# debug=True is needed to show errors in the notebook.
ui.launch(show_api=False, debug=False)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://13699a0c161b5b2549.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [6]:
#@title Batch process a folder of images 📁 -> 📝

#@markdown This will generate prompts for every image in a folder and either save results
#@markdown to a desc.csv file in the same folder or rename the files to contain their prompts.
#@markdown The renamed files work well for [DreamBooth extension](https://github.com/d8ahazard/sd_dreambooth_extension)
#@markdown in the [Stable Diffusion Web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui).
#@markdown You can use the generated csv in the [Stable Diffusion Finetuning](https://colab.research.google.com/drive/1vrh_MUSaAMaC5tsLWDxkFILKJ790Z4Bl?usp=sharing)

import csv
import os
from IPython.display import clear_output, display
from PIL import Image
from tqdm import tqdm

# Batch settings (the #@param markers expose them as Colab form fields).
# folder_path: folder scanned for images; desc.csv is also written here.
folder_path = "/content/drive/MyDrive/Colab_Projects/Image-Description/images" #@param {type:"string"}
# prompt_mode: passed to image_to_prompt as its `mode` argument.
prompt_mode = 'best' #@param ["best","fast","classic","negative"]
# output_mode: 'rename' renames each image after its prompt; 'desc.csv' writes a csv instead.
output_mode = 'rename' #@param ["desc.csv","rename"]
# max_filename_len: passed to sanitize_for_filename as the length limit.
max_filename_len = 128 #@param {type:"integer"}

# Function to sanitize a string for use as a filename
def sanitize_for_filename(prompt: str, max_len: int) -> str:
    """Turn a prompt into a string that is safe to use as a file name stem.

    Keeps only alphanumeric characters and the characters ``,._-!`` and space,
    then truncates the result so that a 4-character extension (such as
    ``.jpg``) still fits within *max_len*.

    Args:
        prompt: The text to sanitize.
        max_len: Maximum length of the final file name, extension included.

    Returns:
        The sanitized stem, with no leading or trailing whitespace. It is
        empty when nothing survives filtering or when *max_len* leaves no
        room for a stem.
    """
    allowed_punctuation = ",._-! "
    name = "".join(c for c in prompt if c.isalnum() or c in allowed_punctuation)

    # Reserve 4 characters for the extension. Clamp at 0: a negative slice
    # bound would trim from the end instead of truncating.
    stem_len = max(max_len - 4, 0)

    # Strip again after truncating, since the cut can land right after a space.
    return name.strip()[:stem_len].strip()

# Disable the CLIP logging
ci.config.quiet = True

# Get the list of image files in the folder. Matching is case-insensitive and
# includes .jpeg, so files such as IMG_0001.JPG are not silently skipped.
# Sorting makes the processing order (and the csv row order) deterministic.
image_extensions = ('.jpg', '.jpeg', '.png')
if os.path.isdir(folder_path):
    files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions))
else:
    files = []

prompts = []
for idx, file in enumerate(tqdm(files, desc='Generating prompts')):
    # Clear the accumulated cell output every 100 images
    if idx > 0 and idx % 100 == 0:
        clear_output(wait=True)

    source_path = os.path.join(folder_path, file)
    # Close the file handle as soon as the pixels are converted, so it is not
    # left open while the file is renamed below.
    with Image.open(source_path) as opened:
        image = opened.convert('RGB')
    prompt = image_to_prompt(image, prompt_mode)
    prompts.append(prompt)

    print(prompt)
    thumb = image.copy()
    thumb.thumbnail([256, 256])
    display(thumb)

    if output_mode == 'rename':
        name = sanitize_for_filename(prompt, max_filename_len)
        ext = os.path.splitext(file)[1]
        filename = name + ext
        # Separate counter for collisions; the enumerate index is not reused.
        suffix = 1
        # A file that already has its target name is not a collision with itself.
        while filename != file and os.path.exists(os.path.join(folder_path, filename)):
            candidate = f"{name}_{suffix}{ext}"
            print(f'File {filename} already exists, trying {candidate}...')
            filename = candidate
            suffix += 1
        if filename != file:
            os.rename(source_path, os.path.join(folder_path, filename))

if prompts:
    if output_mode == 'desc.csv':
        # One row per image: original file name and its generated prompt
        csv_path = os.path.join(folder_path, 'desc.csv')
        with open(csv_path, 'w', encoding='utf-8', newline='') as f:
            w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            w.writerow(['image', 'prompt'])
            for file, prompt in zip(files, prompts):
                w.writerow([file, prompt])

        print(f"\n\n\n\nGenerated {len(prompts)} prompts and saved to {csv_path}, enjoy!")
    else:
        print(f"\n\n\n\nGenerated {len(prompts)} prompts and renamed your files, enjoy!")
else:
    print(f"Sorry, I couldn't find any images in {folder_path}")


Generating prompts: 0it [00:00, ?it/s]


Sorry, I couldn't find any images in /content/drive/MyDrive/Colab_Projects/Image-Description/images
