# Seleccionar imágenes mediante CLIP



In [None]:
#@title ▶ Instalamos las librerías necesarias

# Install the Python packages this notebook needs (quietly, via shell magics).
# mediapy is used further down to display the selected images; ftfy, regex
# and tqdm are presumably runtime requirements of the CLIP package - confirm
# against the CLIP README.
! pip install -q ftfy regex tqdm mediapy
# CLIP itself is installed straight from its GitHub repository.
! pip install -q git+https://github.com/openai/CLIP.git

In [None]:
#@title ▶ Conectamos el drive

# Mount Google Drive under /content/drive so that image folders stored there
# (see the folder form field further down) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Definimos las funciones necesarias

In [None]:
#@title ▶ Definimos las funciones necesarias

from clip import clip
import torch
from PIL import Image
import glob
import os
import math
import numpy as np
import mediapy as media

def precompute(model, preprocess, device, files):
    """Encode every image in *files* with CLIP and return normalized features.

    Args:
        model: CLIP model; only its ``encode_image`` method is used.
        preprocess: Callable that turns one image returned by ``load_images``
            into a tensor suitable for ``model.encode_image``.
        device: Torch device (or device string) the batches are moved to.
        files: Sequence of image paths; it is sliced into batches.

    Returns:
        A 2-D tensor with one L2-normalized feature row per entry of *files*,
        in the same order. For an empty *files* an empty ``(0, 512)`` float16
        tensor on *device* is returned.
    """
    # You can try tuning the batch size for very large folders, but it should usually be OK
    batch_size = 256
    batches = math.ceil(len(files) / batch_size)

    # Nothing to encode: keep returning the historical empty placeholder.
    if batches == 0:
        return torch.empty([0, 512], dtype=torch.float16).to(device)

    # Per-batch results are collected here and concatenated once at the end.
    # This avoids re-copying all previous features on every batch and does
    # not tie the result to a fixed embedding width.
    feature_batches = []

    # Process each batch
    for i in range(batches):
        print(f"Processing batch {i + 1}/{batches}")

        # Get the relevant files
        batch_files = files[i * batch_size: (i + 1) * batch_size]

        # Load the images for the batch
        images = load_images(batch_files)
        # Preprocess the images for the batch
        batch_preprocessed = torch.stack([preprocess(image) for image in images]).to(device)

        # Encode with CLIP and normalize
        with torch.no_grad():
            batch_features = model.encode_image(batch_preprocessed)
            batch_features /= batch_features.norm(dim=-1, keepdim=True)

        feature_batches.append(batch_features)

    return torch.cat(feature_batches)


def load_images(files):
    """Open each path in *files* with PIL and return the images in the same order."""
    return [Image.open(path) for path in files]


def find_closest_text(model, device, files, image_features, text, num):
    """Return the *num* entries of *files* whose features best match *text*.

    Args:
        model: CLIP model; only its ``encode_text`` method is used.
        device: Torch device the tokenized query is moved to.
        files: Sequence of file names, one per row of *image_features*.
        image_features: Normalized image features, either a numpy array or a
            torch tensor, with one row per entry of *files*.
        text: Search query passed to ``clip.tokenize``.
        num: Maximum number of files to return. Values below 1 yield an empty
            result; values above the number of files are clamped.

    Returns:
        A numpy array with the selected entries of *files*, most similar first.

    Raises:
        ValueError: If *files* and *image_features* differ in length, which
            means the features were computed for a different set of files.
    """
    file_array = np.array(files)

    # Work on a numpy matrix regardless of how the features were produced.
    if isinstance(image_features, torch.Tensor):
        image_features = image_features.detach().cpu().numpy()
    image_matrix = np.asarray(image_features)

    if len(image_matrix) != len(file_array):
        raise ValueError(
            f"Got {len(image_matrix)} feature rows for {len(file_array)} files; "
            "the precomputed features are stale and must be regenerated"
        )

    # Clamp the request: ``[-0:]`` would select everything and argpartition
    # rejects a ``kth`` beyond the number of elements.
    num = max(0, min(num, len(file_array)))
    if num == 0:
        return file_array[:0]

    # Encode and normalize the search query using CLIP
    with torch.no_grad():
        text_features = model.encode_text(clip.tokenize(text).to(device))
        text_features /= text_features.norm(dim=-1, keepdim=True)

    # Compute the similarity between the search query and each image using the cosine similarity
    similarities = (100.0 * image_matrix @ text_features.T.cpu().numpy()).flatten()
    # Pick the num best matches (unordered), then order them best first
    ind = np.argpartition(similarities, -num)[-num:]
    ind_sorted = ind[np.argsort(similarities[ind] * -1)]
    return file_array[ind_sorted]


def _read_feature_cache(cache_path):
    """Return ``(features, names)`` stored at *cache_path*, or ``None`` if the file is absent.

    ``names`` is ``None`` for caches that were written without a file list.
    """
    if not os.path.isfile(cache_path):
        return None
    with np.load(cache_path) as cache:
        names = cache["files"].tolist() if "files" in cache.files else None
        return cache["arr_0"], names


def find(folder, description, num=10):
    """Return the *num* ``*.jpg`` files in *folder* that best match *description*.

    Image features are cached in ``image_features.npz`` inside *folder*. The
    cache also records the file names it was computed for; when those names no
    longer match the folder contents the features are recomputed. Caches
    written without a file list are used as they are.

    Args:
        folder: Directory containing the ``*.jpg`` images.
        description: Text query to rank the images against.
        num: Number of files to return (default 10).

    Returns:
        Whatever ``find_closest_text`` returns for the selected files.
    """
    files = glob.glob(os.path.join(folder, '*.jpg'))
    cache_path = os.path.join(folder, 'image_features.npz')

    # Load the open CLIP model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load("ViT-B/32", device=device)

    image_features = None
    cached = _read_feature_cache(cache_path)
    if cached is not None:
        cached_features, cached_names = cached
        if cached_names is None:
            # Cache without a file list: nothing to validate against, so keep
            # pairing it with the glob order as before.
            image_features = cached_features
        elif sorted(cached_names) == sorted(os.path.basename(f) for f in files):
            # Same set of images: reuse the features and put the files in the
            # order the feature rows were computed in.
            files = [os.path.join(folder, name) for name in cached_names]
            image_features = cached_features

    if image_features is None:
        # No usable cache: encode everything in a deterministic order and
        # store the file names next to the features.
        files = sorted(files)
        image_features = precompute(model, preprocess, device, files).cpu().numpy()
        cached_names = np.array([os.path.basename(f) for f in files], dtype=str)
        np.savez_compressed(cache_path, image_features, files=cached_names)

    return find_closest_text(model, device, files, image_features, description, num)

In [None]:
#@title ▶ Buscamos las imágenes que más se adecuan a una descripción

#@markdown La primera vez procesará todas las imágenes, así que tardará un poco,
#@markdown en función del número de imágenes. Generará un fichero llamado
#@markdown image_features.npz en el directorio de las imágenes, que usará en
#@markdown búsquedas futuras. Si se añaden o quitan imágenes del directorio
#@markdown hay que borrar este fichero para que lo recalcule.

# Form fields for the search; the `#@param` annotations mark each assignment
# as an editable input of the given type.

#@markdown 💬 Directorio de las imágenes (en formato jpg)
# Folder whose *.jpg files are searched.
input_folder = "/content/drive/MyDrive/BAU-DD/person" #@param {type:"string"}

#@markdown 💬 Descripción con la que buscar
# Text query the images are ranked against.
input_description = "a skilled worker" #@param {type:"string"}

#@markdown 💬 Número de imágenes a seleccionar
# How many of the best-matching images to select.
input_number = 10#@param {type:"integer"}

# Rank the folder's images against the description and keep the best matches.
selected_files = find(input_folder, input_description, input_number)

# Open every match and use its bare file name as the caption.
images = list(map(Image.open, selected_files))
filenames = list(map(os.path.basename, selected_files))

# Show the matches in a four-column grid.
media.show_images(
    images,
    titles=filenames,
    columns=4,
    height=256,
    border=True,
)

In [None]:
#@title ▶ Descargamos la selección

import zipfile

from google.colab import files

# Build an archive name from the description: keep letters, digits and spaces
# only, then turn the spaces into underscores.
safe_filename = "".join([c for c in input_description if c.isalpha() or c.isdigit() or c==' ']).rstrip().replace(" ", "_")
archive_path = os.path.join('/tmp', safe_filename + ".zip")

# Write the archive with the standard library instead of shelling out, so that
# paths containing spaces or shell metacharacters are handled correctly.
# Mode "w" replaces any archive left over from a previous run. Entries are
# stored uncompressed under their bare file name.
with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_STORED) as archive:
    for selected_file in selected_files:
        archive.write(selected_file, arcname=os.path.basename(selected_file))

files.download(archive_path)

# Credits

Taller Estampa https://tallerestampa.com / https://github.com/estampa

### Based on

[Clip](https://github.com/openai/CLIP)
