## Setup

In [1]:
import os
from tqdm.notebook import tqdm
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Root folder of the image dataset. Set the IMAGES_PATH environment variable to
# point the notebook at a different location; otherwise the default below is used.
IMAGES_PATH = os.environ.get("IMAGES_PATH", "/home/ubuntu/data/imagenette_xeno_truchas")


## Get Data

def find_candidate_images(images_path, extensions=(".jpg", ".png", ".jpeg")):
    """
    Finds all candidate images in the given folder and its sub-folders.

    Args:
        images_path: root folder that is searched recursively.
        extensions: file extensions (including the leading dot) that count as
            images. Matching is case-insensitive.

    Returns:
        images: a sorted list of absolute paths to the discovered images.
    """
    allowed = {ext.lower() for ext in extensions}
    images = []
    for root, _dirs, files in os.walk(images_path):
        for name in files:
            if os.path.splitext(name)[1].lower() in allowed:
                images.append(os.path.abspath(os.path.join(root, name)))
    # os.walk yields entries in whatever order the filesystem returns them;
    # sorting makes the result (and everything derived from it) reproducible.
    images.sort()
    return images

## Create CLIP Embeddings

![CLIP diagram](assets/clip.svg)

In [2]:
# Load the pretrained CLIP checkpoint: the model and its matching pre-processor
clip_checkpoint = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(clip_checkpoint)
processor = CLIPProcessor.from_pretrained(clip_checkpoint)

# Use the GPU when torch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"device: {device}")
model.to(device)

def encode_images(image_paths, batch_size=160):
    """
    Computes CLIP image embeddings for the given image files, batch by batch.

    Uses the module-level `model`, `processor` and `device`.

    Args:
        image_paths: list of paths to image files that PIL can open.
        batch_size: number of images sent through the model per forward pass.

    Returns:
        embeddings: a list with one numpy array per input image, in the same
            order as `image_paths`.
    """
    embeddings = []
    for start in tqdm(range(0, len(image_paths), batch_size)):
        batch_paths = image_paths[start:start + batch_size]

        # Image.open is lazy and keeps the underlying file open. convert()
        # forces the pixel data to be read into a new image, and the `with`
        # block then closes the file, so no handles are left for the GC.
        images = []
        for path in batch_paths:
            with Image.open(path) as img:
                images.append(img.convert("RGB"))

        # Process the images batch
        inputs = processor(images=images, return_tensors="pt", padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Generate embeddings (inference only, so no gradients are tracked)
        with torch.no_grad():
            outputs = model.get_image_features(**inputs)

        # Move the batch back to host memory and append it row by row
        embeddings.extend(outputs.cpu().numpy())

    return embeddings

# Discover every image below IMAGES_PATH, then embed all of them with CLIP
image_paths = find_candidate_images(IMAGES_PATH)
embeddings = encode_images(image_paths)

# Sanity check: both counts should match (one embedding per image)
n_paths, n_embeddings = len(image_paths), len(embeddings)
print(n_paths, n_embeddings)

device: cuda


  0%|          | 0/61 [00:00<?, ?it/s]

9640 9640


In [3]:
# Label each image with the first path component below IMAGES_PATH. Deriving it
# relative to the dataset root (instead of a fixed index into the absolute path)
# keeps the labels correct when IMAGES_PATH moves or has a different depth.
labels = [
    os.path.relpath(path, IMAGES_PATH).split(os.sep)[0] for path in image_paths
]
df = pd.DataFrame(
    {"image_path": image_paths, "label": labels, "clip_embedding": embeddings}
)
# Persist paths, labels and embeddings so later sections can reload them
df.to_pickle("vectors.pkl")

## Apply t-SNE

In [4]:
# Reload the DataFrame written to "vectors.pkl" earlier in this notebook.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
df = pd.read_pickle("vectors.pkl")

In [5]:
# Stack the per-image embedding vectors into a single array, one row per image
X = np.array(df["clip_embedding"].tolist())

# Project the embeddings down to 2-D. t-SNE is stochastic, so the seed is
# pinned to make the layout reproducible across kernel restarts.
TSNE_SEED = 42
tsne = TSNE(
    n_components=2,
    random_state=TSNE_SEED,
    verbose=2,
).fit_transform(X)

# Compare against df (just reloaded from disk) rather than image_paths, which
# only exists when the embedding cells were run in the current kernel session.
print(len(tsne), len(df))

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 9640 samples in 0.002s...
[t-SNE] Computed neighbors for 9640 samples in 0.743s...
[t-SNE] Computed conditional probabilities for sample 1000 / 9640
[t-SNE] Computed conditional probabilities for sample 2000 / 9640
[t-SNE] Computed conditional probabilities for sample 3000 / 9640
[t-SNE] Computed conditional probabilities for sample 4000 / 9640
[t-SNE] Computed conditional probabilities for sample 5000 / 9640
[t-SNE] Computed conditional probabilities for sample 6000 / 9640
[t-SNE] Computed conditional probabilities for sample 7000 / 9640
[t-SNE] Computed conditional probabilities for sample 8000 / 9640
[t-SNE] Computed conditional probabilities for sample 9000 / 9640
[t-SNE] Computed conditional probabilities for sample 9640 / 9640
[t-SNE] Mean sigma: 1.875120
[t-SNE] Computed conditional probabilities in 0.234s
[t-SNE] Iteration 50: error = 81.9252625, gradient norm = 0.0275300 (50 iterations in 1.555s)
[t-SNE] Iteration 100: 

In [6]:
# Min-max normalise each t-SNE dimension to the [0, 1] range. The per-dimension
# extrema are computed once for the whole array instead of once per point.
tsne_min = tsne.min(axis=0)
tsne_span = tsne.max(axis=0) - tsne_min
# Guard against a degenerate dimension (all values equal) to avoid dividing by zero
tsne_span[tsne_span == 0] = 1
# .tolist() yields one [x, y] list of plain Python floats per image
points = ((tsne - tsne_min) / tsne_span).tolist()

df["tsne_coords"] = points
df.to_pickle("vectors.pkl")

## Visualization

In [9]:
# Reload the saved frame and split the [x, y] coordinate pairs into two columns
df = pd.read_pickle("vectors.pkl")
coord_pairs = df['tsne_coords'].tolist()
coords = pd.DataFrame(coord_pairs, index=df.index)
df[['x', 'y']] = coords

In [10]:
import plotly.express as px

# Scatter plot of the normalised t-SNE coordinates, one colour per label
axis_titles = {'x': 't-SNE Dimension 1', 'y': 't-SNE Dimension 2'}
fig = px.scatter(
    df,
    x='x',
    y='y',
    color='label',
    title='t-SNE Clustering Results',
    labels=axis_titles,
)

# Give the legend a readable heading
fig.update_layout(legend_title_text='Label')

# Render the interactive figure
fig.show()
