# <center>Critical AI</center>
<center>ENGL 54.41</center>
<center>Dartmouth College</center>
<center>Winter 2026</center>
<pre>Created: 01/29/2026</pre>

In [None]:
import numpy as np
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
from glob import glob
import torch
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as T
import gc
import pandas as pd 

import plotly.io as pio

from sklearn.manifold import TSNE
import base64
import plotly.graph_objects as go      

In [None]:
# Choose the hardware backend that will run our neural networks.
# The options are tried in this order and the first available one wins:
#   mps  -- Apple Silicon device (MX series of Macbooks)
#   cuda -- Compute Unified Device Architecture, a toolkit from Nvidia; means we have a GPU
#   cpu  -- no accelerator found, just the general-purpose CPU

device = torch.device(
    "mps" if hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)
print('Using device: {0}'.format(device))

In [None]:
# Load a pretrained CLIP checkpoint. CLIP is a transformer neural network
# (more on this architecture later this term).
# There are three components that we need: the model, the image processor, and
# the tokenizer. We'll learn more about tokenization later; for now just know
# that it is the step that prepares text for the model.
# Both objects below are loaded from the same checkpoint name.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

# the network itself, with half-precision (float16) weights; device_map="auto"
# lets the library decide where the weights are placed
model = CLIPModel.from_pretrained(
    CLIP_CHECKPOINT,
    dtype=torch.float16,
    device_map="auto",
)

# the preprocessing object that turns raw inputs into tensors for the model
# NOTE(review): dtype and device_map are model-loading options and presumably
# have no effect on a processor -- confirm before removing them
processor = CLIPProcessor.from_pretrained(
    CLIP_CHECKPOINT,
    dtype=torch.float16,
    use_fast=False,
    clean_up_tokenization_spaces=True,
    device_map="auto",
)

In [None]:
# Download and expand our data. This is a sample from CelebA.
# https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
# What do we make of these images? How were they collected? What are the
# considerations we might take into an analysis of this dataset?
#
# -nc (no-clobber) skips the download when celeba_sample.tgz already exists, so
# re-running this cell does not pile up celeba_sample.tgz.1, .2, ... copies.
# Delete celeba_sample.tgz first if you need a fresh download.
!wget -nc https://github.com/jeddobson/ENGL54.41-26W/raw/refs/heads/main/data/celeba_sample.tgz
!tar -xf celeba_sample.tgz # extract data from tarball

In [None]:
# Preview a single image: whichever file glob lists first in our data directory
sample_paths = glob("celeba_sample/*.jpg")
sample_image = Image.open(sample_paths[0])
plt.imshow(sample_image)
plt.show()

In [None]:
# Extract a neural representation (an embedding) for each of our sample
# images -- this will take some time.

# Sort the paths: glob returns files in an arbitrary, filesystem-dependent order,
# and row i of `embeddings` must line up with images[i] the same way on every run.
images = sorted(glob("celeba_sample/*.jpg"))
if not images:
    # fail early with a clear message instead of an obscure torch.cat error below
    raise FileNotFoundError(
        "No .jpg files found in celeba_sample/ -- run the download cell first."
    )

embs = []
for img in images:
    # the context manager closes the image file once it has been preprocessed
    with Image.open(img) as image:
        inputs = processor(images=[image], return_tensors="pt")

    # Send the pixels to wherever the model's weights actually live. The model is
    # loaded with device_map="auto", so its placement is not guaranteed to match
    # our separately detected `device` variable.
    pixel_values = inputs['pixel_values'].to(model.device)

    # inference only, so there is no need to track gradients
    with torch.no_grad():
        outputs = model.get_image_features(pixel_values).to('cpu')
    embs.append(outputs)

    # free up some memory as we go along
    del image, inputs, pixel_values
    gc.collect()

# combine all the embeddings together in a tensor matrix (one row per image)
embeddings = torch.cat(embs, dim=0)

In [None]:
# shape of the embedding matrix: number of images (rows) x embedding size (columns)
embeddings.shape

In [None]:
# project the embeddings down to two dimensions with t-SNE
# (random_state is fixed so the projection is seeded the same way on each run)
tsne = TSNE(
    n_components=2,
    perplexity=30,
    max_iter=1000,
    random_state=42,
)
embeddings_2d = tsne.fit_transform(embeddings)

# tell plotly to draw figures with its Colab renderer
pio.renderers.default = "colab"

# collect the 2-D coordinates and the matching image path in a Pandas DataFrame
x_coords, y_coords = embeddings_2d.T
vis = pd.DataFrame({"x": x_coords, "y": y_coords, "label": images})

# load small images for displaying on plot
def encode_img(img_path):
    """Return the file at ``img_path`` as an inline JPEG data URI.

    The file is read in binary mode, its bytes are base64-encoded, and the
    result is prefixed with ``data:image/jpeg;base64,`` so that it can be used
    directly as an image source without a separate file.
    """
    with open(img_path, 'rb') as handle:
        raw_bytes = handle.read()
    payload = base64.b64encode(raw_bytes).decode()
    return "data:image/jpeg;base64," + payload
# attach each image's inline data URI so plotly can draw it without extra files
vis['encoded_img'] = vis['label'].map(encode_img)
fig = go.Figure()

# place every image at its t-SNE coordinates, see plotly documentation:
# https://plotly.com/python/images/#adding-images-to-subplots
for src, x_pos, y_pos in zip(vis['encoded_img'], vis['x'], vis['y']):
    thumbnail = dict(
        source=src,
        x=x_pos,
        y=y_pos,
        # x/y are expressed in the plot's data coordinates
        xref="x",
        yref="y",
        sizex=3,
        sizey=3,
        # centre the picture on its point
        xanchor="center",
        yanchor="middle",
        layer="above",
    )
    fig.add_layout_image(thumbnail)

# fully transparent markers: they exist only so that hovering over a point
# shows the image filename
hover_points = go.Scatter(
    x=vis['x'],
    y=vis['y'],
    mode='markers',
    marker=dict(opacity=0),
    text=vis['label'],
    hoverinfo='text',
)
fig.add_trace(hover_points)

# layout: both axes share the same light-gray grid styling on a white background
axis_style = dict(
    visible=True,
    showgrid=True,
    zeroline=True,
    zerolinecolor='lightgray',
    gridcolor='lightgray',
)
fig.update_layout(
    title="t-SNE Plot of CLIP Embeddings",
    width=1024,
    height=800,
    xaxis=dict(axis_style),
    yaxis=dict(axis_style),
    plot_bgcolor='white',
)
fig.show()