# <center>Critical AI</center>
<center>ENGL 54.41</center>
<center>Dartmouth College</center>
<center>Fall 2024</center>
<pre>Created: 10/03/2024</pre>

In [None]:
import os
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity
from transformers import CLIPProcessor, CLIPModel

In [None]:
# Pick the hardware backend that PyTorch will run the neural network on.
# The checks are made in this order of preference:
#   mps  -> Apple Silicon (M-series Macs)
#   cuda -> an Nvidia GPU (CUDA = Compute Unified Device Architecture)
#   cpu  -> no accelerator found; fall back to the general-purpose CPU

# Builds of PyTorch without MPS support have no `torch.backends.mps` attribute.
mps_backend = getattr(torch.backends, "mps", None)

if mps_backend is not None and mps_backend.is_available():
    device_name = 'mps'
elif torch.cuda.is_available():
    device_name = 'cuda'
else:
    device_name = 'cpu'

device = torch.device(device_name)
print('Using device: {0}'.format(device))

In [None]:
# Load a pretrained CLIP transformer (more on this architecture later this term).
# Three components are needed: the model, the image processor, and the tokenizer.
# CLIPProcessor bundles the last two into a single object, so only two things
# are loaded here. We'll learn more about tokenization later; for now just know
# that the tokenizer is the part that prepares text for the model.
#
# torch_dtype=torch.float16 asks for half-precision weights and
# device_map="auto" lets the library decide where to place the model.
clip_checkpoint = "openai/clip-vit-large-patch14"

model = CLIPModel.from_pretrained(
    clip_checkpoint,
    torch_dtype=torch.float16,
    device_map="auto",
)
processor = CLIPProcessor.from_pretrained(
    clip_checkpoint,
    torch_dtype=torch.float16,
    clean_up_tokenization_spaces=True,
    device_map="auto",
)

In [None]:
# download data
!wget http://jeddobson.github.io/data/wheelock-succession.tgz
!tar -zxf wheelock-succession.tgz

In [None]:
# These are the official portraits of Dartmouth College presidents
# (the series of presidents is known as the "Wheelock Succession").

# Placeholder; it is replaced by the array of CLIP image embeddings further down.
wheelock_data = list()

# glob() returns paths in whatever order the filesystem lists them, which can
# differ between machines. Sorting pins the order so that "image 0" (and every
# row of the embedding / similarity arrays built from this list) is the same
# on every run. Note this is alphabetical by file name, not chronological.
wheelock_succession = sorted(glob('wheelock-succession/*'))

In [None]:
# Display the official portraits as a grid, in the same order as
# `wheelock_succession` (so the first tile is image index 0).
GRID_COLS = 5
n_portraits = len(wheelock_succession)
# Keep the original 4-row layout, but add rows when there are more than
# 4 * 5 = 20 images; add_subplot raises if the index exceeds rows * cols.
grid_rows = max(4, -(-n_portraits // GRID_COLS))  # -(-a // b) is ceiling division

fig = plt.figure(figsize=(10, 10))  # width, height in inches
# suptitle() titles the figure itself. plt.title() on an empty figure would
# first create a full-size Axes, whose frame and tick labels then show
# through behind the portrait grid.
fig.suptitle("The Wheelock Succession (Unordered)")
for i, path in enumerate(wheelock_succession):
    ax = fig.add_subplot(grid_rows, GRID_COLS, i + 1)
    port = cv2.imread(path)
    if port is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('Could not read image file: {0}'.format(path))
    # OpenCV loads pixels as BGR; matplotlib expects RGB.
    port = cv2.cvtColor(port, cv2.COLOR_BGR2RGB)
    ax.imshow(port)
    ax.axis('off')
plt.show()

In [None]:
# Now we will load these images and batch process them with CLIP.
# We'll just use a placeholder label ("president") to obtain
# a representation of the image from the CLIP neural network.
if not wheelock_succession:
    raise FileNotFoundError(
        "No portraits found in wheelock-succession/ - run the download cell first."
    )

data = []
for path in wheelock_succession:
    # The context manager closes the file handle. convert() returns a new,
    # fully loaded image, so it stays usable after the file is closed.
    with Image.open(path) as portrait:
        data.append(portrait.convert("RGB"))

inputs = processor(text=["president"] * len(data), images=data,
                   return_tensors="pt", padding=True)

# We only need the model's outputs, not gradients, so skip autograd bookkeeping.
with torch.no_grad():
    # The model was placed by device_map="auto"; send the inputs to wherever
    # it actually lives rather than assuming it matches `device`.
    outputs = model(**inputs.to(model.device))

# One row per portrait, in the same order as `wheelock_succession`.
wheelock_data = outputs['image_embeds'].to('cpu').numpy()

In [None]:
# Compare every portrait embedding with every other one using cosine similarity.
# NOTE: despite its name, `dist_matrix` holds similarities, not distances:
# a larger value means two embeddings point in a more similar direction.
# Entry [i][j] compares image i with image j.
dist_matrix = cosine_similarity(X=wheelock_data)

In [None]:
# Rank all portraits by their cosine similarity to one "query" portrait and
# show them from most to least similar. The query is image index 0, i.e. the
# first image in `wheelock_succession`. Change `query_idx` to try another one.
# The top result should be the query image itself. What do you notice?
query_idx = 0

# argsort gives ascending order; reversing it puts the most similar first.
ranking = np.argsort(dist_matrix[query_idx])[::-1]

for idx in ranking:
    print(dist_matrix[query_idx][idx])
    img = cv2.imread(wheelock_succession[idx])
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('Could not read image file: {0}'.format(wheelock_succession[idx]))
    # OpenCV loads pixels as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    # Label each figure so it is clear which portrait the printed score belongs to.
    plt.title(wheelock_succession[idx])
    plt.show()

In [None]:
# Now we are going to use a technique called
# Principal Components Analysis or PCA to locate
# the two most meaningful components of the image
# representations (we'll call these embeddings). These
# two components will provide us with x and y axis data
# that we can plot.
#
# random_state is fixed because scikit-learn may choose a randomized solver
# for data with this many features; the seed keeps the plot identical from
# run to run. (PCA itself is imported in the imports cell at the top.)
pca = PCA(n_components=2, random_state=0)
plot_data = pca.fit_transform(wheelock_data)
# Column 0 is the first component (x), column 1 the second (y).
xs, ys = plot_data[:, 0], plot_data[:, 1]

In [None]:
# Now display these as a scatter plot, one labelled point per portrait.
# style.context() applies the ggplot look to this figure only. Calling
# plt.style.use() after the figure exists styles it only partially on the
# first run and then changes the defaults for every later plot.
with plt.style.context('ggplot'):
    fig, ax = plt.subplots(figsize=(20, 15))
    ax.set_title("Wheelock Succession")
    ax.set_xlabel("Principal component 1")
    ax.set_ylabel("Principal component 2")
    ax.scatter(xs, ys, marker='^')
    for path, x, y in zip(wheelock_succession, xs, ys):
        # Label each point with the file name; basename() works regardless of
        # the path separator or how deeply the file is nested.
        ax.annotate(os.path.basename(path), xy=(x, y), xytext=(3, 3),
                    textcoords='offset points', ha='left', va='top')
    plt.show()