# Assignment 1: Representation, space and embedding (10 pts in total)

In [8]:
# Install the gensim library
!pip install gensim

# Import packages
import gensim.downloader
import numpy as np



You will load a small pre-trained word-embedding model.

In [9]:
# Load the pre-trained GloVe model "glove-wiki-gigaword-50" through gensim's downloader.
# The loaded object is indexed by word in the cells below (e.g. model['woman'])
# to obtain that word's embedding vector.
model = gensim.downloader.load('glove-wiki-gigaword-50')

In [10]:
# Get the embedding vector for the word "woman"
# (indexing the model with a word returns that word's vector)
woman_vector = model['woman']

#### ✏️ Do it yourself (1 pt):
Get the dimension of this embedding space.

In [11]:
# Insert your code here
# A word vector has one entry per dimension of the embedding space,
# so the size of its first (only) axis is the dimensionality.
embedding_dim = woman_vector.shape[0]
print(f"The dimension of the embedding space is {embedding_dim}.")

The dimension of the embedding space is 50.


#### ✏️ Do it yourself (1 pt):
Get embeddings for the words “queen”, “uncle” and “tree.”

In [12]:
# Insert your code here
# Look up the embedding vector of each requested word in the model
queen_vector, uncle_vector, tree_vector = (
    model[word] for word in ("queen", "uncle", "tree")
)

#### ✏️ Do it yourself (2 pts):
Compute the Euclidean distance between the embeddings of the following word pairs: (1) "woman" and "queen", (2) "woman" and "uncle", and (3) "woman" and "tree". \
_Hint: use `np.linalg.norm` to compute L2 norm_

In [14]:
# Insert your code here
def euclidean_distance(v1, v2):
  """Return the Euclidean (L2) distance between two vectors.

  Args:
    v1: First vector; a NumPy array or any array-like (list, tuple).
    v2: Second vector, with a shape compatible with ``v1``.

  Returns:
    The L2 norm of the element-wise difference ``v1 - v2``.
  """
  # np.asarray lets plain lists/tuples work too (``list - list`` raises
  # TypeError) and leaves inputs that are already NumPy arrays untouched.
  difference = np.asarray(v1) - np.asarray(v2)
  return np.linalg.norm(difference)

# Euclidean distance from "woman" to each of the three comparison words
ed_woman_queen, ed_woman_uncle, ed_woman_tree = (
    euclidean_distance(woman_vector, other_vector)
    for other_vector in (queen_vector, uncle_vector, tree_vector)
)

# Report the three distances
print(f"The euclidean distance between woman and queen is: {ed_woman_queen}")
print(f"The euclidean distance between woman and uncle is: {ed_woman_uncle}")
print(f"The euclidean distance between woman and tree is: {ed_woman_tree}")


The euclidean distance between woman and queen is: 4.825778961181641
The euclidean distance between woman and uncle is: 4.893293857574463
The euclidean distance between woman and tree is: 6.05571174621582


#### ✏️ Do it yourself (2 pts):
Compute the cosine distance between the embeddings of the following word pairs: (1) "woman" and "queen", (2) "woman" and "uncle", and (3) "woman" and "tree". \
_Hint: use `@` to compute dot product_

In [17]:
# Insert your code here
def cos_distance(v1, v2):
  """Return the cosine distance between two vectors.

  Cosine distance is ``1 - (v1 . v2) / (||v1|| * ||v2||)``, i.e. one minus
  the cosine of the angle between the vectors. It depends only on their
  directions: about 0 for the same direction, 1 for orthogonal vectors and
  2 for opposite directions.

  Args:
    v1: First vector; a 1-D NumPy array or any 1-D array-like (list, tuple).
    v2: Second vector, of the same length as ``v1``.

  Returns:
    The cosine distance between ``v1`` and ``v2``.
  """
  # np.asarray lets plain lists/tuples work too (``list @ list`` raises
  # TypeError) and leaves inputs that are already NumPy arrays untouched.
  v1 = np.asarray(v1)
  v2 = np.asarray(v2)
  dot_product = v1 @ v2
  norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
  # NOTE: if either vector is all zeros, norm_product is 0 and the division
  # below is 0/0, so the result is not a meaningful distance.
  return 1 - (dot_product / norm_product)

# Cosine distance from "woman" to each of the three comparison words
cd_woman_queen, cd_woman_uncle, cd_woman_tree = (
    cos_distance(woman_vector, other_vector)
    for other_vector in (queen_vector, uncle_vector, tree_vector)
)

# Report the three distances
print(f"The cosine distance between woman and queen is: {cd_woman_queen}")
print(f"The cosine distance between woman and uncle is: {cd_woman_uncle}")
print(f"The cosine distance between woman and tree is: {cd_woman_tree}")

The cosine distance between woman and queen is: 0.3996894359588623
The cosine distance between woman and uncle is: 0.4503743648529053
The cosine distance between woman and tree is: 0.6251512169837952


#### ✏️ Do it yourself (1 pt):
Compute the cosine similarity between the embeddings of the following word pairs: (1) "woman" and "queen", (2) "woman" and "uncle", and (3) "woman" and "tree". \
_Hint: compute cosine similarity from cosine distance_

In [18]:
# Insert your code here
def cos_similarity(v1, v2):
  """Return the cosine similarity of two vectors.

  ``cos_distance`` is one minus the similarity, so subtracting the distance
  from 1 recovers the similarity.
  """
  distance = cos_distance(v1, v2)
  return 1 - distance

# Cosine similarity between "woman" and each of the three comparison words
cs_woman_queen, cs_woman_uncle, cs_woman_tree = (
    cos_similarity(woman_vector, other_vector)
    for other_vector in (queen_vector, uncle_vector, tree_vector)
)

# Report the three similarities
print(f"The cosine similarity between woman and queen is: {cs_woman_queen}")
print(f"The cosine similarity between woman and uncle is: {cs_woman_uncle}")
print(f"The cosine similarity between woman and tree is: {cs_woman_tree}")

The cosine similarity between woman and queen is: 0.6003105640411377
The cosine similarity between woman and uncle is: 0.5496256351470947
The cosine similarity between woman and tree is: 0.37484878301620483


#### ✏️ Do it yourself (1 pt):
Combining the above embedding examples, explain why cosine distance is generally preferred over Euclidean distance when comparing embeddings.

Write your answer here:
> **Answer:**
>
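> Cosine distance is generally preferred over Euclidean distance because it depends only on the angle between the two vectors, not on their magnitudes. When many dimensions are involved, Euclidean distances become less meaningful, whereas cosine distance does not depend on the vectors' magnitudes. Also, cosine similarity is bounded between -1 and 1 (so cosine distance lies between 0 and 2), whereas Euclidean distance is unbounded and its scale depends on the vectors' magnitudes and the number of dimensions. In the examples above, "queen" and "uncle" are almost equally far from "woman" in Euclidean distance (4.83 vs. 4.89), while cosine distance separates them more clearly (0.40 vs. 0.45).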
>   
>   


### Get embedding of an image from a vision model

In [None]:
# Import packages
import torch
from torchvision import models, transforms
from PIL import Image
from IPython.display import display

In [None]:
# Download the image file
# (-O saves the download under the given local file name; -q suppresses wget's output)
!wget -O surfing.png "https://drive.google.com/uc?export=download&id=1drpMOkT81nX2GwlvOvjRvs1-YwKlEIQs" -q

# Display the image
img = Image.open("surfing.png")
display(img)

Load in the AlexNet model.
Visualize the model.

In [None]:
# Depending on the version of torchvision, you might need to use:
# model = models.alexnet(pretrained=True) # for torchvision versions < 0.13
# NOTE: this rebinds `model`, which previously held the GloVe word-embedding
# model; word lookups such as model['woman'] no longer work after this cell.
model = models.alexnet(weights='IMAGENET1K_V1') # for torchvision versions >= 0.13

# Put the model in evaluation mode: layers such as dropout switch to their
# inference behaviour. This does not by itself freeze the weights or disable
# gradient tracking; the forward pass below uses torch.no_grad() for that.
model.eval()

### Demo on how to get activations (i.e., embeddings) from a specific layer of the model
1. Read in the image
2. Preprocess the image to fit the input size of the model
3. Define the hook function (a callback function) and attach it to a specific layer
4. Forward-pass the image to obtain activations (embeddings) from the specified layer

In [None]:
# Set device for computation
device = "cuda" if torch.cuda.is_available() else "cpu"

# Move the model to the same device as the input. Without this the input is
# sent to the GPU while the weights stay on the CPU, and the forward pass
# fails with a device mismatch whenever CUDA is available.
model = model.to(device)

# Open the image file and force 3-channel RGB: the normalization below has
# exactly three channel means/stds, and a PNG may carry an alpha channel.
input_img = Image.open("surfing.png").convert("RGB")

# Preprocess the image to fit the input size of the model:
# resize, crop the central 224x224 region, convert to a tensor and
# normalize each channel with the given mean and standard deviation.
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Apply the preprocessing to the input image
input_tensor = preprocess(input_img)

# Add a batch dimension so the model can process the image
x = input_tensor.unsqueeze(0).to(device)  # (1, 3, 224, 224)


# Define a dictionary to store the activations, keyed by layer name
activations = {}

# Define the hook function. This function will be called during the forward pass.
def hook_fn(name):
    """Build a forward hook that stores a layer's output under ``name``.

    The returned callable has the (module, inputs, output) signature expected
    by ``register_forward_hook`` and writes a detached copy of the layer's
    output into the module-level ``activations`` dictionary.
    """
    def _hook(module, inputs, output):
        activations[name] = output.detach()
    return _hook

# Register the hook on the fc7 layer
handle = model.classifier[4].register_forward_hook(hook_fn("fc7"))

try:
    # Run the model with the input image to trigger the hook
    with torch.no_grad():
        _ = model(x)
finally:
    # Remove the hook even if the forward pass fails, so that re-running this
    # cell (or registering another hook later) does not stack stale hooks
    handle.remove()

# Get the embedding from the fc7 layer
emb = activations["fc7"]

Next, you will download two other images and get their embeddings.

In [None]:
# Download two other image files
# (-O saves each download under the given local file name; -q suppresses wget's output)
!wget -O img01.png "https://drive.google.com/uc?export=download&id=1Y5nIJ_0VOHTWWx2LMpE-oiJuwXoWjU-N" -q
!wget -O img02.png "https://drive.google.com/uc?export=download&id=1zvHbYiO-6N4kma-KLi0XBYSwE2niadjQ" -q

# Display the first image
display(Image.open("img01.png"))

# Display the second image
display(Image.open("img02.png"))

#### ✏️ Do it yourself (2 pts):
Get the embeddings of 'img01' and 'img02' from the _fc7_ layer of AlexNet (1 pt) and compute the cosine distance between the embeddings of image pairs: (1) "surfing" and "img01", (2) "surfing" and "img02" (1 pt).

In [None]:
# Insert your code here