# Cosine Similarity

In [34]:
import numpy as np
import torch
import torch.nn.functional as F

In [74]:
# Problem size: number of corpus documents and embedding width.
n_docs = 128
n_embed = 32
# Half precision for all tensors; run on Apple's MPS backend when available,
# otherwise fall back to the CPU.
dtype = torch.float16
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Uniform random values in [0, 1): one row per document, plus a single query
# vector of the same embedding width.
corpus = torch.tensor(np.random.rand(n_docs, n_embed), dtype=dtype, device=device)
query = torch.tensor(np.random.rand(n_embed), dtype=dtype, device=device)

## LL25

In [94]:
def batched_cosine_similarity(query, corpus, eps=1e-6):
    """Return the cosine similarity between ``query`` and each row of ``corpus``.

    Args:
        query: Tensor that broadcasts against ``corpus`` with the embedding
            axis at dimension 1, e.g. ``(1, n_embed)``. A 1-D ``(n_embed,)``
            vector is also accepted and treated as a single row.
        corpus: Tensor with the embedding axis at dimension 1, e.g.
            ``(n_docs, n_embed)``.
        eps: Lower bound applied to each L2 norm so that zero vectors do not
            cause a division by zero.

    Returns:
        Tensor of similarities with dimension 1 reduced away.
    """
    # Accept a bare vector so callers do not have to unsqueeze it themselves.
    if query.dim() == 1:
        query = query.unsqueeze(0)

    dot_product = (query * corpus).sum(dim=1)

    # Clamp the norms instead of adding eps under the square root: adding eps
    # to the squared norm biases every result toward zero and makes the
    # effective floor sqrt(eps) rather than eps.
    norm_query = torch.sqrt((query ** 2).sum(dim=1)).clamp(min=eps)
    norm_corpus = torch.sqrt((corpus ** 2).sum(dim=1)).clamp(min=eps)

    return dot_product / (norm_query * norm_corpus)


# Sanity check: compare the hand-written implementation against PyTorch's
# built-in cosine similarity, with both using eps=1e-6 (the manual function's
# default). The query vector gets a leading dimension before each call.
sim_manual = batched_cosine_similarity(query.unsqueeze(0), corpus)
sim_builtin = F.cosine_similarity(query.unsqueeze(0), corpus, eps=1e-6)
# Absolute tolerance of 1e-3 -- presumably loose because the tensors are
# float16; the cell's displayed value is the result of this comparison.
torch.allclose(sim_manual, sim_builtin, atol=1e-3)

True

## LL3

In [97]:
def batched_cosine_similarity_LL3(query, corpus, eps):
    """Return the cosine similarity between ``query`` and each row of ``corpus``.

    Args:
        query: Tensor that broadcasts against ``corpus`` with the embedding
            axis at dimension 1, e.g. ``(1, n_embed)``. A 1-D ``(n_embed,)``
            vector is also accepted and treated as a single row.
        corpus: Tensor with the embedding axis at dimension 1, e.g.
            ``(n_docs, n_embed)``.
        eps: Lower bound applied to each L2 norm so that zero vectors do not
            cause a division by zero.

    Returns:
        Tensor of similarities with dimension 1 reduced away.
    """
    # Accept a bare vector so callers do not have to unsqueeze it themselves.
    if query.dim() == 1:
        query = query.unsqueeze(0)

    dot_product = (query * corpus).sum(dim=1)

    # Clamp the norms rather than adding eps to the squared norms; the latter
    # shrinks every similarity and floors the norm at sqrt(eps), not eps.
    query_norm = torch.sqrt((query ** 2).sum(dim=1)).clamp(min=eps)
    corpus_norm = torch.sqrt((corpus ** 2).sum(dim=1)).clamp(min=eps)

    return dot_product / (query_norm * corpus_norm)


# Sanity check: compare the hand-written implementation against PyTorch's
# built-in cosine similarity, passing the same eps=1e-3 to both. The query
# vector gets a leading dimension before each call.
sim_manual = batched_cosine_similarity_LL3(query.unsqueeze(0), corpus, eps=1e-3)
sim_builtin = F.cosine_similarity(query.unsqueeze(0), corpus, eps=1e-3)
# Absolute tolerance of 1e-3 -- presumably loose because the tensors are
# float16; the cell's displayed value is the result of this comparison.
torch.allclose(sim_manual, sim_builtin, atol=1e-3)

True