<a href="https://colab.research.google.com/github/gupta24789/siamese-networks/blob/main/cosine_similarity_intution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

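- If you L2-normalize the vectors before taking their dot product, the dot product is exactly the cosine similarity: $\cos(a, b) = \frac{a \cdot b}{\lVert a \rVert_2 \, \lVert b \rVert_2}$, and both norms equal 1 after normalization.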

In [None]:
import torch
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

In [None]:
# Seed NumPy's global RNG so the random matrices generated below are reproducible.
np.random.seed(121)

In [None]:
# Two matrices of 2 random vectors with 5 features each, drawn uniformly from [0, 1).
a = np.random.random(size=(2, 5))
b = np.random.random(size=(2, 5))

In [None]:
a

array([[0.11133083, 0.21076757, 0.23296249, 0.15194456, 0.83017814],
       [0.40791941, 0.5557906 , 0.74552394, 0.24849976, 0.9686594 ]])

In [None]:
b

array([[0.95268418, 0.48984885, 0.01829731, 0.85760612, 0.40493829],
       [0.62247394, 0.29537149, 0.92958852, 0.4056155 , 0.56730065]])

In [None]:
# Reference result: pairwise cosine similarity, entry [i, j] compares row i of `a` with row j of `b`.
cosine_similarity(a,b)

array([[0.52377609, 0.71841761],
       [0.6276227 , 0.91482373]])

## NumPy dot product with scikit-learn `normalize`

In [None]:
# Raw dot product of the unnormalized vectors: it also scales with the vector
# lengths, so it does not match the cosine similarity.
a @ b.T

array([[0.68004946, 0.88070619],
       [1.27987503, 1.76143087]])

In [None]:
# L2-normalize every row first; the dot product of the unit-length rows is then
# equivalent to the cosine similarity.
normalize_a = normalize(a, norm="l2", axis=1)
normalize_b = normalize(b, norm="l2", axis=1)
normalize_a @ normalize_b.T

array([[0.52377609, 0.71841761],
       [0.6276227 , 0.91482373]])

## Custom Normalize

In [None]:
def normalize_l1(a):
    """Scale each vector along the last axis to unit L1 norm.

    The L1 norm of a vector is the sum of the absolute values of its entries,
    so after this call the absolute values of every vector sum to 1.

    Args:
        a: Array-like of shape (..., n_features); each slice along the last
            axis is treated as one vector.

    Returns:
        Array with the same shape as ``a``. An all-zero vector has norm 0 and
        therefore produces NaN entries (division by zero).
    """
    # ord=1 with an integer axis selects the per-vector L1 norm. A bare
    # np.linalg.norm(a) would instead be the Frobenius (L2) norm of the whole
    # matrix, which is neither L1 nor row-wise.
    return a / np.linalg.norm(a, ord=1, axis=-1, keepdims=True)

def normalize_l2(a):
    """Scale each vector along the last axis to unit L2 (Euclidean) norm.

    Args:
        a: Array-like of shape (..., n_features); each slice along the last
            axis is treated as one vector.

    Returns:
        Array with the same shape as ``a`` whose vectors have length 1.
    """
    # keepdims=True keeps a trailing axis of size 1 so the division broadcasts
    # each norm across its own vector.
    row_norms = np.linalg.norm(a, ord=2, axis=-1, keepdims=True)
    return a / row_norms

def normalize_l2_(x):
    """L2-normalize along the last axis, with the norm written out by hand.

    Computes sqrt(sum(x_i ** 2)) per vector and divides by it.

    Args:
        x: NumPy array of shape (..., n_features).

    Returns:
        Array with the same shape as ``x`` whose vectors have length 1.
    """
    # Sum of squares per vector; keepdims=True leaves a trailing axis of size 1
    # so the division below broadcasts across each vector.
    squared_sum = np.sum(x * x, axis=-1, keepdims=True)
    length = np.sqrt(squared_sum)
    return x / length

In [None]:
# Same check using the np.linalg.norm-based helper: the dot product of the
# unit-length rows gives the cosine similarity.
normalize_a = normalize_l2(a)
normalize_b = normalize_l2(b)
normalize_a @ normalize_b.T

array([[0.52377609, 0.71841761],
       [0.6276227 , 0.91482373]])

In [None]:
# Same check using the hand-written sqrt(sum(x * x)) helper.
normalize_a = normalize_l2_(a)
normalize_b = normalize_l2_(b)
normalize_a @ normalize_b.T

array([[0.52377609, 0.71841761],
       [0.6276227 , 0.91482373]])

## Torch

In [None]:
# PyTorch implementation. The `p` argument selects the norm:
#   p = 1 -> L1 norm
#   p = 2 -> L2 norm
# dim=1 (the default) normalizes each row of the 2-D tensor.
normalize_a = torch.nn.functional.normalize(torch.tensor(a), p=2, dim=1)
normalize_b = torch.nn.functional.normalize(torch.tensor(b), p=2, dim=1)
normalize_a @ normalize_b.T

tensor([[0.5238, 0.7184],
        [0.6276, 0.9148]], dtype=torch.float64)