<a href="https://colab.research.google.com/github/inderpreetsingh01/ml_machine_coding/blob/main/similarity_measures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# cosine similarity (numpy version)

In [1]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """
    Cosine of the angle between two 1D vectors, computed with NumPy.

    vec1, vec2: 1D array-likes of equal length.

    Returns dot(vec1, vec2) / (||vec1|| * ||vec2|| + 1e-9). The 1e-9 term
    keeps the division defined when either vector is all zeros (the result
    is then 0.0); it also pulls results for very small-norm vectors
    toward 0.
    """
    magnitude_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    numerator = np.dot(vec1, vec2)
    # Epsilon in the denominator guards against division by zero.
    return numerator / (magnitude_product + 1e-9)

In [2]:
def cosine_similarity_matrix(X, Y=None):
    """
    Pairwise cosine similarity between the rows of X and the rows of Y.

    X: (n_samples, dim)
    Y: (m_samples, dim), optional. When omitted, X is compared with itself.

    Returns an (n_samples, m_samples) array whose entry [i, j] is the
    similarity between row i of X and row j of Y.
    """
    def _unit_rows(M):
        # Divide each row by its L2 norm; the epsilon keeps all-zero rows
        # from triggering a division by zero.
        row_lengths = np.linalg.norm(M, axis=1, keepdims=True)
        return M / (row_lengths + 1e-9)

    other = X if Y is None else Y
    # Dot products of length-normalised rows are the cosine similarities.
    return np.dot(_unit_rows(X), _unit_rows(other).T)

In [3]:
# Demo 1: similarity of a single pair of 1D vectors.
v1, v2 = np.array([1, 2, 3]), np.array([2, 3, 4])
print("Cosine similarity (v1, v2):", cosine_similarity(v1, v2))

# Demo 2: all-pairs similarity among the rows of one matrix.
X = np.array([[1, 0], [0, 1], [1, 1]])
sim_matrix = cosine_similarity_matrix(X)
# Round for display only; sim_matrix itself keeps full precision.
print("Pairwise cosine similarity matrix:\n", np.round(sim_matrix, decimals=3))


Cosine similarity (v1, v2): 0.9925833339216693
Pairwise cosine similarity matrix:
 [[1.    0.    0.707]
 [0.    1.    0.707]
 [0.707 0.707 1.   ]]


In [4]:
import math

def cosine_similarity(vec1, vec2):
    """
    Cosine similarity between two vectors given as plain Python lists.

    vec1, vec2: sequences of numbers. Elements are paired with zip(), so
    the dot product covers only the first min(len(vec1), len(vec2))
    positions.

    Returns dot / (|vec1| * |vec2| + 1e-9); the epsilon avoids a
    ZeroDivisionError when either vector is all zeros (the result is 0.0).
    """
    def _length(v):
        # Euclidean length: square root of the sum of squared components.
        return math.sqrt(sum(c * c for c in v))

    numerator = sum(p * q for p, q in zip(vec1, vec2))
    denominator = _length(vec1) * _length(vec2) + 1e-9
    return numerator / denominator

In [5]:
def cosine_similarity_matrix(X, Y=None):
    """
    Compute pairwise cosine similarity between lists of vectors.

    X: list of vectors (e.g., [[1,0],[0,1]])
    Y: list of vectors (optional). If None, compares X with itself.

    Returns a list of rows where result[i][j] is the similarity between
    X[i] and Y[j]. 1e-9 is added to every vector norm so that an all-zero
    vector yields 0.0 instead of raising ZeroDivisionError.
    """
    if Y is None:
        Y = X

    def norm(v):
        return math.sqrt(sum(a * a for a in v)) + 1e-9

    # A norm depends on a single vector, so compute it once per vector
    # instead of once per (x, y) pair inside the nested loop.
    y_norms = [norm(y) for y in Y]

    result = []
    for x in X:
        x_norm = norm(x)
        result.append([
            sum(a * b for a, b in zip(x, y)) / (x_norm * y_norm)
            for y, y_norm in zip(Y, y_norms)
        ])
    return result

In [6]:
# Single pair of plain-Python vectors.
v1, v2 = [1, 2, 3], [2, 3, 4]
print("Cosine similarity (v1, v2):", cosine_similarity(v1, v2))

# All-pairs similarity for a small list of 2D vectors.
X = [[1, 0], [0, 1], [1, 1]]
sim_matrix = cosine_similarity_matrix(X)

# Print one row per line, rounded to 3 decimals for display only.
print("Pairwise cosine similarity matrix:")
for row in sim_matrix:
    print([round(val, 3) for val in row])

Cosine similarity (v1, v2): 0.9925833339216693
Pairwise cosine similarity matrix:
[1.0, 0.0, 0.707]
[0.0, 1.0, 0.707]
[0.707, 0.707, 1.0]


In [7]:
# euclidean distance (python version)

In [9]:
import math

def euclidean_distance(vec1, vec2):
    """
    Euclidean (straight-line) distance between two vectors given as lists.

    vec1, vec2: sequences of numbers. Elements are paired with zip(), so
    only the first min(len(vec1), len(vec2)) positions are compared.

    Returns the square root of the sum of squared differences.
    """
    squared_gaps = ((p - q) ** 2 for p, q in zip(vec1, vec2))
    return math.sqrt(sum(squared_gaps))

In [10]:
def euclidean_distance_matrix(X, Y=None):
    """
    Pairwise Euclidean distances between two lists of vectors.

    X: list of vectors (e.g., [[1,0],[0,1]])
    Y: list of vectors (optional). When omitted, X is compared with itself.

    Returns a list of rows where entry [i][j] is the distance between
    X[i] and Y[j].
    """
    others = X if Y is None else Y
    # Outer comprehension walks the rows of X, inner one the rows of Y.
    return [
        [
            math.sqrt(sum((p - q) ** 2 for p, q in zip(row, other)))
            for other in others
        ]
        for row in X
    ]

In [11]:
# Distance between one pair of plain-Python vectors.
v1, v2 = [1, 2, 3], [2, 3, 4]
print("Euclidean distance (v1, v2):", euclidean_distance(v1, v2))

# All-pairs distances for a small list of 2D points.
X = [[1, 0], [0, 1], [1, 1]]
dist_matrix = euclidean_distance_matrix(X)

# Print one row per line, rounded to 3 decimals for display only.
print("Pairwise Euclidean distance matrix:")
for row in dist_matrix:
    print([round(val, 3) for val in row])

Euclidean distance (v1, v2): 1.7320508075688772
Pairwise Euclidean distance matrix:
[0.0, 1.414, 1.0]
[1.414, 0.0, 1.0]
[1.0, 1.0, 0.0]


In [None]:
# euclidean distance (numpy version)

In [12]:
import numpy as np

def euclidean_distance(vec1, vec2):
    """
    Euclidean (L2) distance between two vectors, computed with NumPy.

    vec1, vec2: array-likes (e.g. lists) whose shapes NumPy can subtract
    elementwise.

    Returns the square root of the sum of squared elementwise differences.
    """
    difference = np.array(vec1) - np.array(vec2)
    squared_total = np.sum(difference ** 2)
    return np.sqrt(squared_total)

In [13]:
def euclidean_distance_matrix(X, Y=None):
    """
    Compute pairwise Euclidean distances between sets of vectors using NumPy.

    X: np.array of shape (n_samples, n_features)
    Y: np.array of shape (m_samples, n_features), optional.
       If None, compares X with itself.

    Returns an (n_samples, m_samples) array whose entry [i, j] is the
    distance between row i of X and row j of Y. Entries are never NaN for
    finite input, and when Y is None the diagonal is exactly 0.
    """
    X = np.array(X)
    compare_with_self = Y is None
    Y = X if compare_with_self else np.array(Y)

    # Expansion trick: ||x - y||^2 = ||x||^2 + ||y||^2 - 2 * x.y
    X_sq = np.sum(X ** 2, axis=1).reshape(-1, 1)
    Y_sq = np.sum(Y ** 2, axis=1).reshape(1, -1)
    sq_dists = X_sq + Y_sq - 2 * np.dot(X, Y.T)

    # Floating-point cancellation can leave tiny negative values for
    # (near-)identical rows; clamp them so np.sqrt does not return NaN.
    sq_dists = np.maximum(sq_dists, 0)
    if compare_with_self:
        # The distance from a row to itself is exactly zero; remove any
        # round-off residue left on the diagonal by the expansion.
        np.fill_diagonal(sq_dists, 0)

    return np.sqrt(sq_dists)

In [15]:
# Distance between one pair of vectors (plain lists are accepted).
v1, v2 = [1, 2, 3], [2, 3, 4]
print("Euclidean distance (v1, v2):", euclidean_distance(v1, v2))

# All-pairs distances among the rows of a small matrix.
X = np.array([[1, 0], [0, 1], [1, 1]])
dist_matrix = euclidean_distance_matrix(X)

# Round for display only; dist_matrix itself keeps full precision.
print("Pairwise Euclidean distance matrix:")
print(np.round(dist_matrix, decimals=3))

Euclidean distance (v1, v2): 1.7320508075688772
Pairwise Euclidean distance matrix:
[[0.    1.414 1.   ]
 [1.414 0.    1.   ]
 [1.    1.    0.   ]]
