## Different Ways of Calculating Distance

In [16]:
# single_pair_distances.py
import numpy as np

# ---------------------------
# Single-pair distance funcs
# ---------------------------

def euclidean(x, y):
    """Return the Euclidean (L2) distance between two vectors.

    Both inputs are converted to float arrays and the result is
    ``sqrt(sum((x - y) ** 2))``.

    Returns:
        The distance as a built-in ``float``.
    """
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    # np.linalg.norm returns np.float64; convert so callers always get a
    # plain Python float.
    return float(np.linalg.norm(x - y))

def squared_euclidean(x, y):
    """Return the squared Euclidean distance, ``sum((x - y) ** 2)``.

    Inputs are converted to float arrays; the result is a built-in ``float``.
    """
    delta = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    # The dot product of the difference with itself is the sum of its
    # squared components.
    sum_of_squares = np.dot(delta, delta)
    return float(sum_of_squares)

def manhattan(x, y):
    """Return the Manhattan (L1) distance, ``sum(|x - y|)``.

    Inputs are converted to float arrays; the result is a built-in ``float``.
    """
    delta = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    absolute_gaps = np.abs(delta)
    return float(np.sum(absolute_gaps))

def minkowski(x, y, p=3):
    """Return the Minkowski distance of order ``p`` between two vectors.

    Computes ``(sum(|x - y| ** p)) ** (1 / p)``. ``p=1`` gives the Manhattan
    distance, ``p=2`` the Euclidean distance, and ``p=inf`` the Chebyshev
    distance (the limit of the formula as ``p`` grows).

    Args:
        x, y: array-likes of equal shape; converted to float arrays.
        p: order of the norm; must be a positive number (may be ``inf``).

    Returns:
        The distance as a built-in ``float``.

    Raises:
        ValueError: if ``p`` is not a positive number (including NaN).
    """
    # `not p > 0` (rather than `p <= 0`) also rejects NaN.
    if not p > 0:
        raise ValueError(f"p must be a positive number, got {p!r}")
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    gaps = np.abs(x - y)
    if np.isinf(p):
        # The general formula degenerates to inf ** 0 == 1 here, so use the
        # limit directly. `initial=0.0` keeps empty input at 0.0, matching
        # the finite-p branch; it cannot change a non-empty result because
        # gaps are non-negative.
        return float(np.max(gaps, initial=0.0))
    return float(np.sum(gaps ** p) ** (1.0 / p))

def chebyshev(x, y):
    """Return the Chebyshev (L-infinity) distance, ``max(|x - y|)``.

    Inputs are converted to float arrays; the result is a built-in ``float``.
    """
    delta = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    # The largest absolute per-coordinate gap is the distance.
    largest_gap = np.max(np.abs(delta))
    return float(largest_gap)

def cosine_distance(x, y):
    """Return the cosine distance ``1 - cos(angle between x and y)``.

    The result lies in ``[0, 2]``: 0 for vectors pointing the same way, 1 for
    orthogonal vectors and 2 for opposite vectors.

    Cosine similarity is undefined when either vector has zero length; in
    that case 1.0 is returned (the same value as for orthogonal vectors).
    NaN values in the inputs propagate to a NaN result instead of being
    reported as distance 0.

    Returns:
        The distance as a built-in ``float``.
    """
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    nx, ny = np.linalg.norm(x), np.linalg.norm(y)
    if nx == 0 or ny == 0:
        # Undefined similarity: fall back to the "orthogonal" distance.
        return 1.0
    cos_sim = np.dot(x, y) / (nx * ny)
    # Round-off can push the ratio just outside [-1, 1]. np.clip pulls it back
    # while leaving NaN untouched; Python's min/max would silently turn a NaN
    # similarity into 1.0, i.e. a distance of 0.
    cos_sim = float(np.clip(cos_sim, -1.0, 1.0))
    return 1.0 - cos_sim

def mahalanobis(x, y, cov=None, regularize=1e-8):
    """
    Mahalanobis distance between x and y: sqrt(d^T * VI * d), where
    d = x - y and VI is the inverse covariance.

    cov        : covariance matrix of the data. When given, its
                 pseudo-inverse is used as VI.
    regularize : small constant added to the per-feature variances in the
                 cov=None fallback so they are never zero. Not used when
                 `cov` is given.

    If `cov` is None, VI is a diagonal matrix built from the variances of
    the two points x and y alone. The variance of just two values is
    (difference / 2) ** 2, so every feature on which x and y differ adds
    roughly 4 to the squared distance regardless of how far apart they are.
    Prefer passing a covariance matrix computed from your dataset.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    delta = x - y

    if cov is not None:
        # Pseudo-inverse rather than a plain inverse, for stability.
        inv_cov = np.linalg.pinv(np.asarray(cov, dtype=float))
    else:
        # Diagonal fallback: per-feature variance of the two stacked points.
        pair_var = np.var(np.vstack([x, y]), axis=0) + regularize
        inv_cov = np.diag(1.0 / pair_var)

    quad_form = np.dot(np.dot(delta.T, inv_cov), delta)
    return float(np.sqrt(quad_form))

# ---------------------------
# Tiny demo (exam-style examples)
# ---------------------------
if __name__ == "__main__":
    # Two 3-D sample points shared by every example below.
    a = np.array([1.0, 2.0, 3.0])
    b = np.array([2.0, 0.0, 4.0])

    # (label, distance function, extra keyword arguments), printed in order.
    simple_demos = [
        ("Euclidean:", euclidean, {}),
        ("Squared Euclidean:", squared_euclidean, {}),
        ("Manhattan (L1):", manhattan, {}),
        ("Minkowski p=3:", minkowski, {"p": 3}),
        ("Chebyshev:", chebyshev, {}),
        ("Cosine distance:", cosine_distance, {}),
        # Mahalanobis, variant 1: no covariance given, so the diagonal
        # fallback inside mahalanobis() is used.
        ("Mahalanobis (diag-fallback):", mahalanobis, {"cov": None}),
    ]
    for label, distance_fn, extra in simple_demos:
        print(label, distance_fn(a, b, **extra))

    # Mahalanobis, variant 2 (better): covariance estimated from a small toy
    # dataset (rows = samples, columns = features).
    X_toy = np.vstack([a, b, [2.0, 2.0, 2.0], [0.0, 1.0, 0.0]])
    cov = np.cov(X_toy, rowvar=False)   # shape (d,d)
    print("Mahalanobis (from toy-cov):", mahalanobis(a, b, cov=cov))


Euclidean: 2.449489742783178
Squared Euclidean: 6.0
Manhattan (L1): 4.0
Minkowski p=3: 2.154434690031884
Chebyshev: 2.0
Cosine distance: 0.16333997346592444
Mahalanobis (diag-fallback): 3.4641015631762317
Mahalanobis (from toy-cov): 2.449489742783178
