# Mizan vs Cosine Similarity — Proof & Experiments

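This notebook demonstrates **when** and **why** Mizan similarity behaves differently from cosine similarity, using simple controlled experiments. Here the Mizan distance is $d_p(v_1, v_2) = \dfrac{\lVert v_1 - v_2 \rVert^p}{\lVert v_1 \rVert^p + \lVert v_2 \rVert^p + \varepsilon}$ and the Mizan similarity is $1 - d_p(v_1, v_2)$, so, unlike cosine similarity, it depends on the vectors' magnitudes as well as their directions.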


In [None]:
import numpy as np
import math
from pprint import pprint


In [None]:
def cosine_similarity(v1, v2, eps=1e-8):
    """Return the cosine similarity of two vectors.

    Computes ``dot(v1, v2) / (||v1|| * ||v2|| + eps)`` after converting
    both inputs to float arrays.

    Args:
        v1: First vector (any array-like accepted by ``np.asarray``).
        v2: Second vector, same length as ``v1``.
        eps: Small constant added to the denominator so that an all-zero
            input yields 0.0 instead of a division by zero.

    Returns:
        The similarity as a Python ``float``.
    """
    a, b = (np.asarray(vec, dtype=float) for vec in (v1, v2))
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    dot = np.dot(a, b)
    return float(dot) / float(norm_product + eps)

def mizan_distance(v1, v2, p=2.0, eps=1e-8):
    """Return the Mizan distance between two vectors.

    Computes ``||v1 - v2||**p / (||v1||**p + ||v2||**p + eps)`` using the
    Euclidean norm, after converting both inputs to float arrays. Identical
    vectors give 0.0 because the numerator vanishes.

    Args:
        v1: First vector (any array-like accepted by ``np.asarray``).
        v2: Second vector, same shape as ``v1``.
        p: Exponent applied to each norm.
        eps: Small constant added to the denominator so that two all-zero
            inputs do not divide by zero.

    Returns:
        The distance as a Python ``float``.
    """
    a, b = (np.asarray(vec, dtype=float) for vec in (v1, v2))
    gap_p = np.linalg.norm(a - b) ** p
    # Normalising by the combined magnitude is what makes the measure
    # relative to the vectors' scale rather than an absolute distance.
    magnitude_p = np.linalg.norm(a) ** p + np.linalg.norm(b) ** p + eps
    return float(gap_p / magnitude_p)

def mizan_similarity(v1, v2, p=2.0, eps=1e-8):
    """Return the Mizan similarity, defined as ``1 - mizan_distance``.

    Args:
        v1: First vector.
        v2: Second vector.
        p: Exponent forwarded to ``mizan_distance``.
        eps: Stabilising constant forwarded to ``mizan_distance``.

    Returns:
        ``1.0`` minus the Mizan distance of the two vectors.
    """
    distance = mizan_distance(v1, v2, p=p, eps=eps)
    return 1.0 - distance

def print_header(title):
    """Print a blank line, then ``title`` framed by two 80-character '=' rules."""
    rule = "=" * 80
    # The leading empty string produces the blank line before the top rule.
    print("", rule, title, rule, sep="\n")


In [None]:
# Test 1: compare v1 with positive multiples of itself (same direction,
# growing magnitude). Cosine stays at ~1 for every k, while for p=2 the
# Mizan similarity works out to roughly 2k / (1 + k**2), so it drops as k
# moves away from 1.
print_header("Test 1: Scaling v2 = k * v1")

v1 = np.array([1.0, 2.0, 3.0])
ks = [1, 2, 3, 5, 10]

print(f"{'k':>5} | {'cosine':>10} | {'mizan':>10}")
print("-"*35)
for k in ks:
    v2 = k * v1
    cos, miz = cosine_similarity(v1, v2), mizan_similarity(v1, v2, p=2.0)
    print(f"{k:5} | {cos:10.4f} | {miz:10.4f}")


In [None]:
# Test 2: two vectors that agree in every coordinate except one, where
# v_noisy carries a value 100x larger than v_clean.
v_clean = np.array([10.0, 10.0, 10.0])
v_noisy = np.array([10.0, 10.0, 1000.0])

# Evaluate all three measures up front; the report below only prints.
cos = cosine_similarity(v_clean, v_noisy)
dist = mizan_distance(v_clean, v_noisy, p=2.0)
miz = mizan_similarity(v_clean, v_noisy, p=2.0)

print_header("Test 2: Outlier dimension")
print("v_clean:", v_clean)
print("v_noisy:", v_noisy)
print(f"cosine(v_clean, v_noisy) = {cos:.4f}")
print(f"mizan_similarity(v_clean, v_noisy) = {miz:.6f}")
print(f"mizan_distance(v_clean, v_noisy)   = {dist:.6f}")


In [None]:
# Test 3: one random direction at two very different magnitudes
# (v_big is v_small scaled up by a factor of 100).
# Seeding the global NumPy RNG makes `base` reproducible.
np.random.seed(42)
base = np.random.randn(8)
v_small, v_big = 0.1 * base, 10.0 * base

print_header("Test 3: Different scales of the same direction")
print("base   :", base)
print("v_small:", v_small)
print("v_big  :", v_big)

cos_small_big = cosine_similarity(v_small, v_big)
print(f"cosine(v_small, v_big) = {cos_small_big:.6f}")

miz_small_big = mizan_similarity(v_small, v_big)
print(f"mizan(v_small, v_big)  = {miz_small_big:.6f}")


In [None]:
def normalize(v):
    """Scale ``v`` to approximately unit Euclidean length.

    The norm is padded with 1e-8 before dividing, so an all-zero input is
    returned as zeros rather than producing NaNs.

    Args:
        v: Vector (any array-like accepted by ``np.asarray``).

    Returns:
        A float ``np.ndarray`` equal to ``v / (||v|| + 1e-8)``.
    """
    arr = np.asarray(v, dtype=float)
    return arr / (np.linalg.norm(arr) + 1e-8)

# Test 4: a random vector and a noisy copy of it, both rescaled to unit
# length. With magnitudes equalised, the p=2 Mizan distance reduces to
# about (2 - 2*cos) / 2, so the two similarities should nearly coincide.
# NOTE: no seed is set here; the draws continue from the current state of
# the global NumPy RNG, so the values depend on what ran beforehand.
a = np.random.randn(8)
b = a + 0.5 * np.random.randn(8)
a_n, b_n = normalize(a), normalize(b)

print_header("Test 4: Normalized vectors")
print("a_n:", a_n)
print("b_n:", b_n)

cos_n = cosine_similarity(a_n, b_n)
print(f"cosine(normalized a, normalized b) = {cos_n:.6f}")

miz_n = mizan_similarity(a_n, b_n)
print(f"mizan(normalized a, normalized b)  = {miz_n:.6f}")


In [None]:
# Plot both similarities for v2 = k * v over a continuous range of k.
# Only the import is guarded: a missing matplotlib skips the plot, while an
# ImportError raised later during plotting surfaces as-is instead of being
# misreported as "matplotlib not installed".
try:
    import matplotlib.pyplot as plt
except ImportError:
    print("matplotlib not installed; skipping plot.")
else:
    v = np.array([1.0, 2.0, 3.0])
    ks = np.linspace(0.1, 5.0, 50)

    cos_vals = []
    miz_vals = []

    # Evaluate both measures at every scale factor.
    for k in ks:
        v2 = k * v
        cos_vals.append(cosine_similarity(v, v2))
        miz_vals.append(mizan_similarity(v, v2))

    plt.figure()
    plt.plot(ks, cos_vals, label="cosine")
    plt.plot(ks, miz_vals, label="mizan")
    plt.xlabel("Scale factor k")
    plt.ylabel("Similarity")
    plt.title("Cosine vs Mizan as one vector is scaled")
    plt.legend()
    plt.show()
