In [1]:
import numpy as np

# Toy data set: five labelled 2-D points.
# Row i of X holds the coordinates of the point named labels[i].
labels = list("ABCDE")
X = np.array(
    [(2, 5), (1, -1), (3, 4), (8, -2), (9, -1)],  # A, B, C, D, E
    dtype=float,
)

X


array([[ 2.,  5.],
       [ 1., -1.],
       [ 3.,  4.],
       [ 8., -2.],
       [ 9., -1.]])

In [2]:
from math import sqrt

def euclidean(a, b):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    a, b : array-like
        Coordinates of the two points. Anything ``np.asarray`` accepts
        (ndarray, list, tuple, scalar) is allowed; the two inputs must have
        the same shape or be broadcastable against each other.

    Returns
    -------
    float
        ``sqrt(sum((a - b) ** 2))`` as a plain Python float.
    """
    # Coerce to float before subtracting: unsigned / narrow integer arrays
    # would otherwise wrap around silently when squared, and plain Python
    # lists do not support element-wise subtraction at all.
    diff = np.asarray(a, dtype=float) - np.asarray(b, dtype=float)
    return sqrt((diff ** 2).sum())

# Build the full n x n table of pairwise distances by brute force, calling
# euclidean() once for every ordered pair (i, j) of rows of X.
n = X.shape[0]
D0 = np.zeros((n, n), dtype=float)
for i, j in np.ndindex(n, n):
    D0[i, j] = euclidean(X[i], X[j])

# Pretty-print the table: a header row of labels, then one row per point.
# NOTE(review): the header is indented by 6 characters while each data row
# starts with a 5-character prefix, so the labels print one column to the
# right of the numbers beneath them.
print("Point-to-point distance matrix D0 (Euclidean):")
print("      " + "  ".join(lab.rjust(6) for lab in labels))
for i, lab in enumerate(labels):
    row = "  ".join(format(D0[i, j], "6.3f") for j in range(n))
    print(f"{lab:>3s}  {row}")

D0


Point-to-point distance matrix D0 (Euclidean):
           A       B       C       D       E
  A   0.000   6.083   1.414   9.220   9.220
  B   6.083   0.000   5.385   7.071   8.000
  C   1.414   5.385   0.000   7.810   7.810
  D   9.220   7.071   7.810   0.000   1.414
  E   9.220   8.000   7.810   1.414   0.000


array([[0.        , 6.08276253, 1.41421356, 9.21954446, 9.21954446],
       [6.08276253, 0.        , 5.38516481, 7.07106781, 8.        ],
       [1.41421356, 5.38516481, 0.        , 7.81024968, 7.81024968],
       [9.21954446, 7.07106781, 7.81024968, 0.        , 1.41421356],
       [9.21954446, 8.        , 7.81024968, 1.41421356, 0.        ]])

In [3]:
from scipy.spatial.distance import pdist, squareform

# pdist returns each unordered pair of rows exactly once, as a flat
# ("condensed") vector; squareform expands that vector back into the
# symmetric n x n matrix with a zero diagonal.
Y = pdist(X, metric="euclidean")
D_sq = squareform(Y)

print("Condensed pdist vector Y (SciPy’s internal compact format):", Y, sep="\n")

# Cross-check SciPy's result against the hand-built matrix D0.
print("\nDoes squareform(Y) match our D0?", np.allclose(D_sq, D0), sep="\n")


Condensed pdist vector Y (SciPy’s internal compact format):
[6.08276253 1.41421356 9.21954446 9.21954446 5.38516481 7.07106781
 8.         7.81024968 7.81024968 1.41421356]

Does squareform(Y) match our D0?
True


In [4]:
# Display the square distance matrix that squareform(Y) produced.
D_sq

array([[0.        , 6.08276253, 1.41421356, 9.21954446, 9.21954446],
       [6.08276253, 0.        , 5.38516481, 7.07106781, 8.        ],
       [1.41421356, 5.38516481, 0.        , 7.81024968, 7.81024968],
       [9.21954446, 7.07106781, 7.81024968, 0.        , 1.41421356],
       [9.21954446, 8.        , 7.81024968, 1.41421356, 0.        ]])

In [6]:
from scipy.cluster.hierarchy import linkage

# Agglomerative clustering with complete linkage: the distance between two
# clusters is the largest pairwise Euclidean distance between their members.
# Each row of the result describes one merge as
# [cluster_a, cluster_b, merge_distance, size_of_new_cluster]; ids >= len(X)
# refer to clusters created by earlier rows (id - len(X) is that row's index).
Z_scipy = linkage(X, method="complete", metric="euclidean")

print("SciPy linkage matrix Z_scipy:", Z_scipy, sep="\n")


SciPy linkage matrix Z_scipy:
[[0.         2.         1.41421356 2.        ]
 [3.         4.         1.41421356 2.        ]
 [1.         5.         6.08276253 3.        ]
 [6.         7.         9.21954446 5.        ]]


In [7]:
# Display the linkage matrix as a cell result (repr form of the array).
Z_scipy

array([[0.        , 2.        , 1.41421356, 2.        ],
       [3.        , 4.        , 1.41421356, 2.        ],
       [1.        , 5.        , 6.08276253, 3.        ],
       [6.        , 7.        , 9.21954446, 5.        ]])

In [8]:
# Display the point matrix: one row per labelled point, one column per coordinate.
X

array([[ 2.,  5.],
       [ 1., -1.],
       [ 3.,  4.],
       [ 8., -2.],
       [ 9., -1.]])

In [9]:
# Sum over axis 0 (down the rows): one total per coordinate column.
np.sum(X, axis=0)

array([23.,  5.])

In [10]:
# Sum over axis 1 (across the columns): one total per point (row).
np.sum(X, axis=1)

array([7., 0., 7., 6., 8.])