# Meaning of $X^{\mathsf {T}}X$

In [1]:
import numpy as np

In [2]:
# Data matrix with 5 rows and 3 columns. The cells below reduce over axis 0,
# i.e. they treat each column as one variable and each row as one observation.
X = np.array(
    [
        [65, 175, 25],
        [67, 183, 21],
        [52, 165, 22],
        [68, 170, 39],
        [45, 142, 10],
    ]
)
X

array([[ 65, 175,  25],
       [ 67, 183,  21],
       [ 52, 165,  22],
       [ 68, 170,  39],
       [ 45, 142,  10]])

In [3]:
def sscp(M):
    """Return the sums-of-squares-and-cross-products matrix ``M.T @ M``.

    Entry ``(i, j)`` of the result is the dot product of column ``i`` and
    column ``j`` of ``M``; the diagonal therefore holds each column's sum
    of squares.
    """
    transposed = M.T
    return transposed @ M

# Raw SSCP of the data matrix (X has not been centred or scaled at this point).
sscp(X)

array([[ 18067,  50166,   7278],
       [ 50166, 140403,  19898],
       [  7278,  19898,   3171]])

In [4]:
# Sample covariance: centre each column, then divide the SSCP matrix by n - 1.
# The divisor is derived from X rather than hard-coded so the cell stays
# correct if the number of rows in X changes.
n_obs = X.shape[0]
center = np.mean(X, axis=0)  # per-column mean
X_centered = X - center
cov_manual = sscp(X_centered) / (n_obs - 1)
# np.cov expects variables in rows, hence the transpose; it also divides by n - 1 by default.
np.testing.assert_allclose(cov_manual, np.cov(X.T))
print(cov_manual) # Covariance
print(np.cov(X.T))

[[106.3  141.75  82.05]
 [141.75 239.5   89.75]
 [ 82.05  89.75 108.3 ]]
[[106.3  141.75  82.05]
 [141.75 239.5   89.75]
 [ 82.05  89.75 108.3 ]]


In [5]:
# Pearson correlation: standardise each column (z-scores), then divide the
# SSCP matrix by n. np.std defaults to ddof=0 (population standard deviation),
# so the matching divisor is n, not n - 1. The divisor is derived from X
# rather than hard-coded so the cell stays correct if X changes size.
n_obs = X.shape[0]
std = np.std(X, axis=0)
X_std = (X - center) / std  # `center` is the per-column mean from the covariance cell
corr_manual = sscp(X_std) / n_obs
# np.corrcoef expects variables in rows, hence the transpose.
np.testing.assert_allclose(corr_manual, np.corrcoef(X.T))
print(corr_manual) # Pearson correlation coefficient
print(np.corrcoef(X.T))

[[1.         0.88839003 0.76471168]
 [0.88839003 1.         0.55727232]
 [0.76471168 0.55727232 1.        ]]
[[1.         0.88839003 0.76471168]
 [0.88839003 1.         0.55727232]
 [0.76471168 0.55727232 1.        ]]


In [6]:
# Scale every column of X to unit Euclidean length (no centring); the SSCP of
# the result holds the dot products of those unit-length columns.
column_lengths = np.linalg.norm(X, axis=0)
X_norm = X / column_lengths
print(sscp(X_norm)) # cosine similarity

[[1.         0.99604339 0.96154787]
 [0.99604339 1.         0.94302479]
 [0.96154787 0.94302479 1.        ]]


## References

- https://towardsdatascience.com/x%E1%B5%80x-covariance-correlation-and-cosine-matrices-d2230997fb7