This notebook demonstrates that any single column of the covariance matrix of the Pearson correlation matrix can be computed on its own, one column at a time, directly from $x$, without first building the full matrices.

In [115]:
import numpy as np

def generate_x(n: int, seed=None):
    """Generate a random symmetric ``n`` x ``n`` matrix with non-negative entries.

    Parameters
    ----------
    n : int
        Number of rows and columns of the matrix.
    seed : int, optional
        When given, samples are drawn from a private
        ``np.random.RandomState(seed)`` so the result is reproducible.
        When ``None`` (the default) the global NumPy random state is used,
        which is the behaviour callers had before this parameter existed.

    Returns
    -------
    numpy.ndarray
        A float64 array of shape ``(n, n)`` that equals its own transpose.

    Notes
    -----
    The matrix produced here is fully dense. The "sparse" remark that used to
    sit on this function presumably describes ``x`` in the real application
    rather than this toy input -- TODO confirm.
    """
    sampler = np.random if seed is None else np.random.RandomState(seed)
    # Absolute values of standard-normal draws are non-negative.
    half = np.abs(sampler.randn(n, n))
    # A + A.T is symmetric; randn already yields float64, so no cast is needed.
    return half + half.T

# Draw a 5x5 example matrix and display it. No seed is set in the visible
# cells, so the values (and every output below) change from run to run.
x = generate_x(n=5)
x

array([[1.70211204, 2.0232065 , 0.60164408, 1.49471855, 1.6021923 ],
       [2.0232065 , 0.19967807, 0.33839322, 0.98407353, 3.43774273],
       [0.60164408, 0.33839322, 2.59393913, 1.44558474, 2.02007285],
       [1.49471855, 0.98407353, 1.44558474, 3.03079067, 2.2081618 ],
       [1.6021923 , 3.43774273, 2.02007285, 2.2081618 , 1.17758276]])

In [116]:
# Centre every row of x IN PLACE; the later cells rely on x being row-centred.
x -= x.mean(axis=1, keepdims=True)
n = len(x[0])

# Per-row population standard deviation (ddof=0), reshaped to a (1, rows) row
# vector so that std.T @ std is the matrix of pairwise products std[i] * std[j].
std = np.std(x, axis=1)
std = np.array([std])
std_mat = std.T @ std # Dense, but can be stored as a 1-D array.

# Gram matrix of the centred rows, computed once and shared by cov and corr.
gram = x @ x.T
cov = gram / n
corr = (gram / std_mat) / n # Dense

# Centre the correlation rows, then take their biased (divide-by-n) covariance.
corr -= corr.mean(axis=1, keepdims=True)
corr_cov = corr @ corr.T / n

# Sanity check: the hand-rolled result must agree with NumPy's own covariance.
np.testing.assert_allclose(corr_cov, np.cov(corr, bias=True), rtol=1e-7, atol=1e-12)

print(std_mat, '\n')

[[0.22618996 0.57393422 0.40223248 0.34035075 0.3625589 ]
 [0.57393422 1.45630023 1.02062436 0.86360573 0.91995667]
 [0.40223248 1.02062436 0.71528801 0.60524405 0.64473669]
 [0.34035075 0.86360573 0.60524405 0.51212988 0.54554675]
 [0.3625589  0.91995667 0.64473669 0.54554675 0.58114409]] 



In [117]:
# Prove that there is no need to calculate the full std_mat:
# row i of std_mat is simply std[i] * std.
std_f = std.flatten()
for i in range(n):
    row_i = std_f[i] * std_f
    # Check the claim numerically instead of comparing printed rows by eye.
    np.testing.assert_allclose(row_i, std_mat[i])
    print(row_i)

[0.22618996 0.57393422 0.40223248 0.34035075 0.3625589 ]
[0.57393422 1.45630023 1.02062436 0.86360573 0.91995667]
[0.40223248 1.02062436 0.71528801 0.60524405 0.64473669]
[0.34035075 0.86360573 0.60524405 0.51212988 0.54554675]
[0.3625589  0.91995667 0.64473669 0.54554675 0.58114409]


In [118]:
# Show the row-centred correlation matrix, then the covariance of its rows.
for matrix in (corr, corr_cov):
    print(matrix, '\n')

[[ 0.87762882  0.10825558 -0.98071005 -0.20023831  0.19506396]
 [ 0.04945133  0.81882458 -0.03345737  0.16290133 -0.99771987]
 [-0.87748665  0.12857028  0.98085223  0.30996686 -0.54190273]
 [-0.31240762  0.10953626  0.09457414  0.76545951 -0.65716229]
 [ 0.40633235 -0.72764723 -0.43385774 -0.33372459  1.08889721]] 

[[ 0.36437783 -0.01247681 -0.37717874 -0.12730656  0.19651096]
 [-0.01247681  0.33920406  0.1240456   0.17028721 -0.34039692]
 [-0.37717874  0.1240456   0.42766445  0.194873   -0.31383506]
 [-0.12730656  0.17028721  0.194873    0.2272663  -0.24374233]
 [ 0.19651096 -0.34039692 -0.31383506 -0.24374233  0.43597565]] 



In [119]:
index_s = 3
# Build row `index_s` of the centred correlation matrix from x alone:
# x @ x[index_s] is one column of the Gram matrix (x[index_s] is 1-D, so no
# transpose is needed), scaled by the matching row of std_mat and by n.
corr_s = (x @ x[index_s] / std_mat[index_s]) / n
corr_s -= corr_s.mean()
# The single-row result must match the same row of the full matrix.
np.testing.assert_allclose(corr_s, corr[index_s], rtol=1e-7, atol=1e-12)
corr_s

array([-0.31240762,  0.10953626,  0.09457414,  0.76545951, -0.65716229])

In [120]:
# Covariance of correlation row `index_s` with every other row, computed one
# row at a time so the full correlation matrix is never held in memory.
corr_s_cov = np.empty(n)

for i in range(n):
    # Rebuild centred correlation row i on the fly (x[i] is 1-D, so no
    # transpose is needed).
    corr_i = (x @ x[i] / std_mat[i]) / n
    corr_i -= corr_i.mean()
    corr_s_cov[i] = corr_i @ corr_s

corr_s_cov = corr_s_cov / n
# The streamed result must match row `index_s` of the dense covariance matrix.
np.testing.assert_allclose(corr_s_cov, corr_cov[index_s], rtol=1e-7, atol=1e-12)
corr_s_cov

array([-0.12730656,  0.17028721,  0.194873  ,  0.2272663 , -0.24374233])