This notebook demonstrates that a specific column of the covariance matrix of the Pearson correlation matrix (`corr_cov` below) can be calculated column by column directly from $x$, without forming the full correlation matrix.

This notebook is adapted from `test2.ipynb`.

In [55]:
import numpy as np

def generate_x(n: int, seed=None):
    """Return a random symmetric ``n`` x ``n`` float64 matrix with non-negative entries.

    Parameters
    ----------
    n : int
        Number of rows and columns of the generated matrix.
    seed : int or None, optional
        When given, samples are drawn from a private
        ``np.random.RandomState(seed)`` so the result is reproducible.
        When ``None`` (the default) the global ``np.random`` state is used,
        exactly as before this parameter existed.

    Returns
    -------
    numpy.ndarray
        ``(n, n)`` float64 array equal to ``|A| + |A|.T`` where ``A`` holds
        standard-normal samples.
    """
    # Fall back to the global generator so un-seeded calls behave as before.
    rng = np.random if seed is None else np.random.RandomState(seed)
    # Absolute values of standard-normal samples give non-negative entries.
    matrix_np = np.abs(rng.randn(n, n))
    # Adding the transpose makes the array symmetric.
    matrix_np = matrix_np + matrix_np.T
    # Explicit float64 dtype. NOTE(review): originally tagged "Sparse" --
    # presumably that describes the real input data; this toy matrix is dense.
    matrix_np = matrix_np.astype("float64")
    return matrix_np

# Build the 5x5 symmetric toy input; the bare name below displays it.
x = generate_x(5)
x

array([[0.50096432, 1.8971359 , 1.160926  , 3.38979981, 1.01689986],
       [1.8971359 , 2.60831351, 2.71217697, 3.44443043, 3.85629207],
       [1.160926  , 2.71217697, 1.71142914, 0.18357595, 1.81075194],
       [3.38979981, 3.44443043, 0.18357595, 0.1671944 , 1.76541959],
       [1.01689986, 3.85629207, 1.81075194, 1.76541959, 1.91193823]])

In [56]:
# Centre every row of x in place (zero mean per row). Later cells rely on `x`
# holding the centred matrix.
x -= x.mean(axis=1, keepdims=True)
n = x.shape[1]  # number of columns of x

# Per-row population standard deviation (ddof=0), wrapped into a 2D row
# vector so that std.T @ std is the outer product std[i] * std[j].
std = np.std(x, axis=1)
std = np.array([std])
std_mat = std.T @ std # Dense, but row i is std[i] * std, so the 1D vector is enough to store.

# Covariance between the rows of x, then the Pearson correlation obtained by
# normalising it with the product of standard deviations. Reusing `cov`
# avoids computing x @ x.T a second time.
cov = x @ x.T / n
corr = cov / std_mat # Dense

# Row-centre the correlation matrix and take the covariance between its rows.
corr -= corr.mean(axis=1, keepdims=True)
corr_cov = corr @ corr.T / n

# Sanity check: the manual result agrees with NumPy's biased covariance estimator.
np.testing.assert_allclose(corr_cov, np.cov(corr, bias=True), atol=1e-12)

print(std_mat, '\n')

[[1.00642187 0.6858475  0.83421153 1.45455845 0.95011544]
 [0.6858475  0.46738531 0.56849112 0.99123967 0.6474763 ]
 [0.83421153 0.56849112 0.69146835 1.20566679 0.78753978]
 [1.45455845 0.99123967 1.20566679 2.10223998 1.37318007]
 [0.95011544 0.6474763  0.78753978 1.37318007 0.8969592 ]] 



In [57]:
# Show that there is no need to calculate the full std_mat: row i of std_mat
# is simply std_f[i] * std_f, built from the 1D vector of standard deviations.
std_f = std.flatten()
for i in range(n):
    std_row = std_f[i] * std_f
    # Turn the visual comparison into an actual check against the stored row.
    np.testing.assert_allclose(std_row, std_mat[i])
    print(std_row)

[1.00642187 0.6858475  0.83421153 1.45455845 0.95011544]
[0.6858475  0.46738531 0.56849112 0.99123967 0.6474763 ]
[0.83421153 0.56849112 0.69146835 1.20566679 0.78753978]
[1.45455845 0.99123967 1.20566679 2.10223998 1.37318007]
[0.95011544 0.6474763  0.78753978 1.37318007 0.8969592 ]


In [58]:
# Show the row-centred correlation matrix, then the covariance matrix
# computed from it, each followed by a blank line.
for matrix in (corr, corr_cov):
    print(matrix, '\n')

[[ 0.84071453  0.28247872 -0.61403626 -0.63385819  0.1247012 ]
 [ 0.27885878  0.8370946  -0.32933293 -0.70611322 -0.08050724]
 [-0.78159808 -0.49327482  0.67315271  0.21260895  0.38911124]
 [-0.64256063 -0.71119573  0.37146833  0.83201209  0.15027594]
 [-0.19613478 -0.39772328  0.23583709 -0.16185759  0.51987856]] 

[[ 0.31619245  0.22213214 -0.25920439 -0.29556817 -0.050926  ]
 [ 0.22213214  0.27840541 -0.20680336 -0.29929031 -0.07857181]
 [-0.25920439 -0.20680336  0.30079206  0.26769226  0.13522361]
 [-0.29556817 -0.29929031  0.26769226  0.35429985  0.08799027]
 [-0.050926   -0.07857181  0.13522361  0.08799027  0.10974868]] 



In [59]:
# Pick the row/column of interest (here: the last one).
index_s = -1

# Correlation of every row of x with the selected row, computed from x and
# the 1D standard deviations only.
selected_row = x[index_s]
pair_std = std_f[index_s] * std_f
corr_s = (x @ selected_row / pair_std) / n

# Centre the vector by its own mean, then display it.
corr_s = corr_s - corr_s.mean()
corr_s

array([-0.19613478, -0.39772328,  0.23583709, -0.16185759,  0.51987856])

In [60]:
# Column `index_s` of corr_cov, built one entry at a time straight from x,
# without forming the full correlation matrix.
corr_s_cov = np.empty(n)

for i in range(n):
    # Correlation of every row of x with row i, centred by its own mean.
    corr_i = (x @ x[i].T / (std_f[i] * std_f)) / n
    corr_i -= corr_i.mean()
    # Covariance between centred vector i and the selected centred vector.
    corr_s_cov[i] = corr_i @ corr_s / n

# The column computed above must match the one taken from the full matrix.
np.testing.assert_allclose(corr_s_cov, corr_cov[:, index_s], atol=1e-12)
corr_s_cov

array([-0.050926  , -0.07857181,  0.13522361,  0.08799027,  0.10974868])