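Compare the PCA results obtained from the O/E matrix itself (MOSAIC) with those obtained from its correlation matrix (HOMER).<br>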

In [1]:
import numpy as np
import sympy
from sympy import lambdify
# Display arrays with two decimals and without scientific notation.
np.set_printoptions(precision=2, suppress=True)

In [2]:
def generate_oe_matrix(variance_threshold_L = 0.5, variance_threshold_U = 0.6,
                       size = 6, rng = None):
    """Generate a random symmetric matrix whose first PC explains a bounded
       share of the variance.

    Candidate matrices are drawn repeatedly (rejection sampling) until the
    fraction of variance explained by the first principal component of the
    row-centered matrix lies in the half-open interval
    [variance_threshold_L, variance_threshold_U).

    Parameters
    ----------
    variance_threshold_L : float
        Inclusive lower bound on the explained-variance fraction (0..1).
    variance_threshold_U : float
        Exclusive upper bound on the explained-variance fraction (0..1).
    size : int
        Number of rows/columns of the generated square matrix (default 6).
    rng : numpy.random.Generator or None
        Source of randomness. When None, NumPy's global legacy RNG
        (``np.random.randn``) is used, so ``np.random.seed`` controls the draw.

    Returns
    -------
    numpy.ndarray
        The accepted ``size`` x ``size`` symmetric matrix with non-negative
        entries (before centering). The explained-variance fraction and the
        matrix are also printed.

    Raises
    ------
    ValueError
        If ``size`` < 2 or the threshold window cannot be satisfied; either
        condition would otherwise make the sampling loop run forever.
    """
    if size < 2:
        # A 1x1 matrix centers to zero, giving a 0/0 explained-variance ratio.
        raise ValueError("size must be at least 2")
    if (not variance_threshold_L < variance_threshold_U
            or variance_threshold_L > 1 or variance_threshold_U <= 0):
        raise ValueError(
            "thresholds must satisfy L < U and overlap the interval (0, 1]")

    while True:
        # Draw a size x size array and take absolute values for non-negative entries
        if rng is None:
            noise = np.random.randn(size, size)
        else:
            noise = rng.standard_normal((size, size))
        half = np.abs(noise * 10)

        # Adding the transpose makes the array symmetric
        origin_matrix = half + half.T

        # Subtract the mean of each row to center the data
        centered = origin_matrix - origin_matrix.mean(axis=1, keepdims=True)

        # Only the singular values are needed, so skip computing U and Vh.
        # Their squares are the eigenvalues of centered @ centered.T / size.
        singular_values = np.linalg.svd(centered.T / np.sqrt(size), compute_uv=False)
        eigenvalues = singular_values * singular_values
        EV1_explained_variance = eigenvalues[0] / np.sum(eigenvalues)

        # Accept only if the first PC's share lies in [L, U)
        if variance_threshold_L <= EV1_explained_variance < variance_threshold_U:
            print(EV1_explained_variance, '\n')
            print(origin_matrix, '\n')
            return origin_matrix

# Seed NumPy's global RNG so that Restart & Run All regenerates the same matrix.
# NOTE(review): any output saved before this seed was introduced came from an
# unseeded draw and will differ until the notebook is re-executed.
np.random.seed(42)

# Generate a matrix that meets the conditions
oe = generate_oe_matrix()

# Pearson correlation between the rows of the O/E matrix
# (np.corrcoef treats each row as a variable by default).
corr_oe = np.corrcoef(oe)

0.5543560869957909 

[[12.71  7.55 21.55 22.32 16.11  2.62]
 [ 7.55 58.12 11.04  9.17  2.64  7.07]
 [21.55 11.04 10.86  6.48 28.39 33.14]
 [22.32  9.17  6.48 12.95 15.2  18.21]
 [16.11  2.64 28.39 15.2   0.99  5.77]
 [ 2.62  7.07 33.14 18.21  5.77 14.77]] 



In [3]:
# Center each matrix row-wise: remove every row's own mean from its entries.
oe_zero = oe - np.mean(oe, axis=1, keepdims=True)
corr_oe_zero = corr_oe - np.mean(corr_oe, axis=1, keepdims=True)

### PCA
MOSAIC performs PCA on the O/E matrix. <br>

In [4]:
# PCA of the row-centered O/E matrix via SVD.
# The transpose is scaled by 1/sqrt(n) so that y.T @ y equals
# oe_zero @ oe_zero.T / n, where n is the number of columns.
n = oe_zero.shape[1]
x_t = oe_zero.T
y = x_t / np.sqrt(n)

# The squared singular values of y are the eigenvalues of y.T @ y.
U, S, Vh = np.linalg.svd(y, full_matrices=True)
eigenvalues = np.square(S)
sum_eigenvalues = eigenvalues.sum()
explained_variances = eigenvalues / sum_eigenvalues
print('explained_variances', explained_variances, sep='\n')

cov_x = y.T @ y
V_Cx = Vh @ cov_x

print('Vh[0]', Vh[0], sep='\n')
print('corr_oe', np.corrcoef(oe), sep='\n')
print('cov_x', cov_x, sep='\n')

# Sum of absolute values along each row of cov_x.
cov_x_sum = [np.abs(row).sum() for row in cov_x]
print('cov_x_sum', cov_x_sum, sep='\n')

explained_variances
[0.55 0.33 0.07 0.04 0.01 0.  ]
Vh[0]
[ 0.13 -0.94  0.19  0.11  0.18  0.13]
corr_oe
[[ 1.   -0.35 -0.62 -0.4   0.62  0.49]
 [-0.35  1.   -0.43 -0.47 -0.3  -0.19]
 [-0.62 -0.43  1.    0.65 -0.5  -0.41]
 [-0.4  -0.47  0.65  1.   -0.29 -0.66]
 [ 0.62 -0.3  -0.5  -0.29  1.    0.74]
 [ 0.49 -0.19 -0.41 -0.66  0.74  1.  ]]
cov_x
[[ 50.56 -46.95 -43.21 -15.07  42.09  35.4 ]
 [-46.95 362.45 -80.47 -47.15 -54.92 -36.11]
 [-43.21 -80.47  96.66  34.17 -46.85 -41.64]
 [-15.07 -47.15  34.17  28.24 -14.4  -36.02]
 [ 42.09 -54.92 -46.85 -14.4   90.29  71.63]
 [ 35.4  -36.11 -41.64 -36.02  71.63 104.83]]
cov_x_sum
[233.2887211068036, 628.0475132483288, 342.99427947713866, 175.05979833533155, 320.1811487128789, 325.6265643803839]


HOMER performs PCA on the correlation matrix. <br>

In [5]:
# PCA of the row-centered correlation matrix via SVD.
# The transpose is scaled by 1/sqrt(n) so that y.T @ y equals
# corr_oe_zero @ corr_oe_zero.T / n, where n is the number of columns.
n = corr_oe_zero.shape[1]
x_t = corr_oe_zero.T
y = x_t / np.sqrt(n)

# The squared singular values of y are the eigenvalues of y.T @ y.
U, S, Vh = np.linalg.svd(y, full_matrices=True)
eigenvalues = np.square(S)
sum_eigenvalues = eigenvalues.sum()
explained_variances = eigenvalues / sum_eigenvalues
print('explained_variances', explained_variances, sep='\n')

cov_x = y.T @ y
V_Cx = Vh @ cov_x

print('Vh[0]', Vh[0], sep='\n')
print('corr_oe', np.corrcoef(oe), sep='\n')
print('cov_x', cov_x, sep='\n')

# Sum of absolute values along each row of cov_x.
cov_x_sum = [np.abs(row).sum() for row in cov_x]
print('cov_x_sum', cov_x_sum, sep='\n')

explained_variances
[0.75 0.21 0.03 0.01 0.   0.  ]
Vh[0]
[-0.44 -0.04  0.47  0.44 -0.43 -0.46]
corr_oe
[[ 1.   -0.35 -0.62 -0.4   0.62  0.49]
 [-0.35  1.   -0.43 -0.47 -0.3  -0.19]
 [-0.62 -0.43  1.    0.65 -0.5  -0.41]
 [-0.4  -0.47  0.65  1.   -0.29 -0.66]
 [ 0.62 -0.3  -0.5  -0.29  1.    0.74]
 [ 0.49 -0.19 -0.41 -0.66  0.74  1.  ]]
cov_x
[[ 0.37 -0.07 -0.3  -0.25  0.33  0.32]
 [-0.07  0.26 -0.13 -0.15 -0.08 -0.03]
 [-0.3  -0.13  0.4   0.36 -0.28 -0.3 ]
 [-0.25 -0.15  0.36  0.39 -0.24 -0.31]
 [ 0.33 -0.08 -0.28 -0.24  0.35  0.34]
 [ 0.32 -0.03 -0.3  -0.31  0.34  0.38]]
cov_x_sum
[1.6396344948773771, 0.7082331691686853, 1.771831144319623, 1.7057074520937712, 1.614210379129011, 1.6724815809186344]
