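Compare the PCA results of the two approaches below: PCA on the O/E matrix itself versus PCA on the correlation matrix of the O/E matrix.<br>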

In [11]:
import numpy as np
import sympy
from sympy import lambdify
# Print arrays with two decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=2)

In [12]:
def generate_oe_matrix(variance_threshold_L = 0.7, variance_threshold_U = 0.8, size = 6, seed = None):
    """Generate a random symmetric matrix whose first PC explains a target share of variance.

    Candidates are produced by rejection sampling: a ``size`` x ``size`` array
    of absolute normal draws (scaled by 10) is made symmetric by adding its
    transpose, each row is mean-centred, and the squared singular values of
    the centred matrix give the fraction of variance carried by the first
    principal component. The first candidate whose fraction lies in
    ``[variance_threshold_L, variance_threshold_U)`` is printed and returned.
    The loop has no attempt limit, so a very narrow interval can take long.

    Parameters
    ----------
    variance_threshold_L : float
        Inclusive lower bound on the explained-variance fraction of the
        first PC (a fraction in 0-1, not a percentage).
    variance_threshold_U : float
        Exclusive upper bound on that fraction.
    size : int
        Number of rows and columns of the generated matrix (default 6).
    seed : int or None
        When given, draws come from ``np.random.default_rng(seed)`` so the
        result is reproducible. When None (default), draws come from NumPy's
        global random state via ``np.random.randn``.

    Returns
    -------
    numpy.ndarray
        The accepted symmetric, non-negative matrix of shape
        ``(size, size)`` (the un-centred version).

    Raises
    ------
    ValueError
        If ``size`` is smaller than 3 or the thresholds describe an interval
        that no explained-variance fraction can fall into; in those cases
        the sampling loop could never terminate.
    """
    # A row-centred symmetric 2x2 matrix always has rank <= 1, so its first
    # PC explains all variance; a 1x1 matrix centres to zero (0/0 -> NaN).
    if size < 3:
        raise ValueError("size must be at least 3")
    # The fraction is always within (0, 1]; reject intervals that miss it.
    if (not variance_threshold_L < variance_threshold_U
            or variance_threshold_L > 1
            or variance_threshold_U <= 0):
        raise ValueError(
            "thresholds must satisfy variance_threshold_L < variance_threshold_U "
            "and overlap the range (0, 1]"
        )

    rng = None if seed is None else np.random.default_rng(seed)

    while True:
        # Draw a size x size array of normal values
        if rng is None:
            noise = np.random.randn(size, size)
        else:
            noise = rng.standard_normal((size, size))

        # Absolute values keep every entry non-negative; adding the
        # transpose makes the array symmetric.
        matrix = np.abs(noise * 10)
        origin_matrix = matrix + matrix.T

        # Subtract the mean of each row to center the data
        centered = origin_matrix - origin_matrix.mean(axis=1, keepdims=True)

        # Only the singular values are needed: their squares are the
        # eigenvalues of centered @ centered.T / size.
        y = centered.T / np.sqrt(size)
        singular_values = np.linalg.svd(y, compute_uv=False)
        eigenvalues = singular_values * singular_values
        EV1_explained_variance = eigenvalues[0] / np.sum(eigenvalues)

        # Accept only if the first PC's share is inside [L, U)
        if variance_threshold_L <= EV1_explained_variance < variance_threshold_U:
            print(EV1_explained_variance, '\n')
            print(origin_matrix, '\n')
            return origin_matrix

# Draw one random symmetric matrix accepted by generate_oe_matrix's default
# thresholds. NOTE(review): no seed is set in the visible cells, so a re-run
# presumably yields a different matrix than the recorded output.
oe = generate_oe_matrix()
# Correlation coefficients between the rows of oe (rows are the variables).
corr_oe = np.corrcoef(oe, rowvar=True)

0.7288478997946667 

[[37.93 16.22 11.56  9.81 10.87 28.04]
 [16.22 54.03  4.39 39.99  2.35 12.37]
 [11.56  4.39  3.96 10.95 13.45 20.1 ]
 [ 9.81 39.99 10.95 21.21  9.28 14.54]
 [10.87  2.35 13.45  9.28 21.38 11.49]
 [28.04 12.37 20.1  14.54 11.49 27.51]] 



In [13]:
# Row-centre both matrices: each row has its own mean removed (axis=1);
# keepdims=True keeps the means as a column so they broadcast across the row.
oe_zero = oe - np.mean(oe, axis=1, keepdims=True)
corr_oe_zero = corr_oe - np.mean(corr_oe, axis=1, keepdims=True)

### PCA
MOSAIC performs PCA on the O/E matrix. <br>

In [14]:
# PCA of the row-centred O/E matrix through an SVD.
# With y = oe_zero.T / sqrt(n), the product y.T @ y equals
# oe_zero @ oe_zero.T / n.
n = oe_zero.shape[1]
x_t = oe_zero.T
y = x_t / np.sqrt(n)

# Squared singular values of y are the eigenvalues of y.T @ y; dividing by
# their total gives the share of variance carried by each component.
U, S, Vh = np.linalg.svd(y, full_matrices=True)
eigenvalues = np.square(S)
sum_eigenvalues = eigenvalues.sum()
explained_variances = eigenvalues / sum_eigenvalues
print('explained_variances', explained_variances, sep='\n')

# Rows of Vh are eigenvectors of cov_x, so row k of Vh @ cov_x is
# eigenvalues[k] * Vh[k].
cov_x = y.T @ y
V_Cx = Vh @ cov_x


print('V_Cx[0]', V_Cx[0], sep='\n')
print('Vh[0]', Vh[0], sep='\n')
print('corr_oe', np.corrcoef(oe), sep='\n')
print('cov_x', cov_x, sep='\n')

# Sum of absolute values along each row of cov_x.
cov_x_sum = [np.abs(cov_row).sum() for cov_row in cov_x]
print('cov_x_sum', cov_x_sum, sep='\n')

explained_variances
[0.73 0.21 0.03 0.02 0.01 0.  ]
V_Cx[0]
[ -54.8   423.72  -54.39  233.25 -107.75  -71.97]
Vh[0]
[-0.11  0.84 -0.11  0.46 -0.21 -0.14]
corr_oe
[[ 1.   -0.13  0.42 -0.23 -0.17  0.85]
 [-0.13  1.   -0.36  0.92 -0.87 -0.39]
 [ 0.42 -0.36  1.   -0.43  0.38  0.45]
 [-0.23  0.92 -0.43  1.   -0.83 -0.47]
 [-0.17 -0.87  0.38 -0.83  1.   -0.03]
 [ 0.85 -0.39  0.45 -0.47 -0.03  1.  ]]
cov_x
[[108.95 -26.65  24.04 -25.59 -10.18  60.05]
 [-26.65 361.85 -38.08 189.58 -92.98 -49.77]
 [ 24.04 -38.08  30.33 -25.66  11.76  16.74]
 [-25.59 189.58 -25.66 116.33 -50.52 -34.31]
 [-10.18 -92.98  11.76 -50.52  31.75  -1.22]
 [ 60.05 -49.77  16.74 -34.31  -1.22  45.92]]
cov_x_sum
[255.46334088436885, 758.9075517004069, 146.6113098833724, 441.99186352709813, 198.40988721392043, 208.00256297594615]


HOMER performs PCA on the correlation matrix of the O/E matrix. <br>

In [15]:
# PCA of the row-centred correlation matrix through an SVD.
# With y = corr_oe_zero.T / sqrt(n), the product y.T @ y equals
# corr_oe_zero @ corr_oe_zero.T / n.
n = corr_oe_zero.shape[1]
x_t = corr_oe_zero.T
y = x_t / np.sqrt(n)

# Squared singular values of y are the eigenvalues of y.T @ y; dividing by
# their total gives the share of variance carried by each component.
U, S, Vh = np.linalg.svd(y, full_matrices=True)
eigenvalues = np.square(S)
sum_eigenvalues = eigenvalues.sum()
explained_variances = eigenvalues / sum_eigenvalues
print('explained_variances', explained_variances, sep='\n')

# Rows of Vh are eigenvectors of cov_x, so row k of Vh @ cov_x is
# eigenvalues[k] * Vh[k].
cov_x = y.T @ y
V_Cx = Vh @ cov_x

print('V_Cx[0]', V_Cx[0], sep='\n')
print('Vh[0]', Vh[0], sep='\n')
print('corr_oe', np.corrcoef(oe), sep='\n')
print('cov_x', cov_x, sep='\n')

# Sum of absolute values along each row of cov_x.
cov_x_sum = [np.abs(cov_row).sum() for cov_row in cov_x]
print('cov_x_sum', cov_x_sum, sep='\n')

explained_variances
[0.79 0.19 0.02 0.   0.   0.  ]
V_Cx[0]
[-0.37  0.91 -0.6   0.93 -0.77 -0.57]
Vh[0]
[-0.21  0.51 -0.34  0.53 -0.44 -0.32]
corr_oe
[[ 1.   -0.13  0.42 -0.23 -0.17  0.85]
 [-0.13  1.   -0.36  0.92 -0.87 -0.39]
 [ 0.42 -0.36  1.   -0.43  0.38  0.45]
 [-0.23  0.92 -0.43  1.   -0.83 -0.47]
 [-0.17 -0.87  0.38 -0.83  1.   -0.03]
 [ 0.85 -0.39  0.45 -0.47 -0.03  1.  ]]
cov_x
[[ 0.25 -0.14  0.15 -0.17  0.04  0.27]
 [-0.14  0.48 -0.29  0.49 -0.43 -0.25]
 [ 0.15 -0.29  0.25 -0.3   0.25  0.21]
 [-0.17  0.49 -0.3   0.5  -0.43 -0.27]
 [ 0.04 -0.43  0.25 -0.43  0.43  0.13]
 [ 0.27 -0.25  0.21 -0.27  0.13  0.33]]
cov_x_sum
[1.0200685741889233, 2.0848815819927293, 1.4367412553109085, 2.163581259107753, 1.7102169368449882, 1.4660493830482904]
