In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import numpy as np

# Step 1: Define the covariance matrix
cov_matrix = np.array([[1, 0.8, 0.8],
					   [0.8, 1, 0.8],
					   [0.8, 0.8, 1]])

# Step 2: Define the mean vector (assuming mean of each feature is 0)
mean_vector = np.zeros(cov_matrix.shape[0])

# Step 3: Set the random seed for reproducibility
np.random.seed(42)

# Step 4: Generate the dataset
num_samples = 1000  # Number of samples you want to generate
data = np.random.multivariate_normal(mean_vector, cov_matrix, size=num_samples)

# Step 5: Verify the covariance structure (optional)
empirical_cov_matrix = np.cov(data, rowvar=False)
print("Empirical Covariance Matrix:\n", empirical_cov_matrix)

In [3]:
import numpy as np
from scipy.spatial.distance import pdist, squareform

# Step 1: Compute the chi-squared distance matrix
def chi_squared_distance(X):
    return squareform(pdist(X, metric='seuclidean'))

# Step 2: Convert to similarity matrix
def similarity_from_distance(D):
    return 1 / (1 + D)

# Step 3: Double-center the similarity matrix
def double_centering(S):
    n = S.shape[0]
    I = np.eye(n)
    ones = np.ones((n, n)) / n
    return -0.5 * (I - ones) @ S @ (I - ones)

# Step 4: Perform eigen decomposition
def eigen_decomposition(B):
    eigvals, eigvecs = np.linalg.eigh(B)
    # Sort eigenvalues and eigenvectors
    idx = np.argsort(eigvals)[::-1]
    eigvals = eigvals[idx]
    eigvecs = eigvecs[:, idx]
    return eigvals, eigvecs

# Sample data matrix X (n observations, p variables)
X = np.random.rand(100, 10)

# Step 1: Compute chi-squared distance matrix
D = chi_squared_distance(X)

# Step 2: Convert to similarity matrix
S = similarity_from_distance(D)

# Step 3: Double-center the similarity matrix
B = double_centering(S)

# Step 4: Perform eigen decomposition
eigvals, eigvecs = eigen_decomposition(B)

# The eigenvectors eigvecs are the principal coordinates in the transformed space


In [7]:
import numpy as np
from scipy.spatial.distance import cdist

# Sample data matrix X (n observations, p variables)
X = np.array([
    [5, 1, 3],
    [10, 2, 6],
    [15, 3, 9],
    [20, 4, 12]
])

# Number of observations (n) and variables (p)
n, p = X.shape

# Compute chi-squared distance matrix
def chi_squared_distance(X):
    D = cdist(X, X, metric='seuclidean')
    return D

# Step 1: Compute the chi-squared distance matrix D
D = chi_squared_distance(X)

# Step 2: Center the chi-squared distance matrix using double-centering formula
I = np.eye(n)
ones = np.ones((n, n)) / n

# Double-center the distance matrix
B = -0.5 * (I - ones) @ D @ (I - ones)

print("Original Data Matrix (X):\n", X)
print("\nChi-squared Distance Matrix (D):\n", D)
print("\nDouble-centered Matrix for PCoA (B):\n", B)

# Perform eigen decomposition on the centered matrix B
eigvals, eigvecs = np.linalg.eigh(B)

# Sort eigenvalues and eigenvectors in descending order
idx = np.argsort(eigvals)[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

print("\nEigenvalues:\n", eigvals)
print("\nEigenvectors:\n", eigvecs)


Original Data Matrix (X):
 [[ 5  1  3]
 [10  2  6]
 [15  3  9]
 [20  4 12]]

Chi-squared Distance Matrix (D):
 [[0.         1.44913767 2.89827535 4.34741302]
 [1.44913767 0.         1.44913767 2.89827535]
 [2.89827535 1.44913767 0.         1.44913767]
 [4.34741302 2.89827535 1.44913767 0.        ]]

Double-centered Matrix for PCoA (B):
 [[ 1.26799547  0.18114221 -0.54342663 -0.90571105]
 [ 0.18114221  0.54342663 -0.18114221 -0.54342663]
 [-0.54342663 -0.18114221  0.54342663  0.18114221]
 [-0.90571105 -0.54342663  0.18114221  1.26799547]]

Eigenvalues:
 [ 2.47383275e+00  7.24568837e-01  4.24442598e-01 -1.02884023e-16]

Eigenvectors:
 [[-0.65328148  0.5        -0.27059805 -0.5       ]
 [-0.27059805 -0.5         0.65328148 -0.5       ]
 [ 0.27059805 -0.5        -0.65328148 -0.5       ]
 [ 0.65328148  0.5         0.27059805 -0.5       ]]


In [9]:
import numpy as np
from scipy.spatial.distance import cdist

# Sample data matrix X (n observations, p variables)
X = np.array([
    [5, 1, 3],
    [10, 2, 6],
    [15, 3, 9],
    [20, 4, 12]
])

# Number of observations (n) and variables (p)
n, p = X.shape

# Compute chi-squared distance matrix
def chi_squared_distance(X):
    m = X.sum(axis=1).reshape(-1, 1)
    D = cdist(X / m, X / m, 'euclidean')**2 / (2 * (1 / m + 1 / m.T))
    return D

# Step 1: Compute the chi-squared distance matrix D
D = chi_squared_distance(X)

# Step 2: Convert distance matrix to similarity matrix
S = 1 / (1 + D)

# Step 3: Center the similarity matrix
ones = np.ones((n, n)) / n
K_prime = S - ones @ S - S @ ones + ones @ S @ ones

# Step 4: Perform eigen decomposition on the centered similarity matrix
eigvals, eigvecs = np.linalg.eigh(K_prime)

# Sort eigenvalues and eigenvectors in descending order
idx = np.argsort(eigvals)[::-1]
eigvals = eigvals[idx]
eigvecs = eigvecs[:, idx]

print("Original Data Matrix (X):\n", X)
print("\nChi-squared Distance Matrix (D):\n", D)
print("\nSimilarity Matrix (S):\n", S)
print("\nCentered Similarity Matrix (K_prime):\n", K_prime)
print("\nEigenvalues of Centered Similarity Matrix:\n", eigvals)
print("\nEigenvectors of Centered Similarity Matrix (n x n):\n", eigvecs)

# Step 5: Project n x n eigenvectors back to p x p space
# Transforming eigenvectors back to the original variable space
X_centered = X - np.mean(X, axis=0)  # Centering the original data matrix
pca = X_centered.T @ eigvecs[:, :p]  # Projecting eigenvectors onto the original variables

# Normalize the PCA components (optional, depending on your specific application)
pca_normalized = pca / np.linalg.norm(pca, axis=0)

print("\nProjected Eigenvectors in p x p space (PCA components):\n", pca_normalized)


Original Data Matrix (X):
 [[ 5  1  3]
 [10  2  6]
 [15  3  9]
 [20  4 12]]

Chi-squared Distance Matrix (D):
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Similarity Matrix (S):
 [[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]

Centered Similarity Matrix (K_prime):
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]

Eigenvalues of Centered Similarity Matrix:
 [0. 0. 0. 0.]

Eigenvectors of Centered Similarity Matrix (n x n):
 [[0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]]

Projected Eigenvectors in p x p space (PCA components):
 [[ 0.84515425  0.84515425 -0.84515425]
 [ 0.16903085  0.16903085 -0.16903085]
 [ 0.50709255  0.50709255 -0.50709255]]
