#### Helper Functions

In [1]:
import numpy as np

def is_positive_semi_definite(matrix):
    """
        Checks if a matrix is positive semi-definite.
    """
    eigenvalues = np.linalg.eigvalsh(matrix)
    return np.all(eigenvalues >= -1e-10)  # Allowing for numerical precision issues


####################################################################################################
               # HELPER FUNCTIONS FOR THE NOTEBOOK
####################################################################################################
def frobenius_norm_difference(A, B):
    """
        Calculates the Frobenius norm of the difference between two matrices.
    """
    return np.linalg.norm(A - B, 'fro') / np.linalg.norm(A, 'fro')

def sample_covariance_estimator(X):
    """
        Estimates the covariance matrix of a data matrix.
    """
    n = X.shape[1]
    covariance_matrix = np.dot(X, X.T) / n
    
    return covariance_matrix

def generate_sample_data(n_features, n_samples, rank):
    """
    Generates a sample data matrix with a specified low rank.
    """
    rank = min(rank, n_features, n_samples)
    A = np.random.randn(n_features, rank) @ np.random.randn(rank, n_samples)
    
    return A

In [2]:
def compare_eigenvalues(eigvals_sample, eigvals_nystrom, k):
    top_eigvals_sample = eigvals_sample[:k]
    top_eigvals_nystrom = eigvals_nystrom[:k]
    relative_error = np.linalg.norm(top_eigvals_sample - top_eigvals_nystrom) / np.linalg.norm(top_eigvals_sample)
    return relative_error

def compare_eigenvectors(eigvecs_sample, eigvecs_nystrom, k):
    top_eigvecs_sample = eigvecs_sample[:, :k]
    top_eigvecs_nystrom = eigvecs_nystrom[:, :k]
    frobenius_norm = np.linalg.norm(top_eigvecs_sample - top_eigvecs_nystrom, 'fro') / np.linalg.norm(top_eigvecs_sample, 'fro')
    return frobenius_norm

#### Nystorm Covariance Estimator

In [3]:
def nystrom_covariance_estimator(X, num_landmarks):
    """
        Estimates the covariance matrix of a data matrix using the Nyström method.
    """
    p, n = X.shape      # p: number of features, n: number of samples

    # Step 1: Select landmark points (randomly select num_landmarks columns)
    indices = np.random.choice(p, num_landmarks, replace=False)
    Y = X[indices, :]      # Y.shape = (num_landmarks, n)

    # Step 2: Compute the orthogonal projection matrix P using the pseudoinverse
    YYT = np.dot(Y, Y.T) 
    YYT_pinv = np.linalg.pinv(YYT)
    P = np.dot(Y.T, np.dot(YYT_pinv, Y))    # P.shape = (n, n)

    # Step 3: Project data onto the subspace spanned by the landmark points
    X_proj = np.dot(X, P)
    
    # Step 4: Construct the Nyström covariance estimator
    Sigma_hat = np.dot(X_proj, X.T) / n    # Sigma_hat.shape = (p, p)
    
    return Sigma_hat

##### Bias of Nystorm Covariance Estimator

In [11]:
def nystorm_estimator_bias(Sigma, Sigma_hat, I, J, n, k):
    """
        Computes the bias of the Nyström covariance estimator.
    """
    Sigma_I = Sigma[np.ix_(I, I)]
    Sigma_J = Sigma[np.ix_(J, J)]
    Sigma_IJ = Sigma[np.ix_(I, J)]
    Schur_complement = Sigma_J - Sigma_IJ.T @ np.linalg.inv(Sigma_I) @ Sigma_IJ
    
    B_J = ((n - k) / n) * Schur_complement
    B = np.zeros_like(Sigma)
    B[np.ix_(J, J)] = B_J
    
    return B, Schur_complement

##### MSE of Nystorm Covariance Estimator

In [31]:
def nystorm_estimator_mse(Sigma, Sigma_hat, n, k, I):
    """
    Computes the mean square error (MSE) of the Nyström covariance estimator.
    """

    Sigma_I = Sigma_hat[np.ix_(I, I)]
    # MSE of the sample covariance estimator of the Schur complement
    MSE_Sigma_I_Schur = (1 / (n - k)) * (np.trace(Sigma_I @ Sigma_I) + np.trace(Sigma_I) ** 2)
    # MSE of the sample covariance estimator
    MSE_Sigma = (1 / n) * (np.trace(Sigma @ Sigma) + np.trace(Sigma) ** 2)
    # Compute the MSE of the Nyström covariance estimator
    MSE = MSE_Sigma + (((n - k)** 2) / n ** 2) * (np.linalg.norm(Sigma_I, 'fro') - MSE_Sigma_I_Schur)
    
    return MSE, MSE_Sigma

##### Test of Nystorm Covariance Estimator

In [4]:
# Example usage
p, n, rank = 100, 50, 10  # Number of features, number of samples, rank of the data matrix
X = generate_sample_data(p, n, rank)

k = 10  # Number of landmark points (tests with different values of this parameter can be found below)
Sigma = sample_covariance_estimator(X)
Sigma_hat = nystrom_covariance_estimator(X, k)

# Compute bias
I = np.random.choice(p, k, replace=False)
J = np.setdiff1d(np.arange(p), I)
#bias, schur = nystorm_estimator_bias(Sigma, Sigma_hat, I, J, n, k)
#expected_value = 

# Compute MSE
#mse, mse_sample = nystorm_estimator_mse(Sigma, Sigma_hat, n, k, I)

print("Frobenius norm difference between the sample covariance matrix and the Nyström estimator: {:.2f}%".format(frobenius_norm_difference(Sigma, Sigma_hat)))
print("Is the Nyström estimator positive semi-definite? {}".format(is_positive_semi_definite(Sigma_hat)))
print(Sigma_hat.shape)

Frobenius norm difference between the sample covariance matrix and the Nyström estimator: 0.00%
Is the Nyström estimator positive semi-definite? True
(100, 100)


##### Nystorm SVD


In [14]:
import numpy as np

####################################################################################################
               # HELPER FUNCTIONS FOR THE NOTEBOOK
####################################################################################################

def frobenius_norm_difference(A, B):
    """
        Calculates the Frobenius norm of the difference between two matrices.
    """
    return np.linalg.norm(A - B, 'fro') / np.linalg.norm(A, 'fro') * 100

def sample_covariance_estimator(X):
    """
        Estimates the sample covariance matrix of a data matrix.
    """
    n = X.shape[1]
    covariance_matrix = np.dot(X, X.T) / n
    
    return covariance_matrix

def generate_low_rank_data(n_features, n_samples, rank):
    """
    Generates a low rank data matrix.
    """
    rank = min(rank, n_features, n_samples)
    A = np.random.randn(n_features, rank) @ np.random.randn(rank, n_samples)
    
    return A

####################################################################################################
                # NYSTROM COVARIANCE ESTIMATOR
####################################################################################################
def nystrom_covariance_estimator(X, num_landmarks):
    """
        Estimates the covariance matrix of a data matrix using the Nyström method.
    """
    p, n = X.shape      # p: number of features, n: number of samples

    # Step 1: Select landmark points (randomly select num_landmarks columns)
    indices = np.random.choice(p, num_landmarks, replace=False)
    Y = X[indices, :]      # Y.shape = (num_landmarks, n)

    # Step 2: Compute the orthogonal projection matrix P using the pseudoinverse
    YYT = np.dot(Y, Y.T) 
    YYT_pinv = np.linalg.pinv(YYT)
    P = np.dot(Y.T, np.dot(YYT_pinv, Y))    # P.shape = (n, n)

    # Step 3: Project data onto the subspace spanned by the landmark points
    X_proj = np.dot(X, P)
    
    # Step 4: Construct the Nyström covariance estimator
    Sigma_hat = np.dot(X_proj, X.T) / n    # Sigma_hat.shape = (p, p)
    
    return Sigma_hat

####################################################################################################
                # NYSTROM PRINCIPAL COMPONENT ANALYSIS 
####################################################################################################
def nystrom_pca(X, num_landmarks, k):
    """
        Estimates the principal components of a data matrix using the Nyström method.
    """
    p, n = X.shape  # p: number of features, n: number of samples

    # Step 1: Select landmark points (randomly select num_landmarks columns)
    indices_I = np.random.choice(p, num_landmarks, replace=False)
    Y = X[indices_I, :]  # X_I.shape = (num_landmarks, n)

    # Step 2: Define J and X_J
    indices_J = np.setdiff1d(np.arange(p), indices_I)
    Z = X[indices_J, :]  # X_J.shape = (p - num_landmarks, n)

    # Step 3: Compute Thin SVD of X_I
    U_Y, D_Y, V_Y_T = np.linalg.svd(Y, full_matrices=False)
    # Step 4: Construct W_I and W_J
    W_Y = (1 / np.sqrt(n)) * np.dot(U_Y, np.diag(D_Y))
    W_Z = (1 / np.sqrt(n)) * np.dot(Z, V_Y_T.T)

    # Step 5: Compute W
    W = np.vstack([W_Y, W_Z]) # I suspect the error is here

    # Step 6: Perform thin SVD on W
    U, Lambda, V_T = np.linalg.svd(W, full_matrices=False)

    # Side step: Compute the projection matrix P
    YYT = np.dot(Y, Y.T) 
    YYT_pinv = np.linalg.pinv(YYT)
    P = np.dot(Y.T, np.dot(YYT_pinv, Y)) 


    Sigma_Nystrom = 1/n * (X @ P @ X.T)
    # Eigenvalues and eigenvectors
    eigenvalues = Lambda**2
    eigenvectors = U

    print("Frobenius Norm Difference for Relation (1)", frobenius_norm_difference(Y @ Y.T, U_Y @ np.diag(D_Y) @ np.diag(D_Y) @ U_Y.T))
    print("Frobenius Norm Difference for Relation (2)", frobenius_norm_difference(Y @ Z.T, U_Y @ np.diag(D_Y) @ V_Y_T @ Z.T))
    print("Frobenius Norm Difference for Relation (3)", frobenius_norm_difference(Z @ Y.T, Z @ V_Y_T.T @ np.diag(D_Y) @ U_Y.T))
    print("Frobenius Norm Difference for Relation (4)", frobenius_norm_difference(P, V_Y_T.T @ V_Y_T))
    # Covariance matrix estimator, should be equal to Nyström covariance estimator
    Sigma_hat = np.dot(W, W.T)

    return Sigma_hat, eigenvalues[:k], eigenvectors[:, :k]

####################################################################################################
                # EXAMPLE USAGE
####################################################################################################

p, n, rank = 100, 50, 10  # Number of features, number of samples, rank of the data matrix
X = generate_low_rank_data(p, n, rank)  # Generate a low rank data matrix  

# Sample covariance matrix
Sigma_simple = sample_covariance_estimator(X)

# Nyström covariance estimator
num_landmarks = 10  # Number of landmark points
Sigma_nystrom = nystrom_covariance_estimator(X, num_landmarks)

# Nyström PCA
k = 10  # Number of principal components
Sigma_pca, eigvals_pca, eigvecs_pca = nystrom_pca(X, num_landmarks, k)

# Assuming Sigma_nystrom is already defined and is a square symmetric matrix
eigenvalues, eigenvectors = np.linalg.eig(Sigma_nystrom)

# Sorting the eigenvalues and eigenvectors
sorted_indices = np.argsort(eigenvalues)[::-1]  # Sort indices by eigenvalues in descending order
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]

# Extract the 10 largest eigenvalues and corresponding eigenvectors
top_10_eigenvalues = sorted_eigenvalues[:10]
top_10_eigenvectors = sorted_eigenvectors[:, :10]

# Compare the eigenvalues and eigenvectors
relative_error_eigenvalues = compare_eigenvalues(eigvals_pca, top_10_eigenvalues, k)
frobenius_norm_eigenvectors = compare_eigenvectors(eigvecs_pca, top_10_eigenvectors, k)

print("Relative error of the eigenvalues between Nystrom and PCA Nystrom: {:.2f}%".format(relative_error_eigenvalues * 100))
print("Relative error of eigenvectors between Nystrom and PCA Nystrom: {:.2f}".format(frobenius_norm_eigenvectors))

# Compare the results
print("Frobenius norm difference between the sample covariance matrix and the Nyström estimator: {:.2f}%".format(frobenius_norm_difference(Sigma_simple, Sigma_nystrom)))
print("Frobenius norm difference between the Nystorm Covariance estimator and the Nyström PCA estimator: {:.2f}%".format(frobenius_norm_difference(Sigma_nystrom, Sigma_pca)))

Frobenius Norm Difference for Relation (1) 6.094143089049153e-14
Frobenius Norm Difference for Relation (2) 6.388136660264183e-14
Frobenius Norm Difference for Relation (3) 6.865837747188016e-14
Frobenius Norm Difference for Relation (4) 6.323765702091671e-13
Relative error of the eigenvalues between Nystrom and PCA Nystrom: 0.00%
Relative error of eigenvectors between Nystrom and PCA Nystrom: 1.43
Frobenius norm difference between the sample covariance matrix and the Nyström estimator: 0.00%
Frobenius norm difference between the Nystorm Covariance estimator and the Nyström PCA estimator: 135.25%
