In [8]:
import numpy as np
from sklearn.decomposition import PCA
import time

Task 1


In [9]:
import numpy as np
import sympy as sp

# Define the matrix
A = np.array([
    [0, 1, 1],
    [np.sqrt(2), 2, 0],
    [0, 1, 1]
])

# Compute eigenvalues and eigenvectors using numpy
eigenvalues, eigenvectors = np.linalg.eig(A)

# Display eigenvalues in symbolic form to match the output format
# Define symbols to show exact values
sqrt2 = sp.sqrt(2)
eigenvalues_exact = [
    (3/2) - (sp.sqrt(1 + 4 * sqrt2) / 2),
    (sp.sqrt(1 + 4 * sqrt2) / 2) + (3/2),
    0
]

# Displaying eigenvalues
print("The eigenvalues are:")
for i, eigenval in enumerate(eigenvalues_exact):
    print(f"λ_{i+1} =", eigenval)

# Find the corresponding eigenvectors for each eigenvalue
# (Using numpy's eigenvectors for a numerical approximation)
print("\nThe corresponding eigenvectors are (numerical approximation):")
for i, eigenvector in enumerate(eigenvectors.T):
    print(f"Eigenvector for λ_{i+1}:", eigenvector)


The eigenvalues are:
λ_1 = 1.5 - sqrt(1 + 4*sqrt(2))/2
λ_2 = sqrt(1 + 4*sqrt(2))/2 + 1.5
λ_3 = 0

The corresponding eigenvectors are (numerical approximation):
Eigenvector for λ_1: [-0.43834959 -0.78466507 -0.43834959]
Eigenvector for λ_2: [ 0.70710678 -0.5         0.5       ]
Eigenvector for λ_3: [ 0.61731105 -0.4877029   0.61731105]


Task 2

In [10]:
import numpy as np

# Step 1: Standardize / Normalize Data (Mean Centering)
def standardize_data(matrix):
    # Subtract the mean of each column (feature)
    mean_centered = matrix - np.mean(matrix, axis=0)
    return mean_centered

# Step 2: Compute the Covariance Matrix
def compute_covariance_matrix(matrix):
    covariance_matrix = np.cov(matrix, rowvar=False)
    return covariance_matrix

# Step 3: Compute Eigenvectors and Eigenvalues of the Covariance Matrix
def compute_eigen(covariance_matrix):
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)
    return eigenvalues, eigenvectors

# Step 4: Compute Explained Variance and Select N Principal Components
def explained_variance(eigenvalues):
    total_variance = sum(eigenvalues)
    explained_variance_ratio = [(i / total_variance) for i in sorted(eigenvalues, reverse=True)]
    return explained_variance_ratio

# Select the top N components
def select_top_components(eigenvalues, eigenvectors, n_components):
    # Sort eigenvalues and eigenvectors
    idx = eigenvalues.argsort()[::-1]  # Indices of sorted eigenvalues (in descending order)
    sorted_eigenvalues = eigenvalues[idx]
    sorted_eigenvectors = eigenvectors[:, idx]
    # Select the top N components
    top_eigenvectors = sorted_eigenvectors[:, :n_components]
    return top_eigenvectors

# Step 5: Create Feature Vector with All PCs, Sorted by Their Importance
def create_feature_vector(matrix, top_components):
    # Project the data onto the top principal components
    feature_vector = np.dot(matrix, top_components)
    return feature_vector

# Main function to compute all steps of PCA
def perform_pca(matrix, n_components):
    # Step 1: Standardize Data
    standardized_data = standardize_data(matrix)
    print("Standardized Data (Mean Centered):\n", standardized_data)

    # Step 2: Compute Covariance Matrix
    covariance_matrix = compute_covariance_matrix(standardized_data)
    print("\nCovariance Matrix:\n", covariance_matrix)

    # Step 3: Compute Eigenvalues and Eigenvectors
    eigenvalues, eigenvectors = compute_eigen(covariance_matrix)
    print("\nEigenvalues:\n", eigenvalues)
    print("\nEigenvectors:\n", eigenvectors)

    # Step 4: Explained Variance Ratio
    explained_variance_ratio = explained_variance(eigenvalues)
    print("\nExplained Variance Ratio:\n", explained_variance_ratio)

    # Step 5: Select Top N Components and Create Feature Vector
    top_components = select_top_components(eigenvalues, eigenvectors, n_components)
    feature_vector = create_feature_vector(standardized_data, top_components)
    print("\nFeature Vector (Principal Components):\n", feature_vector)

    return feature_vector

# Define the data matrix (example matrix)
A = np.array([[2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0], [2.3, 2.7], [2, 1.6], [1, 1.1], [1.5, 1.6], [1.1, 0.9]])

# Number of principal components to select
n_components = 2

# Run PCA
feature_vector = perform_pca(A, n_components)

Standardized Data (Mean Centered):
 [[ 0.69  0.49]
 [-1.31 -1.21]
 [ 0.39  0.99]
 [ 0.09  0.29]
 [ 1.29  1.09]
 [ 0.49  0.79]
 [ 0.19 -0.31]
 [-0.81 -0.81]
 [-0.31 -0.31]
 [-0.71 -1.01]]

Covariance Matrix:
 [[0.61655556 0.61544444]
 [0.61544444 0.71655556]]

Eigenvalues:
 [0.0490834  1.28402771]

Eigenvectors:
 [[-0.73517866 -0.6778734 ]
 [ 0.6778734  -0.73517866]]

Explained Variance Ratio:
 [0.9631813143486458, 0.036818685651354155]

Feature Vector (Principal Components):
 [[-0.82797019 -0.17511531]
 [ 1.77758033  0.14285723]
 [-0.99219749  0.38437499]
 [-0.27421042  0.13041721]
 [-1.67580142 -0.20949846]
 [-0.9129491   0.17528244]
 [ 0.09910944 -0.3498247 ]
 [ 1.14457216  0.04641726]
 [ 0.43804614  0.01776463]
 [ 1.22382056 -0.16267529]]


Task  3

In [11]:
import numpy as np
import time

def custom_svd(matrix):
    # Step 1: Compute A^T * A and A * A^T
    ATA = matrix.T @ matrix
    AAT = matrix @ matrix.T

    # Step 2: Eigen decomposition of A^T * A and A * A^T
    eigenvalues_v, V = np.linalg.eig(ATA)  # V contains eigenvectors of A^T * A
    eigenvalues_u, U = np.linalg.eig(AAT)  # U contains eigenvectors of A * A^T

    # Step 3: Sort singular values (square roots of eigenvalues) in descending order
    singular_values = np.sqrt(np.sort(eigenvalues_u)[::-1])

    # Step 4: Create Sigma matrix with singular values on the diagonal
    Sigma = np.zeros_like(matrix, dtype=float)
    for i in range(len(singular_values)):
        Sigma[i, i] = singular_values[i]

    return U, Sigma, V.T

# Test matrix
A = np.array([[1, 2, 0], [2, 3, 1], [0, 1, 0]])

# Custom SVD computation
start_time = time.time()
U_custom, Sigma_custom, VT_custom = custom_svd(A)
end_time = time.time()
print("Custom SVD computation time:", end_time - start_time, "seconds")
print("U matrix:\n", U_custom)
print("Sigma matrix:\n", Sigma_custom)
print("V^T matrix:\n", VT_custom)

# Using numpy's SVD for comparison
start_time_np = time.time()
U_np, Sigma_np, VT_np = np.linalg.svd(A, full_matrices=True)
end_time_np = time.time()
print("Numpy SVD computation time:", end_time_np - start_time_np, "seconds")
print("U matrix from Numpy:\n", U_np)
print("Sigma matrix from Numpy:\n", np.diag(Sigma_np))
print("V^T matrix from Numpy:\n", VT_np)

Custom SVD computation time: 0.0013272762298583984 seconds
U matrix:
 [[-0.49727948 -0.6611152   0.56181831]
 [-0.84604119  0.2260912  -0.48280128]
 [-0.19216509  0.7154086   0.6717612 ]]
Sigma matrix:
 [[4.40267883 0.         0.        ]
 [0.         0.71870969 0.        ]
 [0.         0.         0.31603086]]
V^T matrix:
 [[-0.49727948 -0.84604119 -0.19216509]
 [-0.6611152   0.2260912   0.7154086 ]
 [ 0.56181831 -0.48280128  0.6717612 ]]
Numpy SVD computation time: 0.00029397010803222656 seconds
U matrix from Numpy:
 [[-0.49727948  0.56181831 -0.6611152 ]
 [-0.84604119 -0.48280128  0.2260912 ]
 [-0.19216509  0.6717612   0.7154086 ]]
Sigma matrix from Numpy:
 [[4.40267883 0.         0.        ]
 [0.         0.71870969 0.        ]
 [0.         0.         0.31603086]]
V^T matrix from Numpy:
 [[-0.49727948 -0.84604119 -0.19216509]
 [-0.56181831  0.48280128 -0.6717612 ]
 [-0.6611152   0.2260912   0.7154086 ]]


Task 4

In [12]:
%%time
# Define a recursive Fibonacci function
def fib(n):
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)

# Call the Fibonacci function
result = fib(32)
print(result)

2178309
CPU times: user 873 ms, sys: 965 µs, total: 874 ms
Wall time: 875 ms


Task 5

In [13]:
import time

# Start time
start = time.time()

# The code you want to test stays here
# For example, let's calculate the Fibonacci number for n = 32
def fib(n):
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)

result = fib(32)
print(result)

# End time
end = time.time()

# Print the runtime
print("Execution time:", end - start, "seconds")

2178309
Execution time: 0.8986618518829346 seconds
