In [1]:
import numpy as np

# Step 1: Create a random 125x100 data matrix
np.random.seed(42)  # For reproducibility
X = np.random.rand(125, 100)

# Step 2: Normalize the data (subtract mean of each column)
X_mean = np.sum(X, axis=0) / X.shape[0]  # Compute column-wise mean
X_normalized = X - X_mean  # Subtract mean

# Step 3: Compute the covariance matrix
m = X_normalized.shape[0]
cov_matrix = (X_normalized.T @ X_normalized) / (m - 1)

# Step 4: Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)  # Use numpy for simplicity

# Step 5: Sort eigenvalues and eigenvectors
sorted_indices = np.argsort(eigenvalues)[::-1]  # Sort in descending order
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

# Step 6: Project the data onto the top 2 eigenvectors
k = 2  # Number of principal components
top_eigenvectors = eigenvectors[:, :k]  # Take the top k eigenvectors
projected_data = X_normalized @ top_eigenvectors  # Project the data

# Output the shape and the first 5 rows of the projected data
print("Shape of Projected Data:", projected_data.shape)
print("First 5 Rows of Projected Data:\n", projected_data[:5])


Shape of Projected Data: (125, 2)
First 5 Rows of Projected Data:
 [[ 0.35375434  0.56593412]
 [-0.33807505 -0.74280578]
 [-0.36218891  1.03412225]
 [-0.28056637 -1.51305829]
 [ 0.29752949  0.38943832]]


In [4]:
import numpy as np
import pandas as pd

# Step 1: Load the dataset
# Example: Load data from a CSV file
# Replace "your_dataset.csv" with your actual file
data = pd.read_csv("brandPCA.csv")

# Step 2: Handle non-numeric data (if any)
# Select only numeric columns from the dataframe
data_numeric = data.select_dtypes(include=[np.number])

# Step 3: Convert to NumPy array
X = data_numeric.values  # Now it's guaranteed to be numeric

# Step 4: Normalize the data (subtract mean of each column)
X_mean = np.mean(X, axis=0)  # Compute column-wise mean
X_normalized = X - X_mean    # Subtract the mean

# Step 5: Compute the covariance matrix
m = X_normalized.shape[0]  # Number of samples
cov_matrix = (X_normalized.T @ X_normalized) / (m - 1)

# Step 6: Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Step 7: Sort eigenvalues and eigenvectors
sorted_indices = np.argsort(eigenvalues)[::-1]  # Sort in descending order
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

# Step 8: Project the data onto the top k eigenvectors
k = 2  # Number of principal components (adjust as needed)
top_eigenvectors = eigenvectors[:, :k]  # Select top k eigenvectors
projected_data = X_normalized @ top_eigenvectors  # Project the data

# Step 9: Output the results
print("Shape of Original Data:", X.shape)

print("Shape of Projected Data:", projected_data.shape)
print("First 5 Rows of Projected Data:\n", projected_data[:5])


Shape of Original Data: (300, 8)
Shape of Projected Data: (300, 2)
First 5 Rows of Projected Data:
 [[-6772.04215548   -24.65617001]
 [-6788.04209888   -24.31599705]
 [-6816.04203409   -24.64157632]
 [-6825.04190568   -24.02905129]
 [-6836.04196427   -23.56872119]]


In [5]:
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Step 1: Load the image
# Replace this with the path to your image
image_path = 'apple.jpg'

# Load the image (grayscale for simplicity)
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

# Step 2: Flatten the image into a 1D vector
image_flattened = image.flatten()

# Step 3: Normalize (mean center) the image
image_mean = np.mean(image_flattened)
image_normalized = image_flattened - image_mean

# Step 4: Compute the covariance matrix
# For a single image, we essentially have one data point. PCA is usually applied to multiple data points.
# Here we compute the variance of the flattened image pixels as a single feature.
cov_matrix = np.cov(image_normalized)  # Covariance of the pixel values

# Step 5: Compute eigenvalues and eigenvectors
# Since it's a single vector, we will find the eigenvalue and eigenvector of the covariance matrix.
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Step 6: Sort eigenvalues and eigenvectors (optional)
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

# Step 7: Project the image onto the top eigenvector
# Projecting the image onto the first principal component (the most significant direction of variance)
image_projected = np.dot(image_normalized, eigenvectors[:, 0])

# Step 8: Visualize the image and its projection
plt.figure(figsize=(12, 6))

# Original Image
plt.subplot(1, 2, 1)
plt.imshow(image, cmap='gray')
plt.title('Original Image')
plt.axis('off')

# Reconstructed Image from PCA (using only the top principal component)
reconstructed_image = eigenvectors[:, 0].reshape(image.shape) * image_projected + image_mean
plt.subplot(1, 2, 2)
plt.imshow(reconstructed_image, cmap='gray')
plt.title('Reconstructed Image (PCA)')
plt.axis('off')

plt.show()

# Print results
print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors: {eigenvectors}")


LinAlgError: 0-dimensional array given. Array must be at least two-dimensional