In [None]:
from ucimlrepo import fetch_ucirepo

# Download the dataset registered under id 109 in the UCI ML repository
# (plotted later in this notebook as the "Wine Dataset").
wine = fetch_ucirepo(id=109)

# Feature table and target table, both kept under the names used by later cells.
X, y = wine.data.features, wine.data.targets

# Show the dataset-level metadata followed by the per-variable description.
for wine_info in (wine.metadata, wine.variables):
    print(wine_info)


In [None]:
# Eigen-decomposition of the covariance matrix of the features in X,
# followed by plots of the sorted eigenvalue spectrum.
import numpy as np
import matplotlib.pyplot as plt

# np.cov expects one variable per row, hence the transpose of the
# (samples x features) table X.
cov_mat = np.cov(X.T)

# A covariance matrix is symmetric, so use eigh rather than eig: eigh is the
# routine meant for symmetric/Hermitian input and always returns real
# eigenvalues with orthonormal eigenvectors, whereas eig can return
# complex-typed results with tiny imaginary parts due to round-off.
eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)

# eigh returns eigenvalues in ascending order; reorder to descending and apply
# the same permutation to the eigenvector columns so pairs stay aligned.
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

# print the eigenvalues (largest first)
print(eigenvalues)

# plot the eigenvalues with a log scale
plt.plot(eigenvalues)
plt.yscale('log')
plt.title('Covariance Eigenvalues (log scale)')
plt.xlabel('Component index')
plt.ylabel('Eigenvalue')
plt.show()

# and without a log scale
plt.plot(eigenvalues)
plt.title('Covariance Eigenvalues (linear scale)')
plt.xlabel('Component index')
plt.ylabel('Eigenvalue')
plt.show()


In [None]:
# Project the data in X onto its first two principal components and draw the
# result as a scatter plot coloured by the class labels in y.
import pandas as pd
from sklearn.decomposition import PCA

# Note: an earlier by-hand variant (subtract the feature means from X, then
# take the dot product with the sorted eigenvectors) was left disabled here;
# scikit-learn's PCA is used for the projection instead.

pca = PCA(n_components=2)
wine_scores = pca.fit_transform(X)

# Wrap the two score columns in a DataFrame and attach the class labels.
projected_data = pd.DataFrame(wine_scores, columns=['PC1', 'PC2'])
projected_data['class'] = y

fig, ax = plt.subplots(figsize=(8, 6))
scatter = ax.scatter(
    projected_data['PC1'],
    projected_data['PC2'],
    c=projected_data['class'],
    cmap='viridis',
    marker='o',
    alpha=0.6,
)
ax.set_title('Scatter Plot of the Wine Dataset Projected onto the First Two Principal Components')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.grid(True)
fig.colorbar(scatter, ax=ax, label='Class')
plt.show()

In [None]:
# Seeds (wheat) dataset: 2-component PCA scatter plot coloured by class,
# followed by the eigenvalue spectrum of the feature covariance matrix.
import pandas as pd

# Whitespace-delimited text file without a header row. sep=r'\s+' is the
# replacement for the deprecated delim_whitespace=True option.
data = pd.read_csv('seeds_dataset.txt', sep=r'\s+', header=None)

# The last column is the class label: split it off from the feature columns.
# NOTE: X is rebound here, so the wine features loaded earlier are no longer
# reachable under this name after this cell runs.
X = data.iloc[:, :-1].values
seed_labels = data.iloc[:, -1].values

# project onto the first two principal components
pca = PCA(n_components=2)
projected_data = pca.fit_transform(X)

projected_data = pd.DataFrame(projected_data, columns=['PC1', 'PC2'])

# Attach the labels that belong to THIS dataset. The global `y` still holds the
# wine targets from the first cell, so using it here would colour the seeds
# samples by unrelated labels. `.values` above makes the assignment purely
# positional, which is correct because rows of X and seed_labels come from the
# same frame.
projected_data['class'] = seed_labels

plt.figure(figsize=(8, 6))
scatter = plt.scatter(projected_data['PC1'], projected_data['PC2'], c=projected_data['class'], cmap='viridis', marker='o', alpha=0.6)
plt.title('Scatter Plot of the Seeds Dataset Projected onto the First Two Principal Components')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.grid(True)
plt.colorbar(scatter, label='Class')
plt.show()

# Eigen-decomposition of the covariance matrix. np.cov expects variables in
# rows, hence the transpose. The matrix is symmetric, so eigh is used: it
# guarantees real eigenvalues and orthonormal eigenvectors.
cov_mat = np.cov(X.T)
eigenvalues, eigenvectors = np.linalg.eigh(cov_mat)

# eigh returns ascending order; sort descending and keep eigenvector columns
# aligned with their eigenvalues.
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]

print(eigenvalues)

# plot the eigenvalues without a log scale
plt.plot(eigenvalues)
plt.title('Seeds Covariance Eigenvalues')
plt.xlabel('Component index')
plt.ylabel('Eigenvalue')
plt.show()


In [None]:
# Small worked example: eigen-decomposition of a 2x2 matrix.
matrix = [
    [9, 6],
    [6, 4],
]

# np.linalg.eig accepts the nested list directly and returns the eigenvalues
# together with a matrix whose columns are the matching eigenvectors.
eigenvalues, eigenvectors = np.linalg.eig(matrix)

# Only the eigenvalues are displayed.
print(eigenvalues)

In [None]:
# Worked example: coordinates of a single sample X1 along two given direction
# vectors u1 and u2, measured relative to the mean vector mean_X.
mean_X = np.array([1, 1, 1, 1])
u1 = np.array([0.5, 0.5, 0.5, 0.5])
u2 = np.array([0.5, -0.5, 0.5, -0.5])
X1 = np.array([3, 3, 5, 7])

# Shift the sample so that it is expressed relative to the mean.
X1_centered = X1 - mean_X

# Each coordinate is the dot product of the centered sample with one direction.
projection_u1 = X1_centered.dot(u1)
projection_u2 = X1_centered.dot(u2)

# Pair the two scalars up as the sample's coordinates in the (u1, u2) basis.
projection_coordinates = projection_u1, projection_u2

print(projection_coordinates)