# Installing Libraries (Python version >= 3.8)

In [None]:
import sys
# Environment check: stop with a readable error when the interpreter is not
# Python 3.8+. An explicit raise is used instead of a bare `assert` because
# asserts are skipped under `python -O` and fail without any explanation.
version = sys.version_info
print(version)
if version.major != 3 or version.minor < 8:
    raise RuntimeError(
        "This notebook requires Python 3 (>= 3.8); found {}.{}".format(version.major, version.minor)
    )

In [None]:
!python -m pip install numpy==1.23.5 scikit-learn==1.2.2 matplotlib==3.7.4

# Extracting the Principal Components

## 1. Standardizing the data

In [None]:
import numpy as np
from sklearn import datasets, model_selection


def standardize(X: np.ndarray) -> np.ndarray:
    """Return *X* with every column shifted to zero mean and scaled to unit variance.

    The mean and the (population) standard deviation are computed along
    axis 0, i.e. separately for each column of a 2-D array.

    Args:
        X: Numeric array; for a 2-D input, rows are samples and columns are
            features.

    Returns:
        A new array of the same shape as ``X``. Columns whose standard
        deviation is zero (constant features) come back as all zeros instead
        of NaN.
    """
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    # A constant column has std == 0 and would yield 0/0 = NaN; dividing by 1
    # instead leaves the already-centered zeros untouched.
    safe_std = np.where(std == 0, 1.0, std)
    return (X - mean) / safe_std


# Load the Wine dataset and hold out 30% of it, keeping the class proportions
# equal in both splits (stratify=y) and the split reproducible (random_state=0).
dataset = datasets.load_wine()
X = dataset.data
y = dataset.target
X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.3, random_state=0, stratify=y)

# Scale the held-out split with the TRAINING mean/std so both splits live in
# the same feature space; standardizing the test split with its own statistics
# would apply a different transformation to it. The statistics must be taken
# before X_train is overwritten below.
train_mean = X_train.mean(axis=0)
train_std = X_train.std(axis=0)
X_test = (X_test - train_mean) / train_std
X_train = standardize(X_train)

# The training summary shows mean 0.00 / std 1.00; the test summary is only
# close to those values because it was scaled with the training statistics.
print("X_train => samples: {}; features: {}; mean: {:.2f}; standard: {:.2f}".format(*X_train.shape, X_train.mean(), X_train.std()))
print("y_train => samples: {}".format(*y_train.shape))
print("X_test => samples: {}; features: {}; mean: {:.2f}; standard: {:.2f}".format(*X_test.shape, X_test.mean(), X_test.std()))
print("y_test => samples: {}".format(*y_test.shape))

## 2. Creating the covariance matrix

In [None]:
# Feature-by-feature covariance of the standardized training data.
# rowvar=False tells np.cov that the variables are the columns of X_train
# (one row per sample), which is the same as passing the transposed matrix.
cov_mat = np.cov(X_train, rowvar=False)
print("Covariance matrix: \n{}".format(cov_mat))

## 3. Obtaining the eigenvalues and eigenvectors of the covariance matrix

In [None]:
# A covariance matrix is symmetric, so decompose it with eigh, the routine
# designed for symmetric/Hermitian input: it always returns real eigenvalues,
# whereas the general-purpose eig may return complex values in some cases.
# eigh reports the eigenvalues in ascending order and the matching
# eigenvectors as the columns eigen_vecs[:, i]. The sign of each eigenvector
# is arbitrary and may differ from what eig would return.
eigen_vals, eigen_vecs = np.linalg.eigh(cov_mat)
print("Eigenvalues: \n{}".format(eigen_vals))
print("Eigenvectors: \n{}".format(eigen_vecs))

## 4. Selecting principal components using the explained variance

In [None]:
# Explained-variance ratio of each component: its eigenvalue divided by the
# sum of all eigenvalues, listed from the largest eigenvalue to the smallest.
# The total is computed once instead of once per list element.
total_variance = sum(eigen_vals)
explained_variance = [eigen_val / total_variance for eigen_val in sorted(eigen_vals, reverse=True)]
# Running total of the ratios above, in the same order.
cumulative_explained_variance = np.cumsum(explained_variance)

In [None]:
import matplotlib.pyplot as plt

# Bars: share of the total variance captured by each component (largest first).
# Step line: running total of those shares.
component_index = range(1, len(explained_variance) + 1)
plt.bar(
    component_index,
    explained_variance,
    alpha=0.5,
    align="center",
    label="individual explained variance",
)
plt.step(
    component_index,
    cumulative_explained_variance,
    where="mid",
    label="cumulative explained variance",
)
plt.xlabel("Principal component index")
plt.ylabel("Explained variance ratio")
plt.legend(loc="best")
plt.show()

# Feature Transformation

## 1. Selecting k eigenvectors that correspond to the k largest eigenvalues

In [None]:
# Pair every eigenvalue magnitude with its eigenvector (a column of
# eigen_vecs, i.e. a row of eigen_vecs.T) and order the pairs from the
# largest magnitude to the smallest.
eigen_pairs = sorted(
    zip(np.abs(eigen_vals), eigen_vecs.T),
    key=lambda pair: pair[0],
    reverse=True,
)

## 2. Constructing the projection matrix W

In [None]:
# Projection matrix W: the eigenvectors of the first three entries of
# eigen_pairs, each turned into a column and placed side by side.
W = np.hstack([eigen_pairs[rank][1][:, np.newaxis] for rank in range(3)])
print("Matrix W: \n{}".format(W))

## 3. Transforming the d-dimensional input dataset X using the projection matrix W to obtain the new k-dimensional feature subspace

In [None]:
# Project the training samples onto the selected components: each row of
# X_train is multiplied by W, so the result keeps the number of samples and
# has one column per column of W.
X_train_pca = np.dot(X_train, W)
# The messages previously ended with a stray full-width ")" that had no
# opening bracket; it has been removed.
print("Dataset before transformation => samples: {}; features: {}".format(*X_train.shape))
print("Dataset after transformation => samples: {}; features: {}".format(*X_train_pca.shape))

# Visualizing the transformed Wine training dataset

In [None]:
# 3D scatter of the projected training samples, one color per class label.
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(1, 1, 1, projection="3d")

colors = ["r", "b", "g"]
for t, c in zip(np.unique(y_train), colors):
    # Select the rows of this class once and reuse them for all three axes.
    class_points = X_train_pca[y_train == t]
    ax.scatter3D(class_points[:, 0], class_points[:, 1], class_points[:, 2], c=c, label=t, s=50)

ax.set_title("Dimension Reduction to 3D")
# Axis labels: "Principal" (not "Principle") is the correct term.
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
ax.set_zlabel("Principal Component 3")
plt.tight_layout()
plt.legend()
plt.show()

In [None]:
# Rebuild the projection matrix from only the first two entries of
# eigen_pairs, then project the training samples again so that X_train_pca
# has two columns.
W = np.hstack((eigen_pairs[0][1][:, np.newaxis], eigen_pairs[1][1][:, np.newaxis]))

X_train_pca = np.dot(X_train, W)
# The messages previously ended with a stray full-width ")" that had no
# opening bracket; it has been removed.
print("Dataset before transformation => samples: {}; features: {}".format(*X_train.shape))
print("Dataset after transformation => samples: {}; features: {}".format(*X_train_pca.shape))

In [None]:
# 2D scatter of the projected training samples; every class label gets its
# own color and marker.
colors = ["r", "b", "g"]
markers = ["s", "x", "o"]
for t, c, m in zip(np.unique(y_train), colors, markers):
    # Select the rows of this class once and reuse them for both axes.
    class_points = X_train_pca[y_train == t]
    plt.scatter(class_points[:, 0], class_points[:, 1], c=c, label=t, marker=m)

plt.title("Dimension Reduction to 2D")
# Axis labels: "Principal" (not "Principle") is the correct term.
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.tight_layout()
plt.legend()
plt.show()