In [7]:
%matplotlib qt
import matplotlib.pyplot as plt
import numpy as np

from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
n_samples = 500
cov = [[3, 3], [3, 4]]
X = rng.multivariate_normal(mean=[0, 0], cov=cov, size=n_samples)
pca = PCA(n_components=1).fit(X)

print(pca.components_)
print(pca.explained_variance_)
print(list(zip(pca.components_, pca.explained_variance_)))

plt.scatter(X[:, 0], X[:, 1], alpha=0.3, label="samples")
for i, (comp, var) in enumerate(zip(pca.components_, pca.explained_variance_)):
    comp = comp * var  # scale component by its variance explanation power
    plt.plot(
        [0, comp[0]],
        [0, comp[1]],
        label=f"Component {i}",
        linewidth=5,
        color=f"C{i + 2}",
    )
plt.gca().set(
    aspect="equal",
    title="2-dimensional dataset with principal components",
    xlabel="first feature",
    ylabel="second feature",
)
plt.legend()
plt.show()

[[-0.64402153 -0.76500736]]
[6.21234881]
[(array([-0.64402153, -0.76500736]), 6.21234881358685)]


In [43]:
x_center = np.mean(X, axis=0)
print(x_center)
X @ pca.components_.T - x_center @ pca.components_.T

[0.12105555 0.11655374]


array([[ 4.67803290e+00],
       [ 2.66567290e+00],
       [ 4.94563228e+00],
       [ 2.59738898e+00],
       [-9.77222253e-02],
       [ 5.32499120e-01],
       [ 2.11330333e+00],
       [ 1.30165553e+00],
       [ 3.98881186e+00],
       [ 9.69610523e-01],
       [-6.36376407e+00],
       [ 2.37955726e+00],
       [ 5.97528571e+00],
       [ 2.84549305e-01],
       [ 4.08429832e+00],
       [ 5.62631580e-01],
       [-2.09934231e+00],
       [-7.23019509e-01],
       [ 3.31121048e+00],
       [-8.22869732e-01],
       [-2.51169037e+00],
       [-4.20089958e+00],
       [-1.13544590e+00],
       [-3.03864476e+00],
       [-3.96013497e+00],
       [-2.12392576e+00],
       [-1.13684573e+00],
       [ 9.41544724e-02],
       [ 3.36620357e-01],
       [-1.45445885e+00],
       [-1.55200816e+00],
       [-1.90897530e+00],
       [ 6.21750108e-01],
       [-4.00323427e+00],
       [-2.15348807e+00],
       [ 2.03158001e+00],
       [ 3.08383419e+00],
       [ 1.19758445e+00],
       [-2.0

In [8]:
pca.fit_transform(X)

array([[ 4.67803290e+00],
       [ 2.66567290e+00],
       [ 4.94563228e+00],
       [ 2.59738898e+00],
       [-9.77222253e-02],
       [ 5.32499120e-01],
       [ 2.11330333e+00],
       [ 1.30165553e+00],
       [ 3.98881186e+00],
       [ 9.69610523e-01],
       [-6.36376407e+00],
       [ 2.37955726e+00],
       [ 5.97528571e+00],
       [ 2.84549305e-01],
       [ 4.08429832e+00],
       [ 5.62631580e-01],
       [-2.09934231e+00],
       [-7.23019509e-01],
       [ 3.31121048e+00],
       [-8.22869732e-01],
       [-2.51169037e+00],
       [-4.20089958e+00],
       [-1.13544590e+00],
       [-3.03864476e+00],
       [-3.96013497e+00],
       [-2.12392576e+00],
       [-1.13684573e+00],
       [ 9.41544724e-02],
       [ 3.36620357e-01],
       [-1.45445885e+00],
       [-1.55200816e+00],
       [-1.90897530e+00],
       [ 6.21750108e-01],
       [-4.00323427e+00],
       [-2.15348807e+00],
       [ 2.03158001e+00],
       [ 3.08383419e+00],
       [ 1.19758445e+00],
       [-2.0