In [None]:
%matplotlib inline
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import numpy as np

def plot(X, fig, ax):
    """Scatter the first two columns of ``X`` on ``ax``.

    Parameters
    ----------
    X : array indexable as ``X[:, 0]`` / ``X[:, 1]``
        Column 0 is drawn on the horizontal axis, column 1 on the vertical.
    fig : any
        Accepted so callers can pass the figure along; not used here.
    ax : axes-like object
        Receives the scatter plot, the axis labels and equal axis scaling.
    """
    first_col, second_col = X[:, 0], X[:, 1]
    ax.scatter(first_col, second_col, alpha=0.5)
    ax.set_xlabel(r'$x_1$')
    ax.set_ylabel(r'$x_2$')
    # Same scale on both axes so directions in the cloud are not distorted.
    ax.axis('equal')


In [None]:
# Number of samples in the synthetic data set.
m = 100

# Seeded generator so the point cloud is the same on every run.
rng = np.random.default_rng(1)
# x is uniform on [0, 10); y follows x with standard-normal noise added.
x = rng.random(m) * 10
y = rng.standard_normal(m) + x
# One sample per row, columns (x, y).
X = np.stack([x, y], axis=1)
fig, ax = plt.subplots(nrows=1, figsize=(10, 10))
plot(X, fig, ax)


In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the raw data first, then apply it in a separate step.
scaler = StandardScaler()
scaler.fit(X)
print(f'Skalierer: {scaler}')

X_std = scaler.transform(X)

fig, ax = plt.subplots(nrows=1, figsize=(10, 10))
plot(X_std, fig, ax)
# Fixed symmetric window around the origin; the trailing ';' keeps the
# returned limits from being echoed as cell output.
ax.set_xlim(-3, 3)
ax.set_ylim(-3, 3);

In [None]:
mean_vec = np.mean(X_std, axis=0)
print(f'Means: {mean_vec}')

# Sample covariance: centered data times its transpose, divided by (n - 1).
centered = X_std - mean_vec
cov_mat = centered.T.dot(centered) / (X_std.shape[0] - 1)

# Build the numeric matrix body row by row. Raw strings keep the LaTeX
# backslashes literal (non-raw '\;' / '\e' are invalid escape sequences),
# and the same '\;' separator is used in every row.
cov_rows = r' \\ '.join(
    r'\;'.join(f'{value:.3f}' for value in row) for row in cov_mat
)
display(Markdown(
    'Kovarianz Matrix: \n\n'
    r'$Cov(x_1, x_2)= \begin{bmatrix} \sigma_{1,1}\;\sigma_{1,2} \\ \sigma_{2,1}\;\sigma_{2,2} \end{bmatrix}'
    r'= \begin{bmatrix}' + cov_rows + r' \end{bmatrix}$'
))

In [None]:
eig_vals_, eig_vecs_ = np.linalg.eig(cov_mat)

# Index order that lists the eigenvalues from largest to smallest.
descending = eig_vals_.argsort()[::-1]
eig_vals = eig_vals_[descending]
# np.linalg.eig returns the eigenvectors as columns; reorder the columns to
# match the sorted eigenvalues, then transpose so each ROW is one eigenvector.
eig_vecs = eig_vecs_[:, descending].T

print(f'''
Eigenwerte:
{eig_vals}

Eigenvektoren:
{eig_vecs}
''')

In [None]:
def draw_vector(v0, v1, ax, c='black'):
    """Draw an arrow from ``v0`` to ``v1`` on ``ax``.

    Parameters
    ----------
    v0 : point
        Tail of the arrow.
    v1 : point
        Tip of the arrow.
    ax : axes-like object
        Must provide ``annotate``.
    c : str, optional
        Arrow colour, ``'black'`` by default.
    """
    arrow_style = {
        'arrowstyle': '->',
        'linewidth': 2,
        'shrinkA': 0,
        'shrinkB': 0,
        'color': c,
    }
    # An annotation with empty text leaves only the arrow, which runs from
    # ``xytext`` (tail) to ``xy`` (tip).
    ax.annotate('', xy=v1, xytext=v0, arrowprops=arrow_style)

# Each row of eig_vecs is a direction and the matching eigenvalue its variance;
# scale every arrow to two standard deviations (2 * sqrt(eigenvalue)).
for variance, direction in zip(eig_vals, eig_vecs):
    tip = 2 * np.sqrt(variance) * direction
    draw_vector([0, 0], tip, ax)
# Re-display the existing figure, now with the arrows on it.
fig

Die Eigenvektoren der Kovarianzmatrix bilden ein gutes Koordinatensystem, welches der Verteilung der Punktwolke folgt.

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(21, 10))

# Left panel: the standardized point cloud once more.
plot(X_std, fig, axs[0])
axs[0].set(xlim=[-3, 3], ylim=[-3, 3])

# Overlay the eigenvectors, each scaled to two standard deviations
# (2 * sqrt(eigenvalue)) and coloured per component.
cs = ['green', 'red']
for idx, (length, vector) in enumerate(zip(eig_vals, eig_vecs)):
    v = vector * 2 * np.sqrt(length)
    draw_vector([0, 0], v, axs[0], cs[idx])

# Right panel: the transformation by PCA.
# eig_vecs holds one eigenvector per ROW, so the projection matrix (one
# eigenvector per column) is its transpose. Column i of X_pca is then the
# coordinate of every sample along eigenvector i.
X_pca = X_std.dot(eig_vecs.T)

plot(X_pca, fig, axs[1])
# In the eigenvector basis the components lie along the coordinate axes.
draw_vector([0, 0], [2 * np.sqrt(eig_vals[0]), 0], axs[1], cs[0])
draw_vector([0, 0], [0, 2 * np.sqrt(eig_vals[1])], axs[1], cs[1])

axs[1].set(xlabel='Eigenvektor 1', ylabel='Eigenvektor 2')
axs[1].set(xlim=[-3, 3], ylim=[-3, 3]);
