# Kovarianz, Eigenvektoren und Lineare Regression

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

def plot(X, fig, ax):
    """Scatter the first two columns of X on ax with equally scaled axes.

    Parameters:
        X: 2-D array; column 0 is drawn on the horizontal axis and
            column 1 on the vertical axis.
        fig: accepted for call compatibility; not used by this function.
        ax: axes object that receives the scatter plot and axis labels.
    """
    x1, x2 = X[:, 0], X[:, 1]
    ax.scatter(x1, x2, alpha=0.5)
    ax.set(xlabel='x1', ylabel='x2')
    # Equal scaling so that distances look the same in both directions.
    ax.axis('equal')

In [None]:
m = 10  # number of sample points

# Fixed seed so the sample (and therefore every plot below) is reproducible.
rng = np.random.default_rng(50)
X = rng.standard_normal(size=(m, 2))

fig, ax = plt.subplots(1, figsize=(10, 10))
plot(X, fig, ax)

# Column-wise mean: the centre of the point cloud.
mean_vec = np.mean(X, axis=0)
ax.scatter(mean_vec[0], mean_vec[1], s=100, c='red', label='Mean')
ax.legend()

# Dashed lines through the mean split the plane into four quadrants.
ax.axvline(mean_vec[0], linestyle='dashed', c='grey')
ax.axhline(mean_vec[1], linestyle='dashed', c='grey')

# Quadrants are numbered counter-clockwise starting at the upper right:
# I = upper right, II = upper left, III = lower left, IV = lower right.
ax.text(0.8, 1.5, 'Quadrant I', fontsize=18, c='grey')
ax.text(-2.5, 1.5, 'Quadrant II', fontsize=18, c='grey')
ax.text(-2.5, -2.2, 'Quadrant III', fontsize=18, c='grey')
ax.text(0.8, -2.2, 'Quadrant IV', fontsize=18, c='grey');


In [None]:
# Sample covariance matrix: centre the data on the mean, form the
# 2x2 matrix of summed products, and divide by (number of rows - 1).
centered = X - mean_vec
n_samples = X.shape[0]
cov_mat = (centered.T @ centered) / (n_samples - 1)
print('Kovarianz Matrix \n%s' % cov_mat)

## Eigenvektoren und Eigenwerte

In [None]:
# NOTE: cov_mat from the previous cell can alternatively be obtained with
# np.cov(X.T) (rows = variables, default normalisation by N - 1).

# eig_vals[j] is the eigenvalue belonging to the COLUMN eig_vecs[:, j].
eig_vals, eig_vecs = np.linalg.eig(cov_mat)

print('Eigenvectors \n%s' %eig_vecs)
print('\nEigenvalues \n%s' %eig_vals)

# Verify A·V == V·diag(λ) element-wise. `eig_vals * eig_vecs` broadcasts the
# eigenvalues over the columns. The absolute value is essential: without it
# any large negative residual would also satisfy "< tolerance".
residual = np.dot(cov_mat, eig_vecs) - eig_vals * eig_vecs
print(np.abs(residual) < 0.0000001)

In [None]:
def draw_vector(v0, v1, ax, label=''):
    """Draw an arrow from v0 to v1 on ax and write label at the arrow tip.

    Parameters:
        v0: start point of the arrow (indexable, x at [0], y at [1]).
        v1: end point of the arrow; the label text is anchored here.
        ax: axes object to draw on.
        label: text placed at v1; empty by default.
    """
    arrow_style = {
        'arrowstyle': '->',
        'linewidth': 2,
        # No shrinking, so the arrow touches both end points exactly.
        'shrinkA': 0,
        'shrinkB': 0,
    }
    # An empty annotation text leaves only the arrow visible.
    ax.annotate('', xy=v1, xytext=v0, arrowprops=arrow_style)
    tip_x, tip_y = v1[0], v1[1]
    ax.text(tip_x, tip_y, label)

# np.linalg.eig returns the eigenvectors as the COLUMNS of eig_vecs, and it
# does not order the eigenvalues. Iterating over eig_vecs directly would walk
# its rows (which are not eigenvectors), so select columns explicitly, and
# visit them by descending eigenvalue so that "Principal Component 1" is the
# direction of largest variance.
order = np.argsort(eig_vals)[::-1]
for idx, col in enumerate(order):
    length = eig_vals[col]
    vector = eig_vecs[:, col]
    # Scale the unit eigenvector by the standard deviation along it.
    v = vector * np.sqrt(length)
    print(length, vector)
    draw_vector(mean_vec, mean_vec + v, ax, label=f'Principal Component {idx+1}')

ax.legend()
fig

In [None]:
from sklearn.linear_model import LinearRegression

# Least-squares fit of x2 (second column) on x1 (first column).
# The estimator expects a 2-D feature matrix, hence the (m, 1) column slice.
features = X[:, [0]]
targets = X[:, 1]
model = LinearRegression(fit_intercept=True)
model.fit(features, targets)

# Evaluate the fitted line on 100 evenly spaced x1 values.
xfit = np.linspace(-3, 1.7, 100)
yfit = model.predict(xfit.reshape(-1, 1))

# Overlay the line on the existing axes and redisplay the figure.
ax.plot(xfit, yfit, linestyle='dotted', c='green', label='Regression line')
ax.legend()
fig