# PCA auf Bilddateien
## Bild in Graustufen

Für Bilder in Graustufen ist die Berechnung einfacher, weil solche Bilder als Matrix der Größe (Höhe in Pixel, Breite in Pixel) dargestellt werden können.
Wie unten gezeigt, müssen bei farbigen Bildern die Kanäle aufgetrennt und separat bearbeitet werden.

In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
%matplotlib inline


def show_image(im: Image.Image, ax: plt.Axes = None):
    """Show image ``im`` in a notebook cell using matplotlib's ``imshow``.

    Args:
        im: Image to draw (the notebook passes PIL images).
        ax: Axes to draw on. When ``None``, a new single-axes figure of
            size 20 x 13 inches is created.
    """
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(20, 13))

    # Single-channel images reach imshow as 2-D scalar data, which matplotlib
    # would otherwise render with its default (non-grey) colormap. ``cmap``
    # is ignored for RGB(A) data, so colour images are unaffected.
    ax.imshow(im, cmap='gray')

    # Hide x and y axis
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)


* Darstellung des ursprünglichen Bildes

In [None]:
# Picture used throughout the greyscale section
grey_image_path = 'data/elbphilharmonie_sw.tiff'
im = Image.open(grey_image_path)

# Report the pixel dimensions and the PIL mode, then draw the picture
print('Image size: ', im.size, '\nImage mode: ', im.mode)
show_image(im)


* Hauptkomponentenanalyse des Bildes

In [None]:
# Force a single luminance channel (PIL mode 'L') and expose the pixels as a
# 2-D numpy array.
grey_image = im.convert('L')
im_np_array = np.asarray(grey_image)

# Fit a PCA without limiting the number of components: every image row is
# treated as one sample, every pixel column as one feature.
pca = PCA()
score = pca.fit_transform(im_np_array)  # rows projected onto the components
coeff = np.transpose(pca.components_)   # principal axes stored as columns


In [None]:
cumulative_variance_explained = pca.explained_variance_ratio_.cumsum()
# Entry i holds the variance explained by the first i + 1 components, so the
# x axis is numbered from 1 to match its label ("number of dimensions").
nb_dimensions = np.arange(1, cumulative_variance_explained.size + 1)

fig, ax = plt.subplots(1, 1, figsize=(20, 10))
ax.plot(nb_dimensions, cumulative_variance_explained)
ax.set_ylim([0, 1.1])
ax.set(xlabel='Anzahl Dimensionen', ylabel='Anteil erklärter Varianz')

for var_expl in [0.9, 0.95, 0.99]:
    # First position whose cumulative share reaches the threshold; adding 1
    # turns the 0-based index into a component count.
    first_index = np.nonzero(
        cumulative_variance_explained >= var_expl)[0].min()
    print(
        f'Number of dimensions explaining {var_expl * 100} % of the variance: {first_index + 1}')


* Darstellung des mittels PCA dimensionsreduzierten Bildes

In [None]:
# Number of leading principal components kept for the reconstruction
nb_components = 404

# Back-project the truncated scores onto the kept axes and add the mean
# stored on the fitted PCA to return to pixel space.
kept_scores = score[:, :nb_components]
kept_axes = coeff[:, :nb_components]
im_np_array_reduced = np.matmul(kept_scores, kept_axes.T) + pca.mean_

reduced_image = Image.fromarray(im_np_array_reduced)
show_image(reduced_image)


## Farbiges Bild

In [None]:
# Picture used throughout the colour section; it replaces the greyscale `im`
color_image_path = 'data/elbphilharmonie.tiff'
im = Image.open(color_image_path)
show_image(im)


In [None]:
# Expose the colour image's pixel data as a numpy array (no grey conversion
# happens in this section).
im_np_array = np.asarray(im)

print('Shape: ', im_np_array.shape)

# One panel per channel: copy a single channel into an otherwise all-zero
# array of the same shape so that only this channel contributes.
fig, ax = plt.subplots(1, 3, figsize=(18, 4))
for channel, channel_ax in enumerate(ax):
    im_single_color = np.zeros_like(im_np_array)
    im_single_color[:, :, channel] = im_np_array[:, :, channel]
    single_channel_image = Image.fromarray(im_single_color)
    show_image(single_channel_image, ax=channel_ax)


In [None]:
# Per-channel results, keyed by channel index (0, 1, 2)
pca_dict, score_dict, coeff_dict = {}, {}, {}

for channel in range(3):
    # Each channel is a 2-D matrix and gets its own independent PCA
    channel_pixels = im_np_array[:, :, channel]
    channel_pca = PCA()

    score_dict[channel] = channel_pca.fit_transform(channel_pixels)
    coeff_dict[channel] = np.transpose(channel_pca.components_)
    pca_dict[channel] = channel_pca


In [None]:
fig, ax = plt.subplots(1, 1, figsize=(20, 10))
for channel in range(3):
    cumulative_variance_explained = (
        pca_dict[channel].explained_variance_ratio_.cumsum())
    # Entry i holds the variance explained by the first i + 1 components, so
    # the x axis is numbered from 1 to match its label.
    nb_dimensions = np.arange(1, cumulative_variance_explained.size + 1)
    ax.plot(nb_dimensions, cumulative_variance_explained, label=channel)
ax.set_ylim([0, 1.1])
ax.set(xlabel='Anzahl Dimensionen', ylabel='Anteil erklärter Varianz')
ax.legend()


In [None]:
# Number of leading principal components kept per channel
nb_components = 1000

# Output buffer with the same shape and dtype as the source pixel array
im_np_array_reduced = np.zeros_like(im_np_array)
# Representable value range of that pixel dtype
pixel_range = np.iinfo(im_np_array_reduced.dtype)

for channel in range(3):
    # Back-project the truncated scores and add the mean stored on the
    # fitted PCA; the result is floating point.
    reconstructed = (
        score_dict[channel][:, :nb_components]
        @ coeff_dict[channel][:, :nb_components].T
        + pca_dict[channel].mean_
    )
    # Round and clip before storing: assigning floats directly into the
    # integer buffer truncates them, and values outside the dtype's range
    # can wrap around and show up as speckles.
    im_np_array_reduced[:, :, channel] = np.clip(
        np.rint(reconstructed), pixel_range.min, pixel_range.max)

show_image(Image.fromarray(im_np_array_reduced))
