# PCA Reconstruction 
stough 202-

Let's reconstruct a face using a PCA model trained on the 
ORL faces dataset.

In [1]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.offsetbox import (OffsetImage,
                                  AnnotationBbox)
import matplotlib.animation as animation
from sklearn.decomposition import PCA
from skimage.color import rgb2gray

from torchvision import transforms
from torchvision.datasets import ImageFolder

# Number of principal components to fit; also used as the slider's upper bound.
NUMPCA = 400
# Path of an external image to reconstruct. When None, a random face from the
# dataset is reconstructed instead.
IMAGE = None 
# IMAGE = 'josh1ThumbSmall.png' # None

# (rows, cols) that every flattened face vector is reshaped to for display.
imshape = (112, 92)

In [2]:
def _flatten_face(face_tensor):
    """Convert one loaded image tensor into a flat 1-D numpy vector."""
    return np.array(face_tensor).ravel()


# Per-image pipeline: force a single grayscale channel, convert to a tensor,
# then flatten so each face becomes one row vector.
face_pipeline = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
    transforms.Lambda(_flatten_face),
])
orl_faces = ImageFolder('/home/dip365/data/ORL/', transform=face_pipeline)

# Stack every transformed image (element 0 of each dataset item) into one 2-D
# array with one face per row; the second element of each item is not used.
allfaces = np.stack([orl_faces[idx][0] for idx in range(len(orl_faces))])

In [3]:
# Fit a PCA model keeping the NUMPCA most important directions in the space of
# faces (each direction has one entry per pixel).
pca = PCA(n_components=NUMPCA)
# Xp has one row per image and NUMPCA columns: the coordinates of each image
# in the fitted PCA basis.
Xp = pca.fit_transform(allfaces)

variance_ratios = pca.explained_variance_ratio_
print('Explained variation per principal component: {}...'.format(variance_ratios[:20]))
print(f'Total variance explained in {NUMPCA} dimensions: {np.sum(variance_ratios)}')

Explained variation per principal component: [0.1760954  0.12906641 0.06841045 0.0557894  0.05109907 0.03362535
 0.02447196 0.02331064 0.01962201 0.01802782 0.01438924 0.01397923
 0.01119041 0.01067275 0.00974647 0.00913625 0.00876043 0.00844453
 0.00735491 0.00695367]...
Total variance explained in 400 dimensions: 1.0


In [4]:
# Fraction of the total variance captured by the first 20 components alone.
pca.explained_variance_ratio_[:20].sum()

0.70014644

&nbsp;

## Now we'll reconstruct an image.

In [5]:
# Try to reconstruct this image with 20 dimensions.

In [6]:
# Choose the image to reconstruct (I) and obtain its PCA coefficients (coeff).
if IMAGE is not None:
    # External image: read it, convert to grayscale, and project it onto the
    # fitted PCA basis as a single-row sample.
    I = rgb2gray(plt.imread(IMAGE))
    coeff = pca.transform(I.reshape(1, -1)).squeeze()
else:
    # No external image given: pick a random training face and reuse the
    # projection already computed for it.
    which = np.random.choice(len(allfaces))
    I = allfaces[which].reshape(imshape).copy()
    coeff = np.squeeze(Xp[which])

In [7]:
# Preview the image that is going to be reconstructed.
f, ax = plt.subplots(nrows=1, ncols=1, figsize=(3, 3))
ax.imshow(I, cmap='gray')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

&nbsp;

## The reconstruction is, as before, a linear combination of PCs added to the mean.

In [8]:
# Treat the mean face as though it were one more basis vector (with a fixed
# coefficient of 1), so that a reconstruction from the first k+1 rows is
# mean + the k most relevant weighted components.
#
# The components are also reordered by the absolute magnitude of this image's
# coefficients, largest first.
# For the natural PCA order, use instead: inorder = np.arange(0, len(coeff))
#
# Guard against re-running this cell: it overwrites `coeff` with the augmented
# vector, so a second run would sort the already-augmented values (including
# the leading 1) and prepend another 1, silently corrupting `pcas`/`coeff`.
# Only augment while `coeff` still holds exactly one raw coefficient per
# fitted component.
if len(coeff) == pca.components_.shape[0]:
    inorder = np.argsort(np.abs(coeff))[::-1]

    pcas = np.concatenate([pca.mean_[None,:], pca.components_[inorder]], axis=0)
    coeff = np.array([1] + list(coeff[inorder]))

In [9]:
def reconstruct(ind):
    """Rebuild the image from the first ``ind + 1`` basis rows.

    Reads the module-level ``coeff`` (weights), ``pcas`` (one flattened basis
    image per row) and ``imshape``.

    Parameters
    ----------
    ind : int
        Index of the last basis row to include.

    Returns
    -------
    numpy.ndarray
        The weighted sum of rows ``0..ind`` of ``pcas``, reshaped to ``imshape``.
    """
    upto = ind + 1
    flat_image = coeff[:upto] @ pcas[:upto]
    return flat_image.reshape(imshape)

In [10]:
from ipywidgets import VBox, IntSlider

plt.ioff()
# NOTE(review): clf() clears whichever figure is current at this point, which
# may be the preview figure created in an earlier cell -- confirm intended.
plt.clf()

# Slider value i selects the basis row shown on the left and the last row
# included in the reconstruction; 0 corresponds to the mean alone.
slider = IntSlider(
    orientation='horizontal',
    value=0,
    min=0,
    max=NUMPCA,
    description='$c_i$'
)


# Currently selected slider index (updated by the slider callback).
curi = 0

# The initial reconstruction is just the mean face.
Ir = np.reshape(pca.mean_, imshape)
AvgFace = Ir.copy()


# Figure display and artists.

fig_args = {'num':' ', 'frameon':True, 'sharex':True, 'sharey':True}
fig, ax = plt.subplots(1,3, figsize=(8,3), **fig_args)

# Left panel: the basis image selected by the slider (initially the mean).
ldisp = ax[0].imshow(Ir, cmap='gray')
ltext = ax[0].set_title('mean')

# Middle panel: the current reconstruction.
mdisp = ax[1].imshow(Ir, cmap='gray')
ax[1].set_title('reconstruction')

# Right panel: signed reconstruction error. 'bwr' is a diverging colormap, so
# the color limits are made symmetric about zero; otherwise autoscaling to the
# data's own min/max would map zero error to a tinted color instead of white.
initial_error = I-Ir
error_limit = np.abs(initial_error).max()
rdisp = ax[2].imshow(initial_error, cmap='bwr', vmin=-error_limit, vmax=error_limit)
ax[2].set_title('error');



# updatefig function
def update_image(change):
    """Slider callback: redraw all three panels for the newly selected index.

    ``change.new`` is the new slider value. It is stored in the module-level
    ``curi``; the module-level ``Ir`` is replaced by the reconstruction for
    that index.

    The reconstruction is recomputed from scratch on every change. An earlier
    incremental version (adding/subtracting one weighted component per step
    between the old and new index) was dropped: it was harder to code and it
    accumulated floating point error.
    """
    global curi, Ir

    def _show_autoscaled(artist, pixels):
        # Swap in new pixel data and stretch the color limits to its range.
        artist.set_array(pixels)
        artist.set_clim(pixels.min(), pixels.max())

    curi = change.new

    # Left panel: the basis image at the selected index.
    _show_autoscaled(ldisp, pcas[curi].reshape(imshape))

    # The old %-style formatting is used on purpose: it does not clash with
    # the braces needed by the TeX subscript. Attempts with an f-string or
    # str.format either failed or only subscripted the first digit. See
    # https://stackoverflow.com/questions/50209287/evaluating-variable-contain-mathematical-expression-in-f-strings
    ltext.set_text('$pc_{%03d}$' % curi)

    # Middle panel: reconstruction using basis rows 0..curi.
    Ir = reconstruct(curi)
    _show_autoscaled(mdisp, Ir)

    # Right panel: signed error; its color limits are left as they were.
    rdisp.set_array(I - Ir)

    fig.canvas.draw()
    fig.canvas.flush_events()


# Call update_image whenever the slider's value changes.
slider.observe(update_image, names='value')

# Show the slider stacked above the interactive figure canvas.
VBox(children=[slider, fig.canvas])

# Demonstration of the effect of changing the first component on the reconstruction of the image

VBox(children=(IntSlider(value=0, description='$c_i$', max=400), Canvas(toolbar=Toolbar(toolitems=[('Home', 'R…