In [16]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits, fetch_lfw_people
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import PCA

In [17]:
faces = fetch_lfw_people(min_faces_per_person=60)

In [18]:
xtrain,xtest, ytrain,ytest = train_test_split(faces.data, faces.target, test_size=.3, random_state=13)

In [19]:
print('Shape of xtrain :',xtrain.shape)
print('Shape of xtest :',xtest.shape)
print('Shape of ytrain :',ytrain.shape)
print('Shape of ytest :',ytest.shape)

Shape of xtrain : (943, 2914)
Shape of xtest : (405, 2914)
Shape of ytrain : (943,)
Shape of ytest : (405,)


### Reducing Dimension using PCA

In [20]:
pca = PCA(n_components=3)
pca.fit(xtrain)
new_dim = pca.transform(xtrain)

In [21]:
print('Original dimension is :',xtrain.shape)
print('After reducing dimension is :',new_dim.shape)


Original dimension is : (943, 2914)
After reducing dimension is : (943, 3)


I am seeing in previous time dimension was row=943 and column=2914
now the dimension is row=943 column=3
Note: PCA reduce the number of feature not numbfers of rows
n_components parameter have range. It's range is 0 to min(n_sample, n_feature)

In [22]:
pca.components_

array([[-0.00669952, -0.00698868, -0.00749792, ..., -0.00766872,
        -0.0061727 , -0.00570054],
       [ 0.01656939,  0.01566691,  0.0158079 , ..., -0.03752428,
        -0.03711444, -0.0355063 ],
       [-0.01995531, -0.01839818, -0.01683861, ..., -0.03024684,
        -0.02691729, -0.02500654]], dtype=float32)

In [23]:
pca.explained_variance_

array([768106.6 , 609143.94, 293751.25], dtype=float32)

the "components" to define the direction of the vector,
and the "explained variance" to define the squared-length of the vector

In [24]:
pca = PCA(n_components=405)
pca.fit(xtest)
new_dim = pca.transform(xtest)

In [25]:
print('Original dimension is :',xtest.shape)
print('After reducing dimension :',new_dim.shape)

Original dimension is : (405, 2914)
After reducing dimension : (405, 405)


original dimension rows=405 columns=2914                                                                                                      
after reducing dimension rows=405 columns=405

In [None]:
pca = PCA(n_components=1000)
pca.fit(xtrain)

We can't do this because n_components value is not between 0 to min(n_sample, n_feature)

### Using RandomizedPCA

In [27]:
rpca = RandomizedPCA(n_components=500)
rpca.fit(xtrain)

PCA(copy=True, iterated_power='auto', n_components=500, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [28]:
new_dim = rpca.transform(xtrain)

In [29]:
print('Original dimension :',xtrain.shape)
print('After reducing :',new_dim.shape)

Original dimension : (943, 2914)
After reducing : (943, 500)


In [30]:
rpca.components_

array([[-0.00669967, -0.0069887 , -0.00749793, ..., -0.00766872,
        -0.00617269, -0.00570053],
       [ 0.01656933,  0.01566694,  0.01580788, ..., -0.03752431,
        -0.03711446, -0.03550633],
       [-0.0199553 , -0.0183981 , -0.01683855, ..., -0.03024689,
        -0.02691737, -0.02500664],
       ...,
       [ 0.00754386, -0.00432319, -0.01125176, ...,  0.00546851,
         0.01163317,  0.0002484 ],
       [-0.01775719,  0.04461878,  0.00666107, ...,  0.00711606,
        -0.01895783, -0.01897602],
       [-0.04978964,  0.03724992,  0.05136729, ..., -0.01754193,
        -0.03965069, -0.01058245]], dtype=float32)

In [31]:
a = rpca.explained_variance_
a.shape

(500,)

In [None]:
rpca = RandomizedPCA(n_components=500)
rpca.fit(xtest)

It is not possible because n_components is not between 0 to min(n_samples, n_features)

### Backing into Previous Dimension Using PCA

In [33]:
pca = PCA(n_components=100)
pca.fit(xtrain)

PCA(copy=True, iterated_power='auto', n_components=100, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [34]:
new_dim = pca.transform(xtrain)

In [35]:
print('Original dimension is :',xtrain.shape)
print('New dimension :',new_dim.shape)

Original dimension is : (943, 2914)
New dimension : (943, 100)


In [36]:
#backing to previous dimension
back_dimension = pca.inverse_transform(new_dim)
back_dimension.shape

(943, 2914)

Now it back to it's previous dimension

### Using SparsePCA

In [None]:
spca = SparsePCA(n_components=3)
spca.fit(xtest)

In [None]:
new_dim = spca.transform(xtest)

In [None]:
print('Original dimension :', xtest.shape)
print('New dimension :', new_dim.shape)

In [None]:
spca.components_