### Principal Component Analysis (PCA)

In [41]:
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [42]:
# Load scikit-learn's bundled digits dataset; `digits.data` is the feature matrix.
digits = datasets.load_digits()

# Preview the first five rows of the raw (unscaled) feature matrix.
digits.data[:5]

array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.,  0.,  0., 13., 15., 10.,
        15.,  5.,  0.,  0.,  3., 15.,  2.,  0., 11.,  8.,  0.,  0.,  4.,
        12.,  0.,  0.,  8.,  8.,  0.,  0.,  5.,  8.,  0.,  0.,  9.,  8.,
         0.,  0.,  4., 11.,  0.,  1., 12.,  7.,  0.,  0.,  2., 14.,  5.,
        10., 12.,  0.,  0.,  0.,  0.,  6., 13., 10.,  0.,  0.,  0.],
       [ 0.,  0.,  0., 12., 13.,  5.,  0.,  0.,  0.,  0.,  0., 11., 16.,
         9.,  0.,  0.,  0.,  0.,  3., 15., 16.,  6.,  0.,  0.,  0.,  7.,
        15., 16., 16.,  2.,  0.,  0.,  0.,  0.,  1., 16., 16.,  3.,  0.,
         0.,  0.,  0.,  1., 16., 16.,  6.,  0.,  0.,  0.,  0.,  1., 16.,
        16.,  6.,  0.,  0.,  0.,  0.,  0., 11., 16., 10.,  0.,  0.],
       [ 0.,  0.,  0.,  4., 15., 12.,  0.,  0.,  0.,  0.,  3., 16., 15.,
        14.,  0.,  0.,  0.,  0.,  8., 13.,  8., 16.,  0.,  0.,  0.,  0.,
         1.,  6., 15., 11.,  0.,  0.,  0.,  1.,  8., 13., 15.,  1.,  0.,
         0.,  0.,  9., 16., 16.,  5.,  0.,  0.,  0.,  0.,  

In [43]:
# Standardize the digit features with StandardScaler before PCA
# (zero mean / unit variance per column, per the scikit-learn docs).
features = StandardScaler().fit_transform(digits.data)

# Preview the first five standardized rows.
features[:5]

array([[ 0.        , -0.33501649, -0.04308102,  0.27407152, -0.66447751,
        -0.84412939, -0.40972392, -0.12502292, -0.05907756, -0.62400926,
         0.4829745 ,  0.75962245, -0.05842586,  1.12772113,  0.87958306,
        -0.13043338, -0.04462507,  0.11144272,  0.89588044, -0.86066632,
        -1.14964846,  0.51547187,  1.90596347, -0.11422184, -0.03337973,
         0.48648928,  0.46988512, -1.49990136, -1.61406277,  0.07639777,
         1.54181413, -0.04723238,  0.        ,  0.76465553,  0.05263019,
        -1.44763006, -1.73666443,  0.04361588,  1.43955804,  0.        ,
        -0.06134367,  0.8105536 ,  0.63011714, -1.12245711, -1.06623158,
         0.66096475,  0.81845076, -0.08874162, -0.03543326,  0.74211893,
         1.15065212, -0.86867056,  0.11012973,  0.53761116, -0.75743581,
        -0.20978513, -0.02359646, -0.29908135,  0.08671869,  0.20829258,
        -0.36677122, -1.14664746, -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -1.09493684,  0.03864775,  0.

In [44]:
# PCA configured to keep 10 components; whiten=True requests whitened
# (unit-variance) component scores -- see the scikit-learn PCA docs.
pca = PCA(n_components=10, whiten=True)

In [45]:
# Fit PCA on the standardized features and project them in one step.
features_pca = pca.fit_transform(features)
# Preview the first five projected rows.
features_pca[:5]

array([[ 0.70632379, -0.39518529, -1.73810442,  1.0185354 , -0.1561553 ,
         0.32742217, -0.91026245,  1.040353  ,  0.09874812, -0.60157726],
       [ 0.21736471,  0.38280095,  1.72886126, -0.89345638, -0.57666952,
        -0.42113759,  1.20729181,  0.38778331,  0.80485624,  0.06473074],
       [ 0.48038829, -0.13155512,  1.33192342, -1.02567578, -1.2092041 ,
         0.5760404 , -0.83119204,  0.81559155,  0.5825518 , -0.82539296],
       [-1.11460531, -0.35963531, -0.35289846, -1.09825019, -0.32278978,
         0.45111804,  0.61921642, -0.9577201 ,  0.21881907,  0.55133747],
       [ 1.67114433, -0.45264066,  0.42903539, -0.71274997, -0.99419407,
         0.8881769 ,  0.6950538 , -0.66828613, -1.19461953,  0.88619977]])

In [46]:
# Report the column count before and after PCA, one line each.
print(
    f'Original number of features: {features.shape[1]}',
    f'Reduced number of features: {features_pca.shape[1]}',
    sep='\n',
)

Original number of features: 64
Reduced number of features: 10


In [47]:
from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA

In [54]:
# Build a toy dataset that no straight line can separate: two noisy
# concentric circles. Only the coordinates are kept; the labels are discarded.
features, _ = make_circles(
    n_samples=1000,
    factor=0.1,
    noise=0.1,
    random_state=1,
)

features[:5]

array([[ 0.23058395, -0.10671314],
       [-0.0834218 , -0.22647078],
       [ 0.9246533 , -0.71492522],
       [-0.10217077, -0.89283523],
       [-1.01719242,  0.24737775]])

In [57]:
# Project the 2-D data onto a single RBF-kernel principal component.
# With 1000 samples and n_components=1, scikit-learn's default
# eigen_solver="auto" selects ARPACK, which starts from a random vector;
# pinning random_state keeps the result reproducible across reruns.
kpca = KernelPCA(kernel="rbf", gamma=15, n_components=1, random_state=1)
features_kpca = kpca.fit_transform(features)

# Preview the first five projected rows.
features_kpca[:5]

array([[ 0.08961469],
       [ 0.17082614],
       [-0.36539792],
       [-0.37995615],
       [-0.37090715]])

In [50]:
# Report the column count before and after Kernel PCA, one line each.
print(
    f'Original number of features: {features.shape[1]}',
    f'Reduced number of features: {features_kpca.shape[1]}',
    sep='\n',
)

Original number of features: 2
Reduced number of features: 1


### Linear Discriminant Analysis (LDA)

In [60]:
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [70]:
# Load the iris dataset and unpack its feature matrix and class labels.
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Preview the first five feature rows.
features[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [71]:
# Fit LDA on the labelled data and project onto a single discriminant axis
# in one call.
lda = LinearDiscriminantAnalysis(n_components=1)
features_lda = lda.fit_transform(features, target)

# Preview the first five projected rows.
features_lda[:5]

array([[8.06179978],
       [7.12868772],
       [7.48982797],
       [6.81320057],
       [8.13230933]])

In [72]:
# Report the column count before and after LDA, one line each.
print(
    f'Original number of features: {features.shape[1]}',
    f'Reduced number of features: {features_lda.shape[1]}',
    sep='\n',
)

Original number of features: 4
Reduced number of features: 1


In [73]:
# Fraction of variance explained by each retained discriminant component.
lda.explained_variance_ratio_

array([0.9912126])

#### Take home -> Module 5

Apply PCA, Kernel PCA, and t-SNE (`sklearn.manifold.TSNE`).