# PRINCIPAL COMPONENT ANALYSIS

### Import libraries

In [1]:
import warnings 
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

### Import the dataset

In [2]:
# Load the digits dataset that ships with scikit-learn.
digits = load_digits()

# Show the feature matrix, the target vector and the column names, one per line.
print(digits.data, digits.target, digits.feature_names, sep="\n")

# Wrap the feature matrix in a DataFrame with one named column per feature.
df = pd.DataFrame(data=digits.data, columns=digits.feature_names)

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]
[0 1 2 ... 8 9 8]
['pixel_0_0', 'pixel_0_1', 'pixel_0_2', 'pixel_0_3', 'pixel_0_4', 'pixel_0_5', 'pixel_0_6', 'pixel_0_7', 'pixel_1_0', 'pixel_1_1', 'pixel_1_2', 'pixel_1_3', 'pixel_1_4', 'pixel_1_5', 'pixel_1_6', 'pixel_1_7', 'pixel_2_0', 'pixel_2_1', 'pixel_2_2', 'pixel_2_3', 'pixel_2_4', 'pixel_2_5', 'pixel_2_6', 'pixel_2_7', 'pixel_3_0', 'pixel_3_1', 'pixel_3_2', 'pixel_3_3', 'pixel_3_4', 'pixel_3_5', 'pixel_3_6', 'pixel_3_7', 'pixel_4_0', 'pixel_4_1', 'pixel_4_2', 'pixel_4_3', 'pixel_4_4', 'pixel_4_5', 'pixel_4_6', 'pixel_4_7', 'pixel_5_0', 'pixel_5_1', 'pixel_5_2', 'pixel_5_3', 'pixel_5_4', 'pixel_5_5', 'pixel_5_6', 'pixel_5_7', 'pixel_6_0', 'pixel_6_1', 'pixel_6_2', 'pixel_6_3', 'pixel_6_4', 'pixel_6_5', 'pixel_6_6', 'pixel_6_7', 'pixel_7_0', 'pixel_7_1', 'pixel_7_2', 'pixel_7_3', 'pixel_7_

### Dataset Analysis


In [3]:
# Preview the first five rows, then report the (rows, columns) shape.
print(df.head(n=5), df.shape, sep="\n")

   pixel_0_0  pixel_0_1  pixel_0_2  pixel_0_3  pixel_0_4  pixel_0_5  \
0        0.0        0.0        5.0       13.0        9.0        1.0   
1        0.0        0.0        0.0       12.0       13.0        5.0   
2        0.0        0.0        0.0        4.0       15.0       12.0   
3        0.0        0.0        7.0       15.0       13.0        1.0   
4        0.0        0.0        0.0        1.0       11.0        0.0   

   pixel_0_6  pixel_0_7  pixel_1_0  pixel_1_1  ...  pixel_6_6  pixel_6_7  \
0        0.0        0.0        0.0        0.0  ...        0.0        0.0   
1        0.0        0.0        0.0        0.0  ...        0.0        0.0   
2        0.0        0.0        0.0        0.0  ...        5.0        0.0   
3        0.0        0.0        0.0        8.0  ...        9.0        0.0   
4        0.0        0.0        0.0        0.0  ...        0.0        0.0   

   pixel_7_0  pixel_7_1  pixel_7_2  pixel_7_3  pixel_7_4  pixel_7_5  \
0        0.0        0.0        6.0       13.0

### Preparing for PCA

In [4]:
# Fit the scaler on the raw features, then apply it to the same frame.
scaler = StandardScaler().fit(df)
scaled_data = scaler.transform(df)

# Re-wrap the scaled values in a DataFrame carrying the original column names.
df_scaled = pd.DataFrame(data=scaled_data, columns=digits.feature_names)

In [5]:
# Preview the first five rows of the standardized features.
print(df_scaled.head(n=5))

   pixel_0_0  pixel_0_1  pixel_0_2  pixel_0_3  pixel_0_4  pixel_0_5  \
0        0.0  -0.335016  -0.043081   0.274072  -0.664478  -0.844129   
1        0.0  -0.335016  -1.094937   0.038648   0.268751  -0.138020   
2        0.0  -0.335016  -1.094937  -1.844742   0.735366   1.097673   
3        0.0  -0.335016   0.377661   0.744919   0.268751  -0.844129   
4        0.0  -0.335016  -1.094937  -2.551014  -0.197863  -1.020657   

   pixel_0_6  pixel_0_7  pixel_1_0  pixel_1_1  ...  pixel_6_6  pixel_6_7  \
0  -0.409724  -0.125023  -0.059078  -0.624009  ...  -0.757436  -0.209785   
1  -0.409724  -0.125023  -0.059078  -0.624009  ...  -0.757436  -0.209785   
2  -0.409724  -0.125023  -0.059078  -0.624009  ...   0.259230  -0.209785   
3  -0.409724  -0.125023  -0.059078   1.879691  ...   1.072563  -0.209785   
4  -0.409724  -0.125023  -0.059078  -0.624009  ...  -0.757436  -0.209785   

   pixel_7_0  pixel_7_1  pixel_7_2  pixel_7_3  pixel_7_4  pixel_7_5  \
0  -0.023596  -0.299081   0.086719   0.208293

### PCA with two components

### Instantiate and fit the model

In [6]:
# Project the standardized features onto their first two principal components.
# random_state pins the result in case the default "auto" solver selects the
# randomized SVD (the choice depends on the scikit-learn version and data shape).
pca = PCA(n_components = 2, random_state = 0)
df_pca = pca.fit_transform(df_scaled)

component_df = pd.DataFrame(data = df_pca, columns=['First Component','Second Component'])
print(component_df.head())

# Only the last expression of a cell is displayed, so the intermediate variance
# figures are printed explicitly; as bare expressions they were silently discarded.
print("Explained variance:", pca.explained_variance_)
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Running total of the explained-variance ratios, shown as the cell's output.
np.cumsum(pca.explained_variance_ratio_)

   First Component  Second Component
0        -1.914214         -0.954502
1        -0.588980          0.924636
2        -1.302039         -0.317189
3         3.020770         -0.868772
4        -4.528949         -1.093480


array([0.12033916, 0.21594971])

### PCA with four components


In [7]:
# Project the standardized features onto their first four principal components.
# random_state pins the result in case the default "auto" solver selects the
# randomized SVD (the choice depends on the scikit-learn version and data shape).
pca = PCA(n_components = 4, random_state = 0)
df_pca = pca.fit_transform(df_scaled)

component_df = pd.DataFrame(data = df_pca, columns=['First Component','Second Component','Third Component','Fourth Component'])
print(component_df.head())

# Only the last expression of a cell is displayed, so the intermediate variance
# figures are printed explicitly; as bare expressions they were silently discarded.
print("Explained variance:", pca.explained_variance_)
print("Explained variance ratio:", pca.explained_variance_ratio_)

# Running total of the explained-variance ratios, shown as the cell's output.
np.cumsum(pca.explained_variance_ratio_)

   First Component  Second Component  Third Component  Fourth Component
0        -1.914214         -0.954502        -3.946035         -2.028723
1        -0.588980          0.924636         3.924755          1.779850
2        -1.302039         -0.317189         3.023333          2.043376
3         3.020770         -0.868772        -0.801744          2.187039
4        -4.528949         -1.093480         0.973121          1.419510


array([0.12033916, 0.21594971, 0.30039385, 0.36537793])