# Chapter 09
## Dimensionality reduction using feature extraction

### 9.1 Reducing features using principal components

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

In [None]:
# Load the digits dataset and standardize its feature matrix.
digits = datasets.load_digits()
scaler = StandardScaler()
features = scaler.fit_transform(digits.data)

# A fractional n_components asks PCA to keep as many components as are needed
# to retain that share of the variance (99% here); whiten=True enables
# whitening of the projected components.
pca = PCA(whiten=True, n_components=0.99)
features_pca = pca.fit_transform(features)

# Compare the column counts before and after the projection.
n_original = features.shape[1]
n_reduced = features_pca.shape[1]
print('Original number of features: ', n_original)
print('Reduced number of features: ', n_reduced)

### 9.2 Reducing features when data is linearly inseparable

In [None]:
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

In [None]:
# Generate 100 noisy concentric-circle samples; the labels are discarded.
features, _ = make_circles(n_samples=100, noise=0.1, factor=0.1, random_state=1)

# Project onto a single component using an RBF kernel.
kpca = KernelPCA(n_components=1, kernel='rbf', gamma=15)
features_kpca = kpca.fit_transform(features)

# Compare the column counts before and after the projection.
n_original = features.shape[1]
n_reduced = features_kpca.shape[1]
print('Original number of features: ', n_original)
print('Reduced number of features: ', n_reduced)

### 9.3 Reducing features by maximizing class separability

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [None]:
# Load the iris data: feature matrix plus class labels.
iris = datasets.load_iris()
features, target = iris.data, iris.target

# LDA is supervised, so fitting needs the labels; transforming does not.
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(features, target)
features_lda = lda.transform(features)

# Compare the column counts before and after the projection.
n_original = features.shape[1]
n_reduced = features_lda.shape[1]
print('Original number of features: ', n_original)
print('Reduced number of features: ', n_reduced)

# Ratio reported by the fitted model for its first component.
first_ratio = lda.explained_variance_ratio_[0]
print('Explained variance', first_ratio.round(2))

In [None]:
# Refit LDA with n_components=None so the explained-variance ratios of all
# available components can be inspected.
lda = LinearDiscriminantAnalysis(n_components=None)
# fit() returns the estimator itself, not transformed data, so its result is
# deliberately not bound to `features_lda`; doing so would replace the
# projected feature matrix with an estimator object.
lda.fit(features, target)
lda_var_ratios = lda.explained_variance_ratio_

def select_n_components(var_ratio, goal_var: float) -> int:
    """Count the leading components needed to reach a cumulative ratio.

    Walks ``var_ratio`` in order while keeping a running sum, and stops at
    the first component where that sum is at least ``goal_var``.

    Args:
        var_ratio: Iterable of per-component explained-variance ratios.
        goal_var: Cumulative ratio to reach.

    Returns:
        The number of components consumed when the threshold is first met,
        or the total number of components if it is never met (0 for an
        empty input).
    """
    cumulative = 0.0
    count = 0
    for count, ratio in enumerate(var_ratio, start=1):
        cumulative += ratio
        if cumulative >= goal_var:
            # Threshold reached: this component is the last one needed.
            return count
    # Input exhausted without reaching the goal: every component was used.
    return count

# Number of leading LDA components needed for the cumulative ratio to reach 0.95.
select_n_components(var_ratio=lda_var_ratios, goal_var=0.95)

### 9.4 Reducing features using matrix factorization

In [None]:
from sklearn.decomposition import NMF

In [None]:
# Load the digits dataset and use its raw feature matrix without
# standardizing it (NMF expects non-negative input).
digits = datasets.load_digits()
features = digits.data

# Factorize into 10 components; random_state fixes the result across runs.
nmf = NMF(random_state=1, n_components=10)
features_nmf = nmf.fit_transform(features)

# Compare the column counts before and after the factorization.
n_original = features.shape[1]
n_reduced = features_nmf.shape[1]
print('Original number of features: ', n_original)
print('Reduced number of features: ', n_reduced)

### 9.5 Reducing features on sparse data

In [None]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
import numpy as np

In [None]:
# Load the digits dataset, standardize it, and wrap it in a CSR sparse matrix.
digits = datasets.load_digits()
scaler = StandardScaler()
features = scaler.fit_transform(digits.data)
features_sparse = csr_matrix(features)

# Fit a 10-component truncated SVD, then project the same sparse matrix.
tsvd = TruncatedSVD(n_components=10)
tsvd.fit(features_sparse)
features_sparse_tsvd = tsvd.transform(features_sparse)

# Compare the column counts before and after the projection.
n_original = features_sparse.shape[1]
n_reduced = features_sparse_tsvd.shape[1]
print('Original number of features: ', n_original)
print('Reduced number of features: ', n_reduced)

In [None]:
# Sum of the first three explained-variance ratios, rounded to 4 decimals.
tsvd.explained_variance_ratio_[:3].sum().round(4)

In [None]:
# Refit with one fewer component than there are feature columns so the
# explained-variance ratios of those components can be inspected.
tsvd = TruncatedSVD(n_components=features_sparse.shape[1] - 1)
# fit() returns the estimator, not a feature matrix, so its result is not
# bound to a features_* name. The fit uses the sparse matrix, matching the
# matrix that n_components is derived from.
tsvd.fit(features_sparse)
tsvd_var_ratios = tsvd.explained_variance_ratio_

def select_n_components(var_ratio, goal_var):
    """Return how many leading components reach the target cumulative ratio.

    Args:
        var_ratio: Iterable of per-component explained-variance ratios,
            consumed in order.
        goal_var: Cumulative ratio at which to stop counting.

    Returns:
        The count of components consumed once the running sum is at least
        ``goal_var``; if the sum never gets there, the count of all
        components (0 for an empty input).
    """
    components_used = 0
    variance_so_far = 0.0
    for ratio in var_ratio:
        components_used += 1
        variance_so_far += ratio
        if variance_so_far >= goal_var:
            # Enough variance accumulated; stop at this component.
            return components_used
    # Goal never reached: report every component that was seen.
    return components_used

# Number of leading SVD components needed for the cumulative ratio to reach 0.95.
select_n_components(var_ratio=tsvd_var_ratios, goal_var=0.95)