<a href="https://colab.research.google.com/github/jadhav-rakesh/ML/blob/main/ds9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dimensionality Reduction Using Feature Extraction

In [1]:
import numpy as np
import pandas as pd

In [3]:
#Reducing Features Using Principal Components

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

# Load scikit-learn's bundled handwritten-digits dataset.
digits = datasets.load_digits()

# Rescale every feature to zero mean and unit variance before running PCA.
features = StandardScaler().fit_transform(digits.data)

# A float n_components keeps just enough components to explain that share of
# the variance (99% here); whiten=True rescales each component to unit variance.
pca = PCA(whiten=True, n_components=0.99)

feature_pca = pca.fit_transform(features)

# Feature count before and after the projection, one value per line.
print(features.shape[1], feature_pca.shape[1], sep="\n")

64
54


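# Principal Component Analysis (PCA)
* Linear dimensionality reduction technique
* Unsupervised technique (it does not use the target vector)
* Projects observations onto the principal components that retain the most variance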

# Reducing Features When Data Is Linearly Inseparable

In [5]:
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

# Build a two-feature toy dataset of concentric circles; the class labels
# returned alongside the features are discarded.
features, _ = make_circles(
    n_samples=1000,
    factor=0.1,
    noise=0.1,
    random_state=1,
)

# Kernel PCA with an RBF kernel, keeping a single component.
kpca = KernelPCA(n_components=1, kernel="rbf", gamma=15)
features_kpca = kpca.fit_transform(features)

# Feature count before and after the projection, one value per line.
print(features.shape[1], features_kpca.shape[1], sep="\n")

2
1


# Reducing Features by Maximizing Class Separability

In [6]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Iris: feature matrix plus class labels (LDA is supervised, so it needs both).
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Fit LDA keeping one discriminant, then project the features onto it.
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(features, target)
features_lda = lda.transform(features)

# Original feature count, then the shape of the reduced matrix.
print(features.shape[1], features_lda.shape, sep="\n")

4
(150, 1)


In [8]:
# Share of variance explained by each discriminant component kept by `lda`.
lda.explained_variance_ratio_

array([0.9912126])

In [9]:
# Refit LDA without limiting the number of discriminants
# (n_components=None lets scikit-learn keep every available one).
lda = LinearDiscriminantAnalysis(n_components=None)

# fit() returns the fitted estimator, not transformed data, so its return value
# is deliberately not bound to a features_* name; the projected `features_lda`
# array from the earlier cell is left intact.
lda.fit(features, target)

# Explained-variance ratio of each discriminant, in component order.
lda_var_ratios = lda.explained_variance_ratio_

def select_n_components(var_ratio, goal_var: float) -> int:
    """Count the leading components needed to reach a target explained variance.

    Walks ``var_ratio`` in order, accumulating the values, and stops at the
    first component where the running total is at least ``goal_var``.

    Args:
        var_ratio: Iterable of per-component explained-variance ratios.
        goal_var: Cumulative explained-variance target (e.g. ``0.95``).

    Returns:
        The number of components consumed when the target was first met.
        If the target is never met, this is the total number of components;
        for an empty ``var_ratio`` it is ``0``.
    """
    total_variance = 0.0
    n_components = 0
    for explained_variance in var_ratio:
        total_variance += explained_variance
        n_components += 1

        # Stop as soon as the running total meets the target.
        if total_variance >= goal_var:
            break

    return n_components


# Backward-compatible alias: the function was originally defined under this
# misspelled name, and existing calls still use it.
select_n_compnents = select_n_components

# Number of discriminants needed for the cumulative explained-variance ratio to reach 0.95.
select_n_compnents(lda_var_ratios, 0.95)

1

# Reducing Features Using Matrix Factorization

* NMF is an unsupervised technique for linear dimensionality reduction that factorizes the feature matrix into matrices representing the latent relationship between observations and their features


In [10]:
#You have a feature matrix of nonnegative values and want to reduce the dimensionality.

from sklearn.decomposition import NMF
from sklearn import datasets

# Reload the digits dataset and use its feature matrix unscaled;
# the preceding comment in this cell states that NMF needs nonnegative values.
digits = datasets.load_digits()
features = digits.data

# Factorize into 10 components; the seed keeps the result repeatable.
nmf = NMF(random_state=42, n_components=10)
features_nmf = nmf.fit_transform(features)

# Feature count before and after the factorization, one value per line.
print(features.shape[1], features_nmf.shape[1], sep="\n")

64
10




# Reducing Features on Sparse Data

In [11]:
#Truncated Singular Value Decomposition (TSVD):

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
from sklearn import datasets
import numpy as np

# Load the digits dataset and standardize its features.
digits = datasets.load_digits()

features = StandardScaler().fit_transform(digits.data)

# Store the matrix in CSR form to demonstrate that TruncatedSVD accepts sparse input.
# NOTE(review): standardizing centers each feature, so most entries here are
# probably nonzero; a genuinely sparse matrix would show the benefit better.
features_sparse = csr_matrix(features)

# TruncatedSVD uses a randomized solver by default; fixing random_state makes
# the reduced features and explained-variance figures reproducible across runs.
tsvd = TruncatedSVD(n_components=10, random_state=42)

features_sparse_tvsd = tsvd.fit(features_sparse).transform(features_sparse)

# Feature count before and after the reduction.
print(features_sparse.shape[1])
print(features_sparse_tvsd.shape[1])

64
10


In [12]:
# Combined explained-variance ratio of the first three components.
tsvd.explained_variance_ratio_[0:3].sum()

0.30039385391325724

In [13]:
from functools import total_ordering
# Refit with one component fewer than the number of features so the
# explained-variance ratios cover (almost) the whole decomposition.
# random_state pins the default randomized solver for reproducible results.
tsvd = TruncatedSVD(n_components=features_sparse.shape[1] - 1, random_state=42)

# Fit on the sparse matrix that n_components was derived from, and keep the
# transformed features (not the estimator that fit() returns) under this name.
features_tsvd = tsvd.fit_transform(features_sparse)

# Explained-variance ratio of each component, in component order.
tsvd_var_ratios = tsvd.explained_variance_ratio_

def select_n_components(var_ratio, goal_var):
    """Count the leading components needed to reach ``goal_var``.

    Accumulates the values of ``var_ratio`` in order and stops at the first
    position where the running sum is at least ``goal_var``. Returns the
    number of values consumed: every value if the target is never met, and
    ``0`` when ``var_ratio`` is empty.
    """
    running_sum = 0.0
    used = 0  # remains 0 when var_ratio yields nothing
    for used, ratio in enumerate(var_ratio, start=1):
        running_sum += ratio
        if running_sum >= goal_var:
            break
    return used

# Number of TSVD components needed for the cumulative explained-variance ratio to reach 0.95.
select_n_components(tsvd_var_ratios, 0.95)

40