## Dimensionality Reduction
### Feature Extraction
#### PCA

In [24]:
# import libs
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [25]:
# Load the digits dataset and standardize every feature column with
# StandardScaler before running PCA on it.
data = datasets.load_digits()
features = StandardScaler().fit_transform(data.data)
# Displayed as the cell result: (n_samples, n_features).
features.shape

(1797, 64)

In [26]:
# A float n_components in (0, 1) tells PCA to keep as many components as are
# needed to explain that fraction of the variance (here 99%).
f_pca = PCA(n_components=.99).fit_transform(features)
# Compare with features.shape to see how many dimensions were dropped.
f_pca.shape

(1797, 54)

### Feature Selection

In [27]:
# low variance -> less information
from sklearn.feature_selection import VarianceThreshold as VT, SelectKBest, chi2

In [28]:
# Switch to the iris dataset. NOTE: `features` is rebound here and no longer
# refers to the standardized digits matrix used in the PCA section.
iris = datasets.load_iris()
features, target = iris.data, iris.target

# Preview the first five rows of the raw measurements.
features[0:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [29]:
# Build a selector that keeps only columns whose variance is greater than 0.5.
# The variances are computed on the iris measurements as loaded (no scaling is
# applied to them in this notebook).
vt = VT(threshold=0.5)
# fit() returns the fitted selector itself, so `ft` and `vt` are the same object.
ft = vt.fit(features)

In [30]:
# Per-column variances learned by fit(); compare each against the 0.5 threshold.
ft.variances_

array([0.68112222, 0.18871289, 3.09550267, 0.57713289])

In [31]:
# Keep only the columns that passed the variance threshold, then preview the
# first five rows of the reduced matrix.
f_high_var = ft.transform(features)
f_high_var[0:5]

array([[5.1, 1.4, 0.2],
       [4.9, 1.4, 0.2],
       [4.7, 1.3, 0.2],
       [4.6, 1.5, 0.2],
       [5. , 1.4, 0.2]])

#### chi2

In [32]:
# Cast the measurements to integers before chi2 scoring.
# NOTE: astype(int) drops the fractional part (e.g. 4.9 -> 4, 0.2 -> 0) and
# rebinds `features`, so the original float values are no longer available
# under this name in later cells.
features = features.astype(int)
# Preview the first five rows after the cast.
features[0:5]

array([[5, 3, 1, 0],
       [4, 3, 1, 0],
       [4, 3, 1, 0],
       [4, 3, 1, 0],
       [5, 3, 1, 0]])

In [33]:
# Sanity check: the integer cast leaves the (rows, columns) shape unchanged.
features.shape

(150, 4)

In [34]:
# Score every feature against the class labels with the chi-squared statistic
# and keep only the two highest-scoring columns.
chi2_selector = SelectKBest(score_func=chi2, k=2)
f_k_best = chi2_selector.fit_transform(features, target)
# The result should have the same number of rows and exactly two columns.
f_k_best.shape

(150, 2)

*By Gaurav Kabra*