## Univariate Selection

In [3]:
# Feature Selection with Univariate Statistical Tests (Chi-squared for classification)
import pandas as pd
import numpy
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
# Read the Pima Indians diabetes CSV. Column names are supplied explicitly,
# so pandas treats the first line of the file as data rather than a header.
data = pd.read_csv(
    "pima-indians-diabetes.csv",
    names=["preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"],
)

# Preview the first five rows.
data.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Split the raw values into the input matrix (first eight columns)
# and the target vector (ninth column, 'class').
array = data.values
X, Y = array[:, :8], array[:, 8]

# Score each input column against the target with the chi-squared test
# and keep the four highest-scoring columns.
test = SelectKBest(k=4, score_func=chi2)
fit = test.fit(X, Y)

In [5]:
# Print arrays with three decimals (a process-wide numpy setting, so it also
# affects array output in later cells).
numpy.set_printoptions(precision=3)

# Chi-squared score of every input column, in column order.
print(fit.scores_)

# Reduce X to the selected columns and show the first five rows.
features = fit.transform(X)
print(features[:5, :])

[ 111.52  1411.887   17.605   53.108 2175.565  127.669    5.393  181.304]
[[148.    0.   33.6  50. ]
 [ 85.    0.   26.6  31. ]
 [183.    0.   23.3  32. ]
 [ 89.   94.   28.1  21. ]
 [137.  168.   43.1  33. ]]


## Recursive Feature Elimination

In [10]:
# Feature Selection with RFE (Recursive Feature Elimination)
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Recursive feature elimination: repeatedly fit the logistic regression and
# discard the weakest feature until four remain.
model = LogisticRegression()
# n_features_to_select is passed by keyword: the positional form was deprecated
# in scikit-learn 0.23 and raises a TypeError from 1.0 onwards.
rfe = RFE(model, n_features_to_select=4)
fit = rfe.fit(X, Y)

# Boolean mask over the input columns; True marks a selected feature.
fit.support_

array([ True,  True, False, False, False,  True,  True, False])

In [11]:
# Rank of each input column from the RFE fit above; selected features have rank 1,
# larger numbers were eliminated earlier.
fit.ranking_

array([1, 1, 2, 4, 5, 1, 1, 3])

## Principal Component Analysis

In [14]:
# Feature Extraction with PCA
import numpy as np
from pandas import read_csv
from sklearn.decomposition import PCA

# Fit a PCA that keeps the first four principal components of X.
pca = PCA(n_components=4)
fit = pca.fit(X)
# Fraction of the total variance explained by each retained component.
fit.explained_variance_ratio_

array([0.889, 0.062, 0.026, 0.013])

In [15]:
# Principal axes from the PCA fit: one row per component, one column per input feature.
fit.components_

array([[-2.022e-03,  9.781e-02,  1.609e-02,  6.076e-02,  9.931e-01,
         1.401e-02,  5.372e-04, -3.565e-03],
       [-2.265e-02, -9.722e-01, -1.419e-01,  5.786e-02,  9.463e-02,
        -4.697e-02, -8.168e-04, -1.402e-01],
       [-2.246e-02,  1.434e-01, -9.225e-01, -3.070e-01,  2.098e-02,
        -1.324e-01, -6.400e-04, -1.255e-01],
       [-4.905e-02,  1.198e-01, -2.627e-01,  8.844e-01, -6.555e-02,
         1.928e-01,  2.699e-03, -3.010e-01]])