In [1]:
import pandas as pd

# Load the wine data set; the file has no header row, so pandas numbers the columns.
dataset = pd.read_csv('datasets/wine.data', header=None, sep=',')
# First column -> Y (the slice keeps it as a one-column frame); remaining columns -> X.
Y = dataset.iloc[:, :1]
X = dataset.iloc[:, 1:]

## SELECT K BEST

In [2]:
"""
class sklearn.feature_selection.SelectKBest(score_func=<function f_classif>, k=10)

    Select features according to the k highest scores.

    Read more in the User Guide.

Parameters:
    - score_func : callable
        Function taking two arrays X and y, and returning a pair of arrays (scores, pvalues) 
        or a single array with scores. Default is f_classif (see below “See also”). The default 
        function only works with classification tasks.
    - k : int or “all”, optional, default=10
        Number of top features to select. The “all” option bypasses selection, for use in a parameter search.
        
Methods
    fit(X, y) : Run score function on (X, y) and get the appropriate features.
    fit_transform(X[, y]) : Fit to data, then transform it.
    get_params([deep]) : Get parameters for this estimator.
    get_support([indices]) : Get a mask, or integer index, of the features selected
    inverse_transform(X) : Reverse the transformation operation
    set_params(**params) : Set the parameters of this estimator.
    transform(X) : Reduce X to the selected features.
    
"""

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

# Score every column of X against Y with the chi-squared statistic and keep the 4 best.
test = SelectKBest(k=4, score_func=chi2)
# fit() hands back the fitted selector itself, so `fit` and `test` are the same object.
fit = test.fit(X, Y)
features = fit.transform(X)
# Preview the first five rows of the reduced feature matrix.
pd.DataFrame(features).head(n=5)

Unnamed: 0,0,1,2,3
0,127.0,3.06,5.64,1065.0
1,100.0,2.76,4.38,1050.0
2,101.0,3.24,5.68,1185.0
3,113.0,3.49,7.8,1480.0
4,118.0,2.69,4.32,735.0


## RECURSIVE FEATURE ELIMINATION

In [3]:
"""
class sklearn.feature_selection.RFE(estimator, n_features_to_select=None, step=1, verbose=0)


    Feature ranking with recursive feature elimination.
    Given an external estimator that assigns weights to features (e.g., the coefficients of a linear model), 
    the goal of recursive feature elimination (RFE) is to select features by recursively considering smaller 
    and smaller sets of features. First, the estimator is trained on the initial set of features and the 
    importance of each feature is obtained either through a coef_ attribute or through a 
    feature_importances_ attribute. Then, the least important features are pruned from current set of features. 
    That procedure is recursively repeated on the pruned set until the desired number of features to select 
    is eventually reached.

    Read more in the User Guide.
    
Parameters:
    - estimator : object
        A supervised learning estimator with a fit method that provides information about feature 
        importance either through a coef_ attribute or through a feature_importances_ attribute.
    - n_features_to_select : int or None (default=None)
        The number of features to select. If None, half of the features are selected.
    - step : int or float, optional (default=1)
        If greater than or equal to 1, then step corresponds to the (integer) number of features 
        to remove at each iteration. If within (0.0, 1.0), then step corresponds to the percentage 
        (rounded down) of features to remove at each iteration.
    - verbose : int, default=0
        Controls verbosity of output.

"""

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
# Recursive feature elimination driven by a logistic-regression estimator.
model = LogisticRegression()
# Keep 3 features; step=3 removes 3 features per elimination round.
rfe = RFE(model, n_features_to_select=3, step=3)
# Y is a single-column DataFrame. scikit-learn expects a 1-D target and emits a
# DataConversionWarning for column vectors, so flatten it to shape (n_samples,).
rfe.fit(X, Y.values.ravel())
print("Num Features: {}".format(rfe.n_features_))
# Boolean mask over the columns of X: True marks a retained feature.
print("Selected Features: {}".format(rfe.support_))
# Retained features are ranked 1; larger ranks were eliminated earlier.
print("Features Ranking: {}".format(rfe.ranking_))

Num Features: 3
Selected Features: [False False  True False False False  True False False  True False False
 False]
Features Ranking: [4 3 1 3 5 4 1 5 4 1 3 2 5]


  y = column_or_1d(y, warn=True)


## Principal component analysis (PCA)

In [4]:
"""
class sklearn.decomposition.PCA(n_components=None, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)


    Linear dimensionality reduction using Singular Value Decomposition of the data to project it to a 
    lower dimensional space.

    It uses the LAPACK implementation of the full SVD or a randomized truncated SVD by the method of 
    Halko et al. 2009, depending on the shape of the input data and the number of components to extract.

    It can also use the scipy.sparse.linalg ARPACK implementation of the truncated SVD.

    Notice that this class does not support sparse input. See TruncatedSVD for an alternative with sparse data.

    Read more in the User Guide.

Parameters:
    - n_components : int, float, None or string
        Number of components to keep. If n_components is not set, all components are kept:
            n_components == min(n_samples, n_features)
        If n_components == ‘mle’ and svd_solver == ‘full’, Minka’s MLE is used to guess the dimension.
        If 0 < n_components < 1 and svd_solver == ‘full’, select the number of components such that the
        amount of variance that needs to be explained is greater than the percentage specified by
        n_components. n_components cannot be equal to n_features for svd_solver == ‘arpack’.
    - copy : bool (default True)
        If False, data passed to fit are overwritten and running fit(X).transform(X) will not yield the
        expected results, use fit_transform(X) instead.
    - whiten : bool, optional (default False)
        When True (False by default) the components_ vectors are multiplied by the square root of 
        n_samples and then divided by the singular values to ensure uncorrelated outputs with unit 
        component-wise variances.

        Whitening will remove some information from the transformed signal (the relative variance scales 
        of the components) but can sometimes improve the predictive accuracy of the downstream estimators
        by making their data respect some hard-wired assumptions.
    - svd_solver : string {‘auto’, ‘full’, ‘arpack’, ‘randomized’}
        * auto :
            the solver is selected by a default policy based on X.shape and n_components: 
            if the input data is larger than 500x500 and the number of components to extract is lower
            than 80% of the smallest dimension of the data, then the more efficient ‘randomized’ method
            is enabled. Otherwise the exact full SVD is computed and optionally truncated afterwards.
        * full :
            run exact full SVD calling the standard LAPACK solver via scipy.linalg.svd and select 
            the components by postprocessing
        * arpack :
            run SVD truncated to n_components calling ARPACK solver via scipy.sparse.linalg.svds.
            It requires strictly 0 < n_components < X.shape[1]
        * randomized :
            run randomized SVD by the method of Halko et al.
    - tol : float >= 0, optional (default .0)
        Tolerance for singular values computed by svd_solver == ‘arpack’.
    - iterated_power : int >= 0, or ‘auto’, (default ‘auto’)
        Number of iterations for the power method computed by svd_solver == ‘randomized’.
    - random_state : int, RandomState instance or None, optional (default None)
        If int, random_state is the seed used by the random number generator; If RandomState instance, 
        random_state is the random number generator; If None, the random number generator is the RandomState 
        instance used by np.random. Used when svd_solver == ‘arpack’ or ‘randomized’.
"""

from sklearn.decomposition import PCA
# Configure PCA to keep 3 components (n_components=3).
pca = PCA(n_components=3)
# Fit on the feature matrix only (Y is not passed). Note: this rebinds `fit`, which the
# SelectKBest cell above also assigned.
fit = pca.fit(X)