In [1]:
import numpy as np
from sklearn.feature_selection import VarianceThreshold
# Print the class documentation of VarianceThreshold (parameters, attributes, examples).
help(VarianceThreshold)

Help on class VarianceThreshold in module sklearn.feature_selection.variance_threshold:

class VarianceThreshold(sklearn.base.BaseEstimator, sklearn.feature_selection.base.SelectorMixin)
 |  VarianceThreshold(threshold=0.0)
 |  
 |  Feature selector that removes all low-variance features.
 |  
 |  This feature selection algorithm looks only at the features (X), not the
 |  desired outputs (y), and can thus be used for unsupervised learning.
 |  
 |  Read more in the :ref:`User Guide <variance_threshold>`.
 |  
 |  Parameters
 |  ----------
 |  threshold : float, optional
 |      Features with a training-set variance lower than this threshold will
 |      be removed. The default is to keep all features with non-zero variance,
 |      i.e. remove the features that have the same value in all samples.
 |  
 |  Attributes
 |  ----------
 |  variances_ : array, shape (n_features,)
 |      Variances of individual features.
 |  
 |  Examples
 |  --------
 |  The following dataset has integer f

In [2]:
# Define the data matrix: 6 samples (rows) x 3 features (columns), all values 0 or 1.
# It is built as a NumPy array rather than a nested list so that it displays as an
# array and supports 2-D column indexing such as X[:, mask].
X = np.array([
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 0],
    [0, 1, 1],
    [0, 1, 0],
    [0, 1, 1],
])
X

array([[0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 1],
       [0, 1, 0],
       [0, 1, 1]])

In [3]:
# Build the VarianceThreshold selector. The threshold is written as p * (1 - p)
# with p = 0.8; features whose training-set variance is lower than this value
# are removed.
p = .8
sel = VarianceThreshold(threshold=(p * (1 - p)))
# Learn the empirical variance of each column of X and return only the
# columns that pass the threshold.
sel.fit_transform(X)

array([[0, 1],
       [1, 0],
       [0, 0],
       [1, 1],
       [1, 0],
       [1, 1]])

In [4]:
# Integer column indices of the features kept by the selector.
selected_idx = sel.get_support(indices=True)
selected_idx

array([1, 2], dtype=int64)

In [5]:
# Select the kept columns of X using the selector's support mask
# (get_support() without indices=True returns the mask rather than indices).
# np.asarray makes the 2-D indexing valid even if X is a plain nested list,
# which would otherwise raise TypeError on X[:, mask].
np.asarray(X)[:, sel.get_support()]

array([[0, 1],
       [1, 0],
       [0, 0],
       [1, 1],
       [1, 0],
       [1, 1]])