## Feature Selection using Fisher Rank (LDA)

In [47]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder #string labels to int
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA as PCA
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, mutual_info_classif

### Load the features
A random dataframe is used as placeholder data for now.

In [2]:
# Seed NumPy's global generator so the placeholder data is identical on every
# Restart & Run All (later cells that draw from np.random become repeatable too).
np.random.seed(0)
# 100 rows x 4 integer feature columns (A-D) with values drawn from [0, 100).
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD'))

Put the feature values of the dataframe into a matrix `X` and generate random binary labels `y` (one per row).

In [3]:
# Feature matrix: the raw values of every dataframe column.
X = df.values
# Placeholder binary labels (0 or 1). The count is taken from X so that X and y
# stay aligned if the dataframe size is ever changed.
y = np.random.randint(2, size=X.shape[0])

### Apply LDA

In [22]:
# Fit an LDA model with default settings on the features and labels.
lda = LDA().fit(X, y)
# Keep the fitted scalings as the per-feature score.
# NOTE(review): scalings_ are the coefficients of the discriminant direction and
# depend on each feature's scale - confirm this is the intended "Fisher rank".
scores_lda = lda.scalings_

In [23]:
# Display the LDA scalings computed in the previous cell.
scores_lda

array([[ 0.01632206],
       [ 0.00575785],
       [ 0.02698123],
       [ 0.01455092]])

### Apply PCA
PCA linearly transforms the initial features into a new space. Therefore the scores do not directly correspond to the initial 4 features; they correspond to the component vectors, each of which combines the 4 features.

In [35]:
# PCA is unsupervised: fit() does not use labels, so only X is passed.
pca = PCA()
pca.fit(X)
# Fraction of the total variance explained by each principal component.
# These scores refer to components, not to the original columns.
scores_pca = pca.explained_variance_ratio_

In [36]:
# Display the explained-variance ratio of each principal component.
scores_pca

array([ 0.30298467,  0.2527331 ,  0.24255509,  0.20172714])

### Apply Feature Selection with chi-squared
X must contain only non-negative values.

In [43]:
# Build a chi-squared selector that keeps every column (k equals the number of
# features), so it is used purely to obtain a score per feature.
chi = SelectKBest(score_func=chi2, k=X.shape[1])
chi.fit(X, y)
# One chi-squared statistic per input feature.
scores_chi = chi.scores_

In [44]:
# Display the chi-squared score of each feature.
scores_chi

array([  7.21895425,   0.16736318,  22.34503914,   5.0170975 ])

### Feature selection with Mutual Info
relies on nonparametric methods based on entropy estimation from k-nearest neighbors distances

In [53]:
# mutual_info_classif is a randomised estimator, so its scores change between
# runs unless random_state is fixed. SelectKBest calls score_func(X, y) with no
# extra arguments, so the seed is bound through a small wrapper.
mutin = SelectKBest(
    lambda features, labels: mutual_info_classif(features, labels, random_state=0),
    k=X.shape[1],  # keep every column; only the per-feature scores are needed
).fit(X, y)
# One mutual-information estimate per input feature.
scores_mutin = mutin.scores_

In [54]:
# Display the mutual-information score of each feature.
scores_mutin

array([ 0.02918408,  0.        ,  0.03662853,  0.        ])