In [1]:
# Recursive Feature Elimination (RFE) works by recursively removing attributes
# and building a model on the attributes that remain.
# It uses the model to identify which attributes
# contribute the most to predicting the target attribute.

from pandas import read_csv
from sklearn.feature_selection import RFE
from sklearn.svm import SVR

In [2]:
# Load the diabetes dataset from its CSV file.
# The dataset consists of several medical predictor (independent) variables and
# one target (dependent) variable, the outcome (the 'class' column below).
# Independent variables include the number of pregnancies the patient has had,
# their BMI, insulin level, age, and so on.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv"
# Column labels are supplied by hand: eight predictors followed by the target.
names = [
    'preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age',
    'class',
]
# header=None spells out what pandas already does when `names` is given:
# every line of the file is read as data, none as a header row.
dataframe = read_csv(url, header=None, names=names)
# Bare last expression so the notebook renders the table.
dataframe

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [3]:
# Convert the table to a NumPy array and split it column-wise
# X: the first eight columns (training features)
# Y: the ninth column (classes)
array = dataframe.to_numpy()
X = array[:, :8]
Y = array[:, 8]

In [4]:
# Feature selection: RFE driven by a linear-kernel support vector regressor
# NOTE(review): Y holds class labels while SVR is a regression estimator;
# confirm that a regressor (rather than a linear classifier) is intended here.
model = SVR(kernel="linear")
# Keep only the 3 best attributes
rfe = RFE(estimator=model, n_features_to_select=3)
# scikit-learn's fit() returns the fitted selector itself, so `fit` and `rfe`
# refer to the same object.
fit = rfe.fit(X, Y)
# Each value is wrapped in a one-element tuple so that %-formatting always
# treats it as a single argument.
print("Num Features: %d" % (fit.n_features_,))
print("Selected Features: %s" % (fit.support_,))
print("Feature Ranking: %s" % (fit.ranking_,))

Num Features: 3
Selected Features: [ True False False False False  True  True False]
Feature Ranking: [1 2 3 5 6 1 1 4]


In [5]:
# Print the name of every attribute that RFE ranked 1 (the retained ones)
print('Selected features:')
feats = list(dataframe.columns.values)
# zip stops at the shorter sequence, so the trailing target column name in
# `feats` is never paired with a rank.
for name, rank in zip(feats, fit.ranking_):
    if rank == 1:
        print(name)


Selected features:
preg
mass
pedi
