# Import Libraries

In [3]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EXS

# Load Data

In [4]:
wine = load_wine()
data = wine['data']
target = wine['target']

# Split Data

In [6]:
X_train,X_test,y_train,y_test = train_test_split(data,target,test_size=0.3,random_state=0)

# Model

In [7]:
m = DecisionTreeClassifier(min_samples_leaf=20)

# Feature Selections

## SequentialFeatureSelector

In [42]:
sfs = SFS(m,
         forward=False, #backward, for forward simply set it to True
         cv = 10,
         k_features = (2,6), # specify min and max number of fetaures for the end model
         scoring='accuracy',
         verbose=False,
         n_jobs=-1)

In [43]:
sfs.fit(X_train,y_train, custom_feature_names=wine['feature_names'])

SequentialFeatureSelector(clone_estimator=True, cv=10,
             estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=20, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
             fixed_features=None, floating=False, forward=False,
             k_features=(2, 6), n_jobs=-1, pre_dispatch='2*n_jobs',
             scoring='accuracy', verbose=False)

In [40]:
print(f"Best score achieved: {sfs.k_score_}, Feature's names: {sfs.k_feature_names_}")

Best score achieved: 0.8869463869463869, Feature's names: ('alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'od280/od315_of_diluted_wines')


In [41]:
display(pd.DataFrame(sfs.get_metric_dict()))

Unnamed: 0,13,12,11,10,9,8,7,6,5,4,3,2
feature_idx,"(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)","(0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12)","(0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11)","(0, 1, 2, 3, 4, 5, 7, 8, 10, 11)","(0, 1, 2, 3, 4, 5, 7, 8, 11)","(0, 1, 2, 3, 4, 5, 7, 11)","(0, 1, 2, 3, 4, 5, 11)","(0, 1, 2, 3, 4, 11)","(0, 1, 2, 3, 11)","(0, 1, 2, 11)","(0, 1, 11)","(0, 11)"
cv_scores,"[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923...","[0.7692307692307693, 0.8461538461538461, 0.923..."
avg_score,0.877448,0.885781,0.885781,0.886946,0.886946,0.886946,0.886946,0.886946,0.886946,0.886946,0.886946,0.886946
feature_names,"(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, alcalinity_of_ash, ...","(alcohol, malic_acid, ash, od280/od315_of_dilu...","(alcohol, malic_acid, od280/od315_of_diluted_w...","(alcohol, od280/od315_of_diluted_wines)"
ci_bound,0.0500033,0.0493915,0.0493915,0.0542853,0.0542853,0.0542853,0.0542853,0.0542853,0.0542853,0.0542853,0.0542853,0.0542853
std_dev,0.0673252,0.0665015,0.0665015,0.0730905,0.0730905,0.0730905,0.0730905,0.0730905,0.0730905,0.0730905,0.0730905,0.0730905
std_err,0.0224417,0.0221672,0.0221672,0.0243635,0.0243635,0.0243635,0.0243635,0.0243635,0.0243635,0.0243635,0.0243635,0.0243635


## ExhaustiveFeatureSelector

In [24]:
efs = EXS(m,
          min_features=2,
          max_features=6,
          cv = 10,
          scoring='accuracy')

In [25]:
efs.fit(X_train,y_train, custom_feature_names=wine['feature_names'])

Features: 4082/4082

ExhaustiveFeatureSelector(clone_estimator=True, cv=10,
             estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=20, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best'),
             max_features=6, min_features=2, n_jobs=1,
             pre_dispatch='2*n_jobs', print_progress=True,
             scoring='accuracy')

In [26]:
print(f"Best score achieved: {efs.best_score_}, Feature's names: {efs.best_feature_names_}")

Best score achieved: 0.8869463869463869, Feature's names: ('alcohol', 'od280/od315_of_diluted_wines')


In [27]:
display(pd.DataFrame(efs.get_metric_dict()))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4072,4073,4074,4075,4076,4077,4078,4079,4080,4081
feature_idx,"(0, 1)","(0, 2)","(0, 3)","(0, 4)","(0, 5)","(0, 6)","(0, 7)","(0, 8)","(0, 9)","(0, 10)",...,"(5, 7, 8, 10, 11, 12)","(5, 7, 9, 10, 11, 12)","(5, 8, 9, 10, 11, 12)","(6, 7, 8, 9, 10, 11)","(6, 7, 8, 9, 10, 12)","(6, 7, 8, 9, 11, 12)","(6, 7, 8, 10, 11, 12)","(6, 7, 9, 10, 11, 12)","(6, 8, 9, 10, 11, 12)","(7, 8, 9, 10, 11, 12)"
cv_scores,"[0.6153846153846154, 0.6923076923076923, 0.769...","[0.46153846153846156, 0.6923076923076923, 0.53...","[0.7692307692307693, 0.6923076923076923, 0.769...","[0.38461538461538464, 0.6923076923076923, 0.61...","[0.7692307692307693, 0.6923076923076923, 0.769...","[0.7692307692307693, 0.7692307692307693, 0.769...","[0.7692307692307693, 0.6923076923076923, 0.692...","[0.5384615384615384, 0.6923076923076923, 0.615...","[0.5384615384615384, 0.7692307692307693, 0.846...","[0.6153846153846154, 0.7692307692307693, 0.692...",...,"[0.6153846153846154, 0.8461538461538461, 0.846...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.6153846153846154, 0.7692307692307693, 0.846...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923...","[0.8461538461538461, 0.9230769230769231, 0.923..."
avg_score,0.794114,0.679138,0.817949,0.679138,0.842308,0.832576,0.767832,0.762704,0.7162,0.810897,...,0.812995,0.885781,0.885781,0.877448,0.877448,0.877448,0.805303,0.877448,0.877448,0.885781
feature_names,"(alcohol, malic_acid)","(alcohol, ash)","(alcohol, alcalinity_of_ash)","(alcohol, magnesium)","(alcohol, total_phenols)","(alcohol, flavanoids)","(alcohol, nonflavanoid_phenols)","(alcohol, proanthocyanins)","(alcohol, color_intensity)","(alcohol, hue)",...,"(total_phenols, nonflavanoid_phenols, proantho...","(total_phenols, nonflavanoid_phenols, color_in...","(total_phenols, proanthocyanins, color_intensi...","(flavanoids, nonflavanoid_phenols, proanthocya...","(flavanoids, nonflavanoid_phenols, proanthocya...","(flavanoids, nonflavanoid_phenols, proanthocya...","(flavanoids, nonflavanoid_phenols, proanthocya...","(flavanoids, nonflavanoid_phenols, color_inten...","(flavanoids, proanthocyanins, color_intensity,...","(nonflavanoid_phenols, proanthocyanins, color_..."
ci_bound,0.0892522,0.0989499,0.062881,0.105341,0.0668981,0.0530701,0.0538971,0.0973812,0.0779296,0.0811395,...,0.0784407,0.0493915,0.0493915,0.0500033,0.0500033,0.0500033,0.0785195,0.0500033,0.0500033,0.0493915
std_dev,0.12017,0.133228,0.0846639,0.141832,0.0900727,0.0714544,0.0725679,0.131115,0.104926,0.109247,...,0.105614,0.0665015,0.0665015,0.0673252,0.0673252,0.0673252,0.10572,0.0673252,0.0673252,0.0665015
std_err,0.0400568,0.0444092,0.0282213,0.0472775,0.0300242,0.0238181,0.0241893,0.0437052,0.0349752,0.0364158,...,0.0352046,0.0221672,0.0221672,0.0224417,0.0224417,0.0224417,0.0352399,0.0224417,0.0224417,0.0221672
