###  Created by Luis Alejandro (alejand@umich.edu)
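Applies the Mutual Information I (MII) ranking criterion to the wine dataset and, for comparison, ranks the same features with the plain Mutual Information (MI) criterion and with scikit-learn's `mutual_info_classif`.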

In [1]:
import time
from sklearn import datasets
from sklearn.feature_selection import mutual_info_classif
import sys
sys.path.append('../')
from utils.feature_selection.mutual import MutualInfo
from utils.feature_selection.mutualI import MutualInfoI
from utils.feature_selection.reports import report_feature_ranking

In [2]:
# Load the wine dataset and unpack the pieces used by the cells below.
dataset = datasets.load_wine()

# Feature matrix and class labels (labels flattened into a 1-D copy).
predictors = dataset.data
responses = dataset.target.flatten()

# Human-readable names for the feature columns and the classes.
predictors_names = dataset.feature_names
responses_names = dataset.target_names

In [3]:
# Rank the predictors with the Mutual I (MII) criterion and time the run.
# The timed region covers both constructing the ranker and calling compute().
start = time.perf_counter()
mi = MutualInfoI(predictors, responses, n_jobs=4)
mi.compute()
end = time.perf_counter()

elapsed = end - start
print('Elapsed time:', elapsed)

Using parallel version
Elapsed time: 3.601330099999984


In [4]:
# Print the ranking for the scores held in mi.info.
# NOTE(review): the third argument (20) is presumably the maximum number of
# features to list; the recorded output shows 13 entries -- confirm against
# report_feature_ranking.
report_feature_ranking(
    mi.info,
    predictors_names,
    20,
)

Feature ranked 1 is (flavanoids) with value 0.251538
Feature ranked 2 is (proline) with value 0.100217
Feature ranked 3 is (od280/od315_of_diluted_wines) with value 0.056158
Feature ranked 4 is (color_intensity) with value 0.021773
Feature ranked 5 is (hue) with value -0.053057
Feature ranked 6 is (alcohol) with value -0.061872
Feature ranked 7 is (total_phenols) with value -0.113132
Feature ranked 8 is (malic_acid) with value -0.243362
Feature ranked 9 is (proanthocyanins) with value -0.335631
Feature ranked 10 is (alcalinity_of_ash) with value -0.364610
Feature ranked 11 is (nonflavanoid_phenols) with value -0.414741
Feature ranked 12 is (magnesium) with value -0.416945
Feature ranked 13 is (ash) with value -0.511622


In [5]:
# Using MI criterion
start = time.perf_counter()
mi = MutualInfo(predictors,responses)
mi.compute()
end = time.perf_counter()
print('Elapsed time:',end - start)

Using basic matrix version
Elapsed time: 0.006726399999990917


In [6]:
# Print the ranking for the scores held in mi.info.
# NOTE(review): the third argument (20) is presumably the maximum number of
# features to list; the recorded output shows 13 entries -- confirm against
# report_feature_ranking.
report_feature_ranking(
    mi.info,
    predictors_names,
    20,
)

Feature ranked 1 is (flavanoids) with value 0.987925
Feature ranked 2 is (proline) with value 0.836604
Feature ranked 3 is (od280/od315_of_diluted_wines) with value 0.792544
Feature ranked 4 is (color_intensity) with value 0.758160
Feature ranked 5 is (hue) with value 0.683330
Feature ranked 6 is (alcohol) with value 0.674514
Feature ranked 7 is (total_phenols) with value 0.623255
Feature ranked 8 is (malic_acid) with value 0.493024
Feature ranked 9 is (proanthocyanins) with value 0.400756
Feature ranked 10 is (alcalinity_of_ash) with value 0.371777
Feature ranked 11 is (nonflavanoid_phenols) with value 0.321646
Feature ranked 12 is (magnesium) with value 0.319442
Feature ranked 13 is (ash) with value 0.224764


In [7]:
# Using MI sklearn implementation
start = time.perf_counter()
mi = mutual_info_classif(predictors,responses)
end = time.perf_counter()
print('Elpased time:', end-start)

Elpased time: 0.04240390000001071


In [8]:
# Print the ranking for the scores currently held in mi.
# NOTE(review): the third argument (20) is presumably the maximum number of
# features to list; the recorded output shows 13 entries -- confirm against
# report_feature_ranking.
report_feature_ranking(
    mi,
    predictors_names,
    20,
)

Feature ranked 1 is (flavanoids) with value 0.667668
Feature ranked 2 is (color_intensity) with value 0.553694
Feature ranked 3 is (proline) with value 0.552871
Feature ranked 4 is (od280/od315_of_diluted_wines) with value 0.521648
Feature ranked 5 is (hue) with value 0.475773
Feature ranked 6 is (alcohol) with value 0.465262
Feature ranked 7 is (total_phenols) with value 0.419591
Feature ranked 8 is (proanthocyanins) with value 0.291434
Feature ranked 9 is (malic_acid) with value 0.275868
Feature ranked 10 is (alcalinity_of_ash) with value 0.235863
Feature ranked 11 is (magnesium) with value 0.161904
Feature ranked 12 is (nonflavanoid_phenols) with value 0.114326
Feature ranked 13 is (ash) with value 0.066720
