###  Created by Luis A. Sanchez-Perez (alejand@umich.edu).
<p><span style="color:green"><b>Copyright &#169;</b> Do not distribute or use without authorization from author.</span></p>

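Applies the Mutual Information I (MII) ranking criterion to the wine dataset and compares the resulting feature ranking with the ones produced by the Mutual Information (MI) criterion and by scikit-learn's `mutual_info_classif`.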

In [1]:
import time
from sklearn import datasets
from sklearn.feature_selection import mutual_info_classif
from utils.mutual import MutualInfo
from utils.mutualI import MutualInfoI
from utils.reports import report_feature_ranking

In [2]:
# Load the wine dataset and split it into predictors, responses and their names
dataset = datasets.load_wine()
predictors, responses = dataset['data'], dataset['target'].flatten()
predictors_names, responses_names = dataset['feature_names'], dataset['target_names']

In [3]:
# Rank the predictors with the Mutual Information I (MII) criterion,
# timing the construction of the estimator plus the computation itself.
tic = time.perf_counter()
mi = MutualInfoI(predictors, responses, n_jobs=4)
mi.compute()
elapsed = time.perf_counter() - tic
print('Elapsed time:', elapsed)

Using parallel version
Elapsed time: 3.8232242999999997


In [4]:
# Report the feature ranking given by the scores held in mi.info.
# NOTE(review): 20 exceeds the 13 wine features; presumably an upper bound on
# how many entries are reported — confirm in utils.reports.
report_feature_ranking(
    mi.info,
    predictors_names,
    20,
)

Feature ranked 1 is 'flavanoids' with value 2.52E-01
Feature ranked 2 is 'proline' with value 1.00E-01
Feature ranked 3 is 'od280/od315_of_diluted_wines' with value 5.62E-02
Feature ranked 4 is 'color_intensity' with value 2.18E-02
Feature ranked 5 is 'hue' with value -5.31E-02
Feature ranked 6 is 'alcohol' with value -6.19E-02
Feature ranked 7 is 'total_phenols' with value -1.13E-01
Feature ranked 8 is 'malic_acid' with value -2.43E-01
Feature ranked 9 is 'proanthocyanins' with value -3.36E-01
Feature ranked 10 is 'alcalinity_of_ash' with value -3.65E-01
Feature ranked 11 is 'nonflavanoid_phenols' with value -4.15E-01
Feature ranked 12 is 'magnesium' with value -4.17E-01
Feature ranked 13 is 'ash' with value -5.12E-01


In [5]:
# Rank the predictors with the Mutual Information (MI) criterion,
# timing the construction of the estimator plus the computation itself.
tic = time.perf_counter()
mi = MutualInfo(predictors, responses)
mi.compute()
elapsed = time.perf_counter() - tic
print('Elapsed time:', elapsed)

Using basic matrix version
Elapsed time: 0.005201399999999801


In [6]:
# Report the feature ranking given by the scores held in mi.info.
# NOTE(review): 20 exceeds the 13 wine features; presumably an upper bound on
# how many entries are reported — confirm in utils.reports.
report_feature_ranking(
    mi.info,
    predictors_names,
    20,
)

Feature ranked 1 is 'flavanoids' with value 9.88E-01
Feature ranked 2 is 'proline' with value 8.37E-01
Feature ranked 3 is 'od280/od315_of_diluted_wines' with value 7.93E-01
Feature ranked 4 is 'color_intensity' with value 7.58E-01
Feature ranked 5 is 'hue' with value 6.83E-01
Feature ranked 6 is 'alcohol' with value 6.75E-01
Feature ranked 7 is 'total_phenols' with value 6.23E-01
Feature ranked 8 is 'malic_acid' with value 4.93E-01
Feature ranked 9 is 'proanthocyanins' with value 4.01E-01
Feature ranked 10 is 'alcalinity_of_ash' with value 3.72E-01
Feature ranked 11 is 'nonflavanoid_phenols' with value 3.22E-01
Feature ranked 12 is 'magnesium' with value 3.19E-01
Feature ranked 13 is 'ash' with value 2.25E-01


In [7]:
# Rank the predictors with scikit-learn's mutual information estimator and
# time the call.
start = time.perf_counter()
# A fixed random_state makes the scores reproducible: the estimator adds small
# random noise to continuous features, so unseeded runs give different values.
mi = mutual_info_classif(predictors, responses, random_state=0)
end = time.perf_counter()
# Label spelled consistently with the other timing cells.
print('Elapsed time:', end - start)

Elpased time: 0.04843360000000008


In [8]:
# Report the feature ranking given by the scores in mi.
# NOTE(review): 20 exceeds the 13 wine features; presumably an upper bound on
# how many entries are reported — confirm in utils.reports.
report_feature_ranking(
    mi,
    predictors_names,
    20,
)

Feature ranked 1 is 'flavanoids' with value 6.63E-01
Feature ranked 2 is 'proline' with value 5.60E-01
Feature ranked 3 is 'color_intensity' with value 5.48E-01
Feature ranked 4 is 'od280/od315_of_diluted_wines' with value 5.14E-01
Feature ranked 5 is 'alcohol' with value 4.80E-01
Feature ranked 6 is 'hue' with value 4.48E-01
Feature ranked 7 is 'total_phenols' with value 4.14E-01
Feature ranked 8 is 'proanthocyanins' with value 2.92E-01
Feature ranked 9 is 'malic_acid' with value 2.88E-01
Feature ranked 10 is 'alcalinity_of_ash' with value 2.55E-01
Feature ranked 11 is 'magnesium' with value 1.90E-01
Feature ranked 12 is 'nonflavanoid_phenols' with value 1.24E-01
Feature ranked 13 is 'ash' with value 5.17E-02
