###  Created by Luis A. Sanchez-Perez (alejand@umich.edu).
<p><span style="color:green"><b>Copyright &#169;</b> Do not distribute or use without authorization from author.</span></p>

### MLP Feature Sensitivity to Posterior Probability (FSPP)
Computes a wrapper feature ranking especifically designed for MLP neural networks using the algorithm proposed in
https://ieeexplore.ieee.org/abstract/document/5282531
and briefly compares to Mutual Information (MI) raking criterion

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_validate
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
from utils.fspp import get_fspp
from utils.mutual import MutualInfo
from utils.mutualI import MutualInfoI
from utils.reports import report_feature_ranking
import time

In [2]:
# Load dataset
dataset = datasets.load_breast_cancer()
print(dataset.feature_names, end="\n")
print(dataset.target_names)
predictors = dataset.data
responses = dataset.target

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
['malignant' 'benign']


In [3]:
# Splits into training/test sets
X, X_holdout, y, y_holdout = train_test_split(predictors, responses, test_size=0.3, stratify=responses)

In [4]:
# Defines model
sc = StandardScaler()
clf = MLPClassifier(hidden_layer_sizes=(30))
estimators = [('normalizer', sc), ('classifier', clf)]
pipe = Pipeline(estimators)
results = cross_validate(pipe, X, y, cv = 5, scoring = ['accuracy'], n_jobs=-1,
                         return_estimator=True, return_train_score=True)
print('\nTime training (Avg):', results['fit_time'].mean())
print('\nTraining Metrics: ')
print('Accuracy (Avg):', '%.2f' % results['train_accuracy'].mean())
print('\nValidation Metrics:')
print('Accuracy (Avg):', '%.2f' % results['test_accuracy'].mean())
best_pipe = results['estimator'][results['test_accuracy'].argmin()]
y_pred = best_pipe.predict(X_holdout)
print('\nTest Metrics:')
print('Accuracy:', '%.2f' % accuracy_score(y_pred,y_holdout))


Time training (Avg): 0.187815523147583

Training Metrics: 
Accuracy (Avg): 0.99

Validation Metrics:
Accuracy (Avg): 0.97

Test Metrics:
Accuracy: 0.97


In [5]:
start = time.perf_counter()
rank = get_fspp(best_pipe, X)
print('Elpased time:', time.perf_counter() - start, end='\n\n')
report_feature_ranking(rank, dataset.feature_names, 10)

Elpased time: 0.01784449999999982

Feature ranked 1 is 'worst smoothness' with value 6.01E-02
Feature ranked 2 is 'worst symmetry' with value 5.02E-02
Feature ranked 3 is 'worst concavity' with value 4.85E-02
Feature ranked 4 is 'mean concavity' with value 4.59E-02
Feature ranked 5 is 'mean texture' with value 4.29E-02
.
.
.

Feature ranked 26 is 'symmetry error' with value 8.09E-03
Feature ranked 27 is 'smoothness error' with value 6.80E-03
Feature ranked 28 is 'worst fractal dimension' with value 6.48E-03
Feature ranked 29 is 'concave points error' with value 5.75E-03
Feature ranked 30 is 'texture error' with value 4.58E-03


In [6]:
mi = MutualInfo(X,y,n_jobs=-1) 
start = time.perf_counter()
rank = mi.compute()
print('Elpased time:', time.perf_counter() - start, end='\n\n')
report_feature_ranking(rank,dataset.feature_names, 10)

Using parallel version
Elpased time: 3.7993387000000003

Feature ranked 1 is 'worst perimeter' with value 7.00E-01
Feature ranked 2 is 'worst radius' with value 6.54E-01
Feature ranked 3 is 'worst concave points' with value 6.45E-01
Feature ranked 4 is 'worst area' with value 6.21E-01
Feature ranked 5 is 'mean concave points' with value 6.11E-01
.
.
.

Feature ranked 26 is 'mean fractal dimension' with value 6.62E-02
Feature ranked 27 is 'symmetry error' with value 6.36E-02
Feature ranked 28 is 'fractal dimension error' with value 5.01E-02
Feature ranked 29 is 'texture error' with value 3.51E-02
Feature ranked 30 is 'smoothness error' with value 3.42E-02


In [7]:
mi = MutualInfoI(X,y,n_jobs=-1)
start = time.perf_counter()
rank = mi.compute()
print('Elpased time:', time.perf_counter() - start, end='\n\n')
report_feature_ranking(rank, dataset.feature_names, 10)

Using parallel version
Elpased time: 11.5133717

Feature ranked 1 is 'worst perimeter' with value 1.92E-01
Feature ranked 2 is 'worst radius' with value 1.46E-01
Feature ranked 3 is 'worst concave points' with value 1.37E-01
Feature ranked 4 is 'worst area' with value 1.13E-01
Feature ranked 5 is 'mean concave points' with value 1.03E-01
.
.
.

Feature ranked 26 is 'mean fractal dimension' with value -4.42E-01
Feature ranked 27 is 'symmetry error' with value -4.44E-01
Feature ranked 28 is 'fractal dimension error' with value -4.58E-01
Feature ranked 29 is 'texture error' with value -4.73E-01
Feature ranked 30 is 'smoothness error' with value -4.74E-01
