In [53]:
import numpy as np
import pandas as pd
import pickle

from src.get_data import load_breast_cancer, load_glass, load_iris, load_skin, load_statlog_shuttle, load_wine
from src.models import train_logistic_regression, train_multilayer_perceptron, train_fuzzy_rbf_nn

from matplotlib import pyplot as plt

seed=42

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Preliminar results

In [120]:
data = {'dataset':['Iris', 'Wine', 'Breast', 'Glass', 'StatlogShuttle', 'SkinSeg'],
        #'SOFTMAX_GD_FRBF': [90.25, 98.69, 92.03, 75.02],
        'PBL_FRBF': [96.75, 98.04, 89.56, 92.45,0,0],
        'FRBF': [93.89, 96.81, 85.19, 91.56,0,0],
        'RBF': [90.65, 95.89, 81.94, 90.56,0,0],
        'LogReg': [],
        'MLP': [],
        'SOFTMAX_GD_FRBF': []
       }

data = {
 'dataset': ['Iris', 'Wine', 'Breast', 'Glass', 'StatlogShuttle', 'SkinSeg'],
 'PBL_FRBF': [96.75, 98.04, 89.56, 92.45, 0, 0],
 'FRBF': [93.89, 96.81, 85.19, 91.56, 0, 0],
 'RBF': [90.65, 95.89, 81.94, 90.56, 0, 0],
 'LogReg': [1.0, 1.0, 0.9785512994468217, 0.9833333333333334, 0.9548445863831603, 0.8846160017882521],
 'MLP': [1.0, 0.9841269841269842, 0.986013986013986, 0.9634502923976608, 0.9995039682539683, 0.9864555669586725],
 'SOFTMAX_GD_FRBF': [0.923076923076923, 1.0, 0.9750547959503183, 0.8130341880341881, 0.8627325638028444, 0.47421947277637494]
}

## 1. Logistic Regression
#### 1.1 Iris dataset

In [93]:
X, y = load_iris()

param_grid = {
    'penalty' : ['l2'],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear'],
    'C' : np.logspace(-4, 4, 40)
}
#best_found_params
param_grid = {'C': [34.55107294592218], 'penalty': ['l2'], 'solver': ['newton-cg']}

metrics = train_logistic_regression(X, y, param_grid)
data['LogReg'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best: 0.952381 using {'C': 34.55107294592218, 'penalty': 'l2', 'solver': 'newton-cg'}


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.9s finished


#### 1.2 Wine Quality Dataset

In [95]:
X, y = load_wine()

param_grid = {
    'penalty' : ['l2'],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear'],
    'C' : np.logspace(-4, 4, 40)
}
#best_found_params
param_grid = {'C': [8.376776400682925], 'penalty': ['l2'], 'solver': ['newton-cg']}

metrics = train_logistic_regression(X, y, param_grid)
data['LogReg'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best: 0.975803 using {'C': 8.376776400682925, 'penalty': 'l2', 'solver': 'newton-cg'}


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.4s finished


#### 1.3 Breast Cancer Dataset

In [96]:
X, y = load_breast_cancer()

param_grid = {
    'penalty' : ['l2'],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear'],
    'C' : np.logspace(-4, 4, 40)
}
#best_found_params
param_grid = {'C': [0.00025719138090593444], 'penalty': ['l2'], 'solver': ['liblinear']}

metrics = train_logistic_regression(X, y, param_grid)
data['LogReg'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best: 0.969325 using {'C': 0.00025719138090593444, 'penalty': 'l2', 'solver': 'liblinear'}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.0s finished


#### 1.4 Glass Dataset

In [97]:
X, y = load_glass()

param_grid = {
    'penalty' : ['l2'],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear'],
    'C' : np.logspace(-4, 4, 40)
}
#best_found_params
param_grid = {'C': [142.51026703029964], 'penalty': ['l2'], 'solver': ['newton-cg']}

metrics = train_logistic_regression(X, y, param_grid)
data['LogReg'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best: 0.939184 using {'C': 142.51026703029964, 'penalty': 'l2', 'solver': 'newton-cg'}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished


#### 1.5 Statlog Shuttle Dataset

In [98]:
X, y = load_statlog_shuttle()

param_grid = {
    'penalty' : ['l2'],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear'],
    'C' : np.logspace(-4, 4, 40),
    'max_iter': [300]
}
#best_found_params
param_grid = {'C': [942.6684551178854], 'max_iter': [300], 'penalty': ['l2'], 'solver': ['newton-cg']}

metrics = train_logistic_regression(X, y, param_grid)
data['LogReg'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.8s finished


Best: 0.971015 using {'C': 942.6684551178854, 'max_iter': 300, 'penalty': 'l2', 'solver': 'newton-cg'}


#### 1.6 Skin Dataset

In [99]:
X, y = load_skin()

param_grid = {
    'penalty' : ['l2'],
    'solver' : ['newton-cg', 'lbfgs', 'liblinear'],
    'C' : np.logspace(-4, 4, 40)
}
#best_found_params
param_grid = {'C': [21.54434690031882], 'penalty': ['l2'], 'solver': ['liblinear']}

metrics = train_logistic_regression(X, y, param_grid)
data['LogReg'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.4s finished


Best: 0.918677 using {'C': 21.54434690031882, 'penalty': 'l2', 'solver': 'liblinear'}


## 2. MultiLayer Perceptron
#### 2.1 Iris dataset

In [61]:
X, y = load_iris()

param_grid = {
    'hidden_layer_sizes': [(3), (5), (3,3), (5,5)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [200]
}
#best_found_params
param_grid = {'activation': ['tanh'], 'alpha': [0.0001], 'hidden_layer_sizes': [(3)],
              'learning_rate': ['constant'], 'max_iter': [200], 'solver': ['lbfgs']}

metrics = train_multilayer_perceptron(X, y, param_grid)
data['MLP'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.3s finished


Best: 0.952381 using {'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': 3, 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'lbfgs'}


#### 2.2 Wine dataset

In [62]:
X, y = load_wine()

param_grid = {
    'hidden_layer_sizes': [(3), (5), (3,3), (5,5)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [200]
}
#best_found_params
param_grid = {'activation': ['logistic'], 'alpha': [0.001], 'hidden_layer_sizes': [(5)],
              'learning_rate': ['constant'], 'max_iter': [200], 'solver': ['lbfgs']}

metrics = train_multilayer_perceptron(X, y, param_grid)
data['MLP'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits
Best: 0.983740 using {'activation': 'logistic', 'alpha': 0.001, 'hidden_layer_sizes': 5, 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'lbfgs'}


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished


#### 2.3 Breast Cancer Dataset

In [63]:
X, y = load_breast_cancer()

param_grid = {
    'hidden_layer_sizes': [(3), (5), (3,3), (5,5)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [200]
}
#best_found_params
param_grid = {'activation': ['relu'], 'alpha': [0.0001], 'hidden_layer_sizes': [(5,5)],
              'learning_rate': ['constant'], 'max_iter': [200], 'solver': ['adam']}

metrics = train_multilayer_perceptron(X, y, param_grid)
data['MLP'].append(metrics[0])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.6s finished


Best: 0.969325 using {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (5, 5), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'adam'}




#### 2.4 Glass Dataset

In [64]:
X, y = load_glass()

param_grid = {
    'hidden_layer_sizes': [(3), (5), (3,3), (5,5)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [200]
}
#best_found_params
param_grid = {'activation': ['relu'], 'alpha': [0.05], 'hidden_layer_sizes': [(5,5)],
              'learning_rate': ['constant'], 'max_iter': [200], 'solver': ['lbfgs']}

metrics = train_multilayer_perceptron(X, y, param_grid)
data['MLP'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.4s finished
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Best: 0.959864 using {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (5, 5), 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'lbfgs'}


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


#### 2.5 Statlog Shuttle Dataset

In [65]:
X, y = load_statlog_shuttle()

param_grid = {
    'hidden_layer_sizes': [(3), (5), (3,3), (5,5)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [200]
}
#best_found_params
param_grid = {'activation': ['logistic'], 'alpha': [0.005], 'hidden_layer_sizes': [(5)],
              'learning_rate': ['constant'], 'max_iter': [200], 'solver': ['lbfgs']}

metrics = train_multilayer_perceptron(X, y, param_grid)
data['MLP'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.1s finished


Best: 1.000000 using {'activation': 'logistic', 'alpha': 0.005, 'hidden_layer_sizes': 5, 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'lbfgs'}


#### 2.6 Skin Dataset

In [66]:
X, y = load_skin()

param_grid = {
    'hidden_layer_sizes': [(3), (5), (3,3), (5,5)],
    'activation': ['logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'adam'],
    'alpha': [0.0001, 0.005, 0.001, 0.005, 0.01, 0.05],
    'learning_rate': ['constant','adaptive'],
    'max_iter': [200]
}
#best_found_params
param_grid = {'activation': ['tanh'], 'alpha': [0.01], 'hidden_layer_sizes': [(5)],
              'learning_rate': ['constant'], 'max_iter': [200], 'solver': ['lbfgs']}

metrics = train_multilayer_perceptron(X, y, param_grid)
data['MLP'].append(metrics[0])

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished


Best: 0.998519 using {'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': 5, 'learning_rate': 'constant', 'max_iter': 200, 'solver': 'lbfgs'}


## 3. Fuzzy Radial-Basis Function
#### 3.1 Iris dataset

In [119]:
X, y = load_iris()

n_labels = len(y.unique())
param_grid = {
    'lr': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05],
    'epochs': [5000, 10000, 12000, 15000, 20000],
    'centroids': np.arange(n_labels, n_labels*3),
    'm': np.linspace(1, 2, 3)
}
#best_found_params
param_grid = {'centroids': 5, 'epochs': 15000, 'lr': 0.0005, 'm': 2.0}

metrics = train_fuzzy_rbf_nn(X, y, param_grid, hyper=False)
print(metrics[0])
#data['SOFTMAX_GD_FRBF'].append(metrics[0])

0.923076923076923


#### 3.2 Wine dataset

In [69]:
X, y = load_wine()

n_labels = len(y.unique())
param_grid = {
    'lr': [0.0001, 0.0005, 0.001, 0.005, 0.01],
    'epochs': [5000, 10000, 15000],
    'centroids': np.arange(n_labels, n_labels*2),
    'm': np.linspace(1, 2, 3)
}

#best_found_params
param_grid = {'centroids': 3, 'epochs': 5000, 'lr': 0.0001, 'm': 2.0}

metrics = train_fuzzy_rbf_nn(X, y, param_grid, hyper=False)
print(metrics[0])
data['SOFTMAX_GD_FRBF'].append(metrics[0])

1.0


#### 3.3 Breast Cancer Dataset

In [72]:
X, y = load_breast_cancer()

n_labels = len(y.unique())
param_grid = {
    'lr': [0.0001, 0.0005, 0.001, 0.005, 0.01],
    'epochs': [5000, 10000, 15000],
    'centroids': np.arange(n_labels, n_labels*2),
    'm': np.linspace(1, 2, 3)
}

#best_found_params
#param_grid = {'centroids': [2], 'epochs': [5000], 'lr': [0.0001], 'm': [1.0]}
#best_found_params
#param_grid = {'centroids': 2, 'epochs': 5000, 'lr': 0.0001, 'm': 1.0}

metrics = train_fuzzy_rbf_nn(X, y, param_grid)
print(metrics[0])
data['SOFTMAX_GD_FRBF'].append(metrics[0])

Fitting 3 folds for each of 90 candidates, totalling 270 fits


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   35.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  5.2min
[Parallel(n_jobs=-1)]: Done 270 out of 270 | elapsed:  9.0min finished
  u = normalize_power_columns(d, - 2. / (m - 1))
  u = normalize_power_columns(d, - 2. / (m - 1))


Best: 0.000000 using {'centroids': 2, 'epochs': 5000, 'lr': 0.0001, 'm': 1.0}
0.9750547959503183


#### 3.4 Glass Dataset

In [84]:
X, y = load_glass()

n_labels = len(y.unique())
param_grid = {
    'lr': [0.0001, 0.0005, 0.001, 0.005, 0.01],
    'epochs': [5000, 10000, 15000],
    'centroids': np.arange(n_labels, n_labels*2),
    'm': np.linspace(1, 2, 3),
    'random_state':[13]
}

#best_found_params
#param_grid = {'centroids': [8], 'epochs': [15000], 'lr': [0.0001], 'm': [1.5], 'random_state':[13]}

#best_found_params
param_grid = {'centroids': 7, 'epochs': 20000, 'lr': 50e-3, 'm': 2}

metrics = train_fuzzy_rbf_nn(X, y, param_grid, hyper=False, seed=13)
#metrics = train_fuzzy_rbf_nn(X, y, param_grid, seed=13)
print(metrics[0])
#data['SOFTMAX_GD_FRBF'].append(metrics[0])

0.8130341880341881


#### 3.5 Statlog Shuttle Dataset

In [88]:
X, y = load_statlog_shuttle()

n_labels = len(y.unique())
param_grid = {
    'lr': [0.0001, 0.0005, 0.001, 0.005, 0.01],
    'epochs': [5000, 10000, 15000],
    'centroids': np.arange(n_labels, n_labels*2),
    'm': np.linspace(1, 2, 3)
}

#best_found_params
param_grid = {'centroids': 3, 'epochs': 10000, 'lr': 0.01, 'm': 2.0}

metrics = train_fuzzy_rbf_nn(X, y, param_grid, hyper=False)
print(metrics[0])
data['SOFTMAX_GD_FRBF'].append(metrics[0])

0.8627325638028444


#### 3.6 Skin Dataset

In [89]:
X, y = load_skin()

n_labels = len(y.unique())
param_grid = {
    'lr': [0.0001, 0.0005, 0.001, 0.005, 0.01],
    'epochs': [5000, 10000, 15000],
    'centroids': np.arange(n_labels, n_labels*2),
    'm': np.linspace(1, 2, 3)
}

#best_found_params
param_grid = {'centroids': 2, 'epochs': 5000, 'lr': 0.001, 'm': 2.0}

metrics = train_fuzzy_rbf_nn(X, y, param_grid, hyper=False)
print(metrics[0])
data['SOFTMAX_GD_FRBF'].append(metrics[0])

0.47421947277637494


In [132]:
data['LogReg'] = [x * 100 for x in data['LogReg']]
data['MLP'] = [x * 100 for x in data['MLP']]
data['SOFTMAX_GD_FRBF'] = [x * 100 for x in data['SOFTMAX_GD_FRBF']]

In [133]:
df = pd.DataFrame(data)
df.to_csv('results.csv', index=False)