## Algoritmo Evolutivo

#### Instalación de la dependencia: `pip install sklearn-deap` (provee el módulo `evolutionary_search`)

In [1]:
import random

import numpy as np
import pandas as pd
import sklearn.datasets

from evolutionary_search import EvolutionaryAlgorithmSearchCV

In [2]:
# Load the scikit-learn digits dataset and split it into features and labels.
data = sklearn.datasets.load_digits()
X, y = data["data"], data["target"]

# Binarize the target: 1 where the label is 5, 0 everywhere else.
y = (y == 5).astype(int)

# Show the resulting shapes as the cell output.
X.shape, y.shape

((1797, 64), (1797,))

In [3]:
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC

## Train an SVM with RBF kernel

### Using conventional GridSearchCV

Parameter grid: 625 parameter combinations

In [4]:
# Hyperparameter search space for the SVM: a single kernel choice plus
# 25 logarithmically spaced candidates (1e-9 .. 1e9) for both C and gamma.
log_axis = dict(num=25, base=10)
paramgrid = {
    "kernel": ["rbf"],
    "C": np.logspace(-9, 9, **log_axis),
    "gamma": np.logspace(-9, 9, **log_axis),
}

# The grid size is the product of the number of options per hyperparameter.
n_combinations = 1
for options in paramgrid.values():
    n_combinations *= len(options)
print("Size: ", n_combinations)

Size:  625


### Grid Search

In [5]:
# Exhaustive search: every combination in `paramgrid` is scored by accuracy
# using 2-fold stratified cross-validation.
two_fold_cv = StratifiedKFold(n_splits=2)
grid_cv = GridSearchCV(
    estimator=SVC(),
    param_grid=paramgrid,
    scoring="accuracy",
    cv=two_fold_cv,
    verbose=1,
)

# Display the configured (not yet fitted) search object.
grid_cv

GridSearchCV(cv=StratifiedKFold(n_splits=2, random_state=None, shuffle=False),
             error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jo...
       1.00000000e-03, 5.62341325e-03, 3.16227766e-02, 1.77827941e-01,
       1.00000000e+00, 5.62341325e+00, 3.16227766e+01, 1.77827941e+02,
       1.00000000e+03, 5.62341325e+03, 3.16227766e+04, 1.77827941e+05,
       1.00000000e+06, 5.62341325e+06, 3.16227766e+07, 1.77827941e+08,
       1.00000000e+09]),
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=1)

In [6]:
# Fit the grid search on the full dataset; %time reports the wall-clock cost of the run.
%time grid_cv.fit(X, y)

Fitting 2 folds for each of 625 candidates, totalling 1250 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Wall time: 2min 14s


[Parallel(n_jobs=1)]: Done 1250 out of 1250 | elapsed:  2.2min finished


GridSearchCV(cv=StratifiedKFold(n_splits=2, random_state=None, shuffle=False),
             error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jo...
       1.00000000e-03, 5.62341325e-03, 3.16227766e-02, 1.77827941e-01,
       1.00000000e+00, 5.62341325e+00, 3.16227766e+01, 1.77827941e+02,
       1.00000000e+03, 5.62341325e+03, 3.16227766e+04, 1.77827941e+05,
       1.00000000e+06, 5.62341325e+06, 3.16227766e+07, 1.77827941e+08,
       1.00000000e+09]),
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=1)

### Resultados

In [7]:
# Best cross-validated accuracy found by the grid search and the hyperparameters that achieved it.
grid_cv.best_score_, grid_cv.best_params_

(0.9894268224819143, {'C': 1.0, 'gamma': 0.001, 'kernel': 'rbf'})

### RandomizedSearchCV

#### Mismo espacio de hiperparámetros, pero solo se evalúan 250 combinaciones elegidas al azar

In [8]:
# Randomized search over the same space as the grid search, evaluating only
# 250 sampled combinations instead of all of them.
# The sampling is stochastic, so a fixed seed is passed to make the set of
# sampled candidates (and therefore the result) reproducible across runs.
RANDOM_STATE = 42

rand_cv = RandomizedSearchCV(   estimator=SVC(),
                                param_distributions=paramgrid,
                                n_iter=250,
                                scoring="accuracy",
                                cv=StratifiedKFold(n_splits=2),
                                random_state=RANDOM_STATE,
                                verbose=1)

# Display the configured (not yet fitted) search object.
rand_cv

RandomizedSearchCV(cv=StratifiedKFold(n_splits=2, random_state=None, shuffle=False),
                   error_score='raise-deprecating',
                   estimator=SVC(C=1.0, cache_size=200, class_weight=None,
                                 coef0=0.0, decision_function_shape='ovr',
                                 degree=3, gamma='auto_deprecated',
                                 kernel='rbf', max_iter=-1, probability=False,
                                 random_state=None, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='warn...
       1.00000000e-03, 5.62341325e-03, 3.16227766e-02, 1.77827941e-01,
       1.00000000e+00, 5.62341325e+00, 3.16227766e+01, 1.77827941e+02,
       1.00000000e+03, 5.62341325e+03, 3.16227766e+04, 1.77827941e+05,
       1.00000000e+06, 5.62341325e+06, 3.16227766e+07, 1.77827941e+08,
       1.00000000e+09]),
                                        'kernel': ['rbf']},
                   pre_dispatch='2*n_

In [9]:
# Fit the randomized search on the full dataset; %time reports the wall-clock cost of the run.
%time rand_cv.fit(X, y)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 2 folds for each of 250 candidates, totalling 500 fits
Wall time: 53.8 s


[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:   53.6s finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=2, random_state=None, shuffle=False),
                   error_score='raise-deprecating',
                   estimator=SVC(C=1.0, cache_size=200, class_weight=None,
                                 coef0=0.0, decision_function_shape='ovr',
                                 degree=3, gamma='auto_deprecated',
                                 kernel='rbf', max_iter=-1, probability=False,
                                 random_state=None, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='warn...
       1.00000000e-03, 5.62341325e-03, 3.16227766e-02, 1.77827941e-01,
       1.00000000e+00, 5.62341325e+00, 3.16227766e+01, 1.77827941e+02,
       1.00000000e+03, 5.62341325e+03, 3.16227766e+04, 1.77827941e+05,
       1.00000000e+06, 5.62341325e+06, 3.16227766e+07, 1.77827941e+08,
       1.00000000e+09]),
                                        'kernel': ['rbf']},
                   pre_dispatch='2*n_

### Resultados

In [10]:
# Best cross-validated accuracy found by the randomized search and the hyperparameters that achieved it.
rand_cv.best_score_, rand_cv.best_params_

(0.9894268224819143, {'kernel': 'rbf', 'gamma': 0.001, 'C': 1.0})

### EvolutionaryAlgorithmSearchCV

In [11]:
gen_cv = EvolutionaryAlgorithmSearchCV(estimator=SVC(),
                                       params=paramgrid,
                                       scoring="accuracy",
                                       cv=StratifiedKFold(n_splits=2),
                                       verbose=True,
                                       population_size=50,
                                       gene_mutation_prob=0.10,
                                       tournament_size=3,
                                       generations_number=100)

%time gen_cv.fit(X, y)

Types [1, 2, 2] and maxint [0, 24, 24] detected
--- Evolve in 625 possible combinations ---
gen	nevals	avg     	min    	max     	std      
0  	50    	0.911497	0.89872	0.984975	0.0293401
1  	29    	0.929405	0.89872	0.984975	0.0377837
2  	32    	0.95645 	0.89872	0.984975	0.0352351
3  	26    	0.976873	0.89872	0.984975	0.018252 
4  	33    	0.983784	0.980523	0.984975	0.00124438
5  	32    	0.981625	0.89872 	0.984975	0.0146572 
6  	38    	0.984975	0.984975	0.984975	1.11022e-16
7  	24    	0.98325 	0.89872 	0.984975	0.0120757  
8  	31    	0.982014	0.89872 	0.984975	0.0147086  
9  	38    	0.981525	0.89872 	0.984975	0.0169024  
10 	36    	0.983261	0.89872 	0.985531	0.0120775  
11 	32    	0.980312	0.89872 	0.985531	0.0187662  
12 	33    	0.984942	0.98108 	0.985531	0.000571961
13 	34    	0.979844	0.89872 	0.985531	0.0204962  
14 	31    	0.983884	0.923205	0.985531	0.00867183 
15 	30    	0.984652	0.952142	0.985531	0.004652   
16 	26    	0.981992	0.89872 	0.985531	0.0169988  
17 	28    	0.981747	0.898

### Best score + params

In [12]:
# Best cross-validated accuracy found by the evolutionary search and the hyperparameters that achieved it.
gen_cv.best_score_, gen_cv.best_params_

(0.9894268224819143, {'kernel': 'rbf', 'C': 1.0, 'gamma': 0.001})