[View in Colaboratory](https://colab.research.google.com/github/danielcanueto/misc/blob/master/particle_swarm_optimization.ipynb)

In [5]:
!pip install pyswarms


Collecting pyswarms
[?25l  Downloading https://files.pythonhosted.org/packages/f0/38/07a472a2aac09dd27c9a502f47da658247755f9d80e34112e9d4f157d380/pyswarms-0.2.1-py2.py3-none-any.whl (66kB)
[K    100% |████████████████████████████████| 71kB 6.2MB/s 
Collecting pytest==3.2.1 (from pyswarms)
[?25l  Downloading https://files.pythonhosted.org/packages/e0/1e/d52c6a3a143935410ee33320341ea7bbb770ca8fe89c3d51e18254e0a2ba/pytest-3.2.1-py2.py3-none-any.whl (186kB)
[K    100% |████████████████████████████████| 194kB 24.2MB/s 
[?25hCollecting PyYAML==3.12 (from pyswarms)
[?25l  Downloading https://files.pythonhosted.org/packages/4a/85/db5a2df477072b2902b0eb892feb37d88ac635d36245a72a6a69b23b383a/PyYAML-3.12.tar.gz (253kB)
[K    100% |████████████████████████████████| 256kB 27.6MB/s 
Collecting attrs==18.1.0 (from pyswarms)
  Downloading https://files.pythonhosted.org/packages/41/59/cedf87e91ed541be7957c501a92102f9cc6363c623a7666d69d51c78ac5b/attrs-18.1.0-py2.py3-none-any.whl
Collecting py>=1.

In [0]:

from time import time

import numpy as np
from scipy.stats import randint as sp_randint

from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import cross_val_score

import pyswarms as ps

In [0]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Load the scikit-learn digits dataset and keep its data/target arrays
# as the module-level X and y.
digits = load_digits()
X = digits.data
y = digits.target


In [0]:
# function we are attempting to optimize (minimize)
def func1(x):
    """Return the negated mean cross-validation score for one particle.

    The six entries of ``x`` are decoded into RandomForestClassifier
    hyper-parameters:

    * ``x[0]`` -> ``max_depth``: truncated to int; a value above 5 means
      no depth limit (``None``).
    * ``x[1]`` -> ``max_features`` (truncated to int).
    * ``x[2]`` -> ``min_samples_split`` (truncated to int).
    * ``x[3]`` -> ``min_samples_leaf`` (truncated to int).
    * ``x[4]`` -> ``bootstrap``: ``True`` when above 0.5.
    * ``x[5]`` -> ``criterion``: ``'gini'`` when above 0.5, else ``'entropy'``.

    The score is negated because this function is minimized. The data comes
    from the module-level ``X`` and ``y``.
    """
    depth = int(x[0])
    max_depth = None if depth > 5 else depth

    # Compare the raw value against 0.5. Truncating first (int(v) > 0.5)
    # maps every value in [0, 1) to 0, so one of the two options could only
    # be reached at exactly 1.0.
    bootstrap = bool(x[4] > 0.5)  # a real bool, not a 0/1 integer
    criterion = 'gini' if x[5] > 0.5 else 'entropy'

    clf = RandomForestClassifier(
        n_estimators=20,
        max_depth=max_depth,
        max_features=int(x[1]),
        min_samples_split=int(x[2]),
        min_samples_leaf=int(x[3]),
        bootstrap=bootstrap,
        criterion=criterion,
    )
    # cross_val_score comes from sklearn.model_selection; the name
    # `cross_validation` was never defined in this notebook.
    return -np.mean(cross_val_score(clf, X, y))
  
def f(x):
    """Evaluate func1 on every row of ``x`` and return the costs as an array.

    ``x`` must expose ``shape``; row ``i`` is passed to func1 as one particle.
    """
    costs = []
    for idx in range(x.shape[0]):
        costs.append(func1(x[idx]))
    return np.array(costs)

In [0]:
# Search-space limits, one entry per particle dimension, in the order func1
# decodes them: max_depth, max_features, min_samples_split,
# min_samples_leaf, bootstrap, criterion.
bounds = (
    np.array([1, 1, 2, 1, 0, 0]),      # first array: smaller limits
    np.array([10, 11, 11, 11, 1, 1]),  # second array: larger limits
)

In [71]:
# Coefficients handed to the optimizer through its `options` argument.
options = dict(c1=0.5, c2=0.3, w=0.9)

# Ten particles in six dimensions, restricted to `bounds`.
optimizer = ps.single.GlobalBestPSO(
    n_particles=10,
    dimensions=6,
    options=options,
    bounds=bounds,
)

# Run 30 iterations on f, keeping the final cost and the matching position.
cost, pos = optimizer.optimize(f, iters=30, print_step=5, verbose=3)





INFO:pyswarms.single.global_best:Iteration 1/30, cost: -0.9332095760692222
INFO:pyswarms.single.global_best:Iteration 6/30, cost: -0.9337494759047589
INFO:pyswarms.single.global_best:Iteration 11/30, cost: -0.9387857223694774
INFO:pyswarms.single.global_best:Iteration 16/30, cost: -0.9387857223694774
INFO:pyswarms.single.global_best:Iteration 21/30, cost: -0.9393450344036397
INFO:pyswarms.single.global_best:Iteration 26/30, cost: -0.9393450344036397
Optimization finished!
Final cost: -0.9393
Best value: [ 7.202880 7.455621 2.292993 ...]



In [73]:
# Estimator whose hyper-parameters are searched.
clf = RandomForestClassifier(n_estimators=20)

# Candidate values per hyper-parameter: explicit lists, or scipy integer
# distributions for the numeric ones.
param_dist = dict(
    max_depth=[3, None],
    max_features=sp_randint(1, 11),
    min_samples_split=sp_randint(2, 11),
    min_samples_leaf=sp_randint(1, 11),
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

# Randomized search over n_iter_search sampled configurations.
n_iter_search = 20
random_search = RandomizedSearchCV(
    clf,
    param_distributions=param_dist,
    n_iter=n_iter_search,
)

start = time()
random_search.fit(X, y)


RandomizedSearchCV(cv=None, error_score='raise',
          estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
          fit_params=None, iid=True, n_iter=20, n_jobs=1,
          param_distributions={'bootstrap': [True, False], 'min_samples_leaf': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fe586eef090>, 'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fe586eefad0>, 'criterion': ['gini', 'entropy'], 'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fe586eef390>, 'max_depth': [3, None]},
          pre_dispatch='2*n_jobs', random_state=None, refit=

In [74]:
# Exhaustive grid: every combination of the values listed below is tried.
param_grid = dict(
    max_depth=[3, None],
    max_features=[1, 3, 10],
    min_samples_split=[2, 3, 10],
    min_samples_leaf=[1, 3, 10],
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

# Grid search on the `clf` estimator defined in the randomized-search cell.
grid_search = GridSearchCV(clf, param_grid=param_grid)

start = time()
grid_search.fit(X, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'bootstrap': [True, False], 'min_samples_leaf': [1, 3, 10], 'min_samples_split': [2, 3, 10], 'criterion': ['gini', 'entropy'], 'max_features': [1, 3, 10], 'max_depth': [3, None]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [77]:
# Position returned by optimizer.optimize(): raw (undecoded) values, one per
# dimension of `bounds`.
pos

array([7.20287957, 7.45562139, 2.2929928 , 2.12709407, 0.99154414,
       0.62732981])

In [78]:
# Re-score the configuration read off `pos` with 20-fold cross-validation.
# bootstrap is passed as a real boolean rather than the integer 1.
clf = RandomForestClassifier(n_estimators=20, max_depth=None, max_features=7,
                             min_samples_split=2, min_samples_leaf=2,
                             bootstrap=True, criterion="gini")
# cross_val_score is imported from sklearn.model_selection; the name
# `cross_validation` was never defined in this notebook.
results = cross_val_score(clf, X, y, cv=20)
print('mean ' + str(np.mean(results)))
print('sd ' + str(np.std(results)))

mean 0.9483097999398771
sd 0.0351409050681803


In [76]:
# Show the best_params_ attribute of the fitted grid search.
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 10,
 'min_samples_leaf': 1,
 'min_samples_split': 3}

In [79]:
# Re-score the configuration reported by grid_search.best_params_ with
# 20-fold cross-validation.
clf = RandomForestClassifier(n_estimators=20, max_depth=None, max_features=10,
                             min_samples_split=3, min_samples_leaf=1,
                             bootstrap=False, criterion="gini")
# cross_val_score is imported from sklearn.model_selection; the name
# `cross_validation` was never defined in this notebook.
results_2 = cross_val_score(clf, X, y, cv=20)
print('mean ' + str(np.mean(results_2)))
print('sd ' + str(np.std(results_2)))

mean 0.9661581303704277
sd 0.02483466552729989
