In [1]:
import numpy as np
import pandas as pd
import p2pspatial
import pyswarm

import sklearn.base as sklb
import sklearn.model_selection as sklms
import sklearn.metrics as sklm
import sklearn.utils.validation as skluv

2018-01-05 00:56:54,918 [pulse2percept] [INFO] Welcome to pulse2percept


In [2]:
class DummyRegressor(sklb.BaseEstimator, sklb.RegressorMixin):
    """Toy regressor used to exercise the optimizer.

    Nothing is learned from the data: the prediction for a row is that
    row's index label multiplied by the single parameter ``a``.

    Parameters
    ----------
    a : number, optional, default: 1
        Factor applied to each row's index label in ``predict``.
    """

    def __init__(self, a=1):
        self.a = a

    def fit(self, X, y=None, **fit_params):
        """No-op fit; all arguments are ignored and ``self`` is returned."""
        return self

    def predict(self, X):
        """Return ``index label * a`` for every row of ``X``.

        ``X`` must provide ``iterrows()`` (e.g. a pandas DataFrame); the
        column values themselves are not used.
        """
        return np.array([label * self.a for label, _ in X.iterrows()])

In [3]:
class ParticleSwarmOptimizer(sklb.BaseEstimator, sklb.RegressorMixin):
    """Tunes an estimator's parameters with particle swarm optimization.

    Wraps ``pyswarm.pso``: every candidate parameter vector is evaluated by
    fitting a clone of ``estimator`` and negating its ``score``. After the
    search, ``estimator`` itself is set to the best parameters and refit.

    Attributes
    ----------
    best_params_ : dict
        Parameter name -> best value found. Set by ``fit``.
    """

    def __init__(self, estimator, search_params, swarm_size=None,
                 min_func=1e-4, verbose=True):
        """Performs particle swarm optimization
        
        Parameters
        ----------
        estimator :
            A scikit-learn estimator. Make sure its scoring function has
            greater equals better.
        search_params : dict of tuples (lower bound, upper bound)
            Search parameters, keyed by the estimator's parameter names
        swarm_size : int, optional, default: 10 * number of search params
            Swarm size
        min_func : float, optional, default: 1e-4
            When to stop (passed to ``pyswarm.pso`` as ``minfunc``)
        verbose : bool, optional, default: True
            Flag whether to print more stuff (passed to ``pyswarm.pso``
            as ``debug``)
        """
        if swarm_size is None:
            swarm_size = 10 * len(search_params)
        self.estimator = estimator
        self.search_params = search_params
        self.swarm_size = swarm_size
        self.min_func = min_func
        self.verbose = verbose

    def swarm_error(self, search_vals, X, y, fit_params=None):
        """Calculates the particle swarm error
        
        The error is calculated using the estimator's scoring function.

        Parameters
        ----------
        search_vals : sequence
            One value per entry of ``search_params``, in the same order.
        X, y :
            Data the cloned estimator is fit on and then scored on.
        fit_params : dict, optional, default: None
            Extra keyword arguments forwarded to the estimator's ``fit``.

        Returns
        -------
        The negated score of the fitted clone on ``(X, y)``.
        """
        # None instead of a `{}` default: a mutable default would be shared
        # between calls.
        if fit_params is None:
            fit_params = {}

        # pyswarm provides values for all search parameters in a list:
        # Need to pair these values with the names of the search params
        # to build a dict
        search_params = dict(zip(self.search_params, search_vals))

        # Clone the estimator to make sure we have a clean slate
        estimator = sklb.clone(self.estimator)
        estimator.set_params(**search_params)
        estimator.fit(X, y=y, **fit_params)

        # Scoring function: greater is better, so invert to get an
        # error function
        return -estimator.score(X, y)

    def fit(self, X, y, **fit_params):
        """Search for the best parameters, then refit the estimator.

        Parameters
        ----------
        X, y :
            Training data; used both for the search and the final refit.
        **fit_params :
            Forwarded to the estimator's ``fit`` on every evaluation.

        Returns
        -------
        self
        """
        # Run particle swarm optimization
        lb = [v[0] for v in self.search_params.values()]
        ub = [v[1] for v in self.search_params.values()]
        best_vals, best_err = pyswarm.pso(
            self.swarm_error, lb, ub, swarmsize=self.swarm_size,
            minfunc=self.min_func, debug=self.verbose,
            args=[X, y], kwargs={'fit_params': fit_params}
        )

        # Pair values of best params with their names to build a dict
        self.best_params_ = dict(zip(self.search_params, best_vals))
        print('Best err:', best_err, 'Best params:', self.best_params_)

        # Fit the class attribute with best params
        self.estimator.set_params(**self.best_params_)
        self.estimator.fit(X, y=y, **fit_params)

        # scikit-learn estimators return themselves from fit so that calls
        # can be chained and meta-estimators can use the return value.
        return self

    def predict(self, X):
        """Predict with the refit estimator.

        ``check_is_fitted`` raises if ``fit`` has not been called yet (no
        ``best_params_`` attribute).
        """
        msg = "Estimator, %(name)s, must be fitted before predicting."
        skluv.check_is_fitted(self, "best_params_", msg=msg)
        return self.estimator.predict(X)

In [4]:
# Estimator to be tuned. The initial a=1 is only a placeholder: the optimizer
# overwrites `a` via set_params during the search and the final refit.
dummy = DummyRegressor(a=1)

In [5]:
# Search range for the single parameter 'a': (lower bound, upper bound)
search_params = {'a': (-10, 10)}
# The constructor's flag is named `verbose` (it is forwarded to pyswarm as
# `debug`); passing `debug=` here raises a TypeError on a fresh kernel.
pso = ParticleSwarmOptimizer(dummy, search_params, verbose=False)
print(pso)

ParticleSwarmOptimizer(debug=False, estimator=DummyRegressor(a=1),
            min_func=0.0001, search_params={'a': (-10, 10)}, swarm_size=10)


In [6]:
# Toy data set: both feature columns and the target equal the row number
# 0..n_samples-1, which is also the default index label that
# DummyRegressor.predict multiplies by `a`.
n_samples = 10
sample_ids = np.arange(n_samples)
X = pd.DataFrame(np.column_stack([sample_ids, sample_ids]),
                 columns=['feat1', 'feat2'])
y = pd.DataFrame({'target': np.arange(n_samples)})

In [7]:
# Run the swarm search over `a`, then refit the wrapped estimator with the
# best value found (stored in pso.best_params_).
pso.fit(X, y=y)

Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999997111416 Best params: {'a': 1.0009144227905247}


In [9]:
# `score` is not defined on ParticleSwarmOptimizer; it is inherited from
# sklb.RegressorMixin and evaluates pso.predict(X) against y.
pso.score(X, y)

0.99999711141604419

In [11]:
# NOTE(review): crossval_predict comes from p2pspatial and is not shown here.
# Judging by the output below it refits `pso` once per fold and returns
# per-fold lists of test inputs, targets and predictions — confirm.
X_test, y_test, y_pred = p2pspatial.model_selection.crossval_predict(pso, X, y, n_folds=2)

Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999999688618 Best params: {'a': 0.99988949642021452}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999999811869 Best params: {'a': 1.0002504203271629}


In [12]:
# Predictions returned by crossval_predict (one array per fold)
y_pred

[array([ 0.        ,  0.9998895 ,  1.99977899,  2.99966849,  3.99955799]),
 array([ 5.0012521 ,  6.00150252,  7.00175294,  8.00200336,  9.00225378])]

In [14]:
# Targets returned by crossval_predict (one DataFrame per fold), for
# comparison with y_pred above
y_test

[   target
 0       0
 1       1
 2       2
 3       3
 4       4,    target
 5       5
 6       6
 7       7
 8       8
 9       9]