In [1]:
import numpy as np
import pandas as pd
import p2pspatial
import pyswarm

import sklearn.base as sklb
import sklearn.model_selection as sklms
import sklearn.metrics as sklm
import sklearn.utils.validation as skluv

2018-01-05 15:23:35,702 [pulse2percept] [INFO] Welcome to pulse2percept


In [2]:
class DummyRegressor(sklb.BaseEstimator, sklb.RegressorMixin):
    """Toy regressor used to exercise the optimizer.

    The prediction for each row of ``X`` is that row's index label
    multiplied by the single parameter ``a``; nothing is learned in ``fit``.
    """

    def __init__(self, a=1):
        self.a = a

    def fit(self, X, y=None, **fit_params):
        """Does nothing and hands back the estimator itself."""
        return self

    def predict(self, X):
        """Returns ``index label * a`` for every row of the DataFrame ``X``."""
        scaled_labels = [label * self.a for label, _ in X.iterrows()]
        return np.array(scaled_labels)

In [3]:
class ParticleSwarmOptimizer(sklb.BaseEstimator, sklb.RegressorMixin):
    """Tunes an estimator's parameters with particle swarm optimization.

    The search is delegated to ``pyswarm.pso``, which minimizes
    ``swarm_error``. After the search, the wrapped estimator itself (not a
    copy) is set to the best parameters found and refit on the data passed
    to ``fit``.

    Parameters
    ----------
    estimator :
        A scikit-learn estimator. Make sure its scoring function has
        greater equals better.
    search_params : dict of tuples (lower bound, upper bound)
        Search parameters, keyed by the estimator's parameter names
    swarm_size : int, optional, default: 10 * number of search params
        Swarm size
    min_func : float, optional, default: 1e-4
        Passed to ``pyswarm.pso`` as ``minfunc``: the search stops once the
        swarm's best objective changes by less than this value
    verbose : bool, optional, default: True
        Passed to ``pyswarm.pso`` as ``debug``

    Attributes
    ----------
    best_params_ : dict
        Best value found for every search parameter; set by ``fit``.
    """

    def __init__(self, estimator, search_params, swarm_size=None,
                 min_func=1e-4, verbose=True):
        # The default swarm size is resolved here, so ``swarm_size`` holds
        # an int from construction on (this is what ``repr`` shows).
        if swarm_size is None:
            swarm_size = 10 * len(search_params)
        self.estimator = estimator
        self.search_params = search_params
        self.swarm_size = swarm_size
        self.min_func = min_func
        self.verbose = verbose

    def _named_params(self, values):
        """Pairs a flat sequence of values with the search-parameter names."""
        return dict(zip(self.search_params.keys(), values))

    def swarm_error(self, search_vals, X, y, fit_params=None):
        """Calculates the particle swarm error

        The error is calculated using the estimator's scoring function.

        Parameters
        ----------
        search_vals : sequence
            One value per search parameter, in the order of
            ``self.search_params``.
        X, y :
            Data the cloned estimator is fit on and then scored against.
        fit_params : dict, optional
            Extra keyword arguments forwarded to the estimator's ``fit``.

        Returns
        -------
        The negated score of the fitted clone (lower is better).
        """
        # ``None`` instead of ``{}`` as default: a dict default would be
        # shared between all calls.
        if fit_params is None:
            fit_params = {}

        # Clone the estimator to make sure we have a clean slate
        estimator = sklb.clone(self.estimator)
        estimator.set_params(**self._named_params(search_vals))
        estimator.fit(X, y=y, **fit_params)

        # Scoring function: greater is better, so invert to get an
        # error function
        return -estimator.score(X, y)

    def fit(self, X, y, **fit_params):
        """Runs the swarm search, then refits the estimator with the winner.

        Parameters
        ----------
        X, y :
            Training data; every candidate is fit and scored on this same
            data.
        **fit_params :
            Forwarded to the wrapped estimator's ``fit``.

        Returns
        -------
        self
        """
        # pyswarm wants the bounds as two parallel sequences
        lb = [bounds[0] for bounds in self.search_params.values()]
        ub = [bounds[1] for bounds in self.search_params.values()]
        best_vals, best_err = pyswarm.pso(
            self.swarm_error, lb, ub, swarmsize=self.swarm_size,
            minfunc=self.min_func, debug=self.verbose,
            args=[X, y], kwargs={'fit_params': fit_params}
        )

        self.best_params_ = self._named_params(best_vals)
        print('Best err:', best_err, 'Best params:', self.best_params_)

        # Fit the class attribute with best params
        self.estimator.set_params(**self.best_params_)
        self.estimator.fit(X, y=y, **fit_params)

        # scikit-learn convention: ``fit`` returns the estimator so calls
        # can be chained, e.g. ``pso.fit(X, y).predict(X)``.
        return self

    def predict(self, X):
        """Predicts with the refit estimator; requires a prior ``fit``."""
        msg = "Estimator, %(name)s, must be fitted before predicting."
        skluv.check_is_fitted(self, "best_params_", msg=msg)
        return self.estimator.predict(X)

In [4]:
# Base estimator handed to the optimizer; a=1 is only the starting value.
dummy = DummyRegressor(a=1)

In [5]:
# Search the single parameter 'a' within the bounds (-10, 10).
search_params = {'a': (-10, 10)}
# swarm_size is not given, so it defaults to 10 * len(search_params) = 10.
pso = ParticleSwarmOptimizer(dummy, search_params, verbose=False)
print(pso)

ParticleSwarmOptimizer(estimator=DummyRegressor(a=1), min_func=0.0001,
            search_params={'a': (-10, 10)}, swarm_size=10, verbose=False)


In [6]:
# Toy data: both feature columns and the target equal the row number.
n_samples = 12
X = pd.DataFrame(
    np.column_stack([np.arange(n_samples)] * 2),
    columns=['feat1', 'feat2'],
)
y = pd.DataFrame({'target': np.arange(n_samples)})

In [7]:
# Run the swarm search on the full toy data set; afterwards `dummy` itself
# carries the best 'a' found and has been refit on X, y.
pso.fit(X, y=y)

Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999986401468 Best params: {'a': 1.0019603732938089}


In [8]:
# Score on the same data that was used for fitting (a training score, not a
# held-out estimate).
pso.score(X, y)

0.99998640146778861

In [9]:
# Cross-validated predictions via p2pspatial's crossval_predict with 2 folds.
# NOTE(review): the three return values are assumed to be test features, test
# targets and predictions, going by the names they are unpacked into --
# confirm against p2pspatial.
X_test, y_test, y_pred = p2pspatial.model_selection.crossval_predict(pso, X, y, n_folds=2)

Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999998446487 Best params: {'a': 0.99975447927822225}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.99993640669 Best params: {'a': 1.0044982480003255}


In [25]:
def nested_crossval_predict(estimator, X, y, n_outer_folds=2, n_inner_folds=5):
    """Fits and predicts on inner folds nested inside contiguous outer folds.

    The rows are split, in order and without shuffling, into
    ``n_outer_folds`` groups. For every outer group, the remaining rows are
    split again into ``n_inner_folds`` groups; a fresh clone of ``estimator``
    is fit on all but one inner group and used to predict the inner group
    that was left out. The rows of the outer held-out group are only excluded
    from training; they are never predicted.

    Parameters
    ----------
    estimator :
        Estimator to clone for every inner fold; it must provide
        ``fit(X, y)`` and ``predict(X)``.
    X : pd.DataFrame
        Feature matrix, one row per sample.
    y : pd.DataFrame
        Targets, row-aligned with ``X``.
    n_outer_folds : int, optional, default: 2
        Number of outer folds.
    n_inner_folds : int, optional, default: 5
        Number of inner folds per outer fold.

    Returns
    -------
    X_outer_test, y_outer_test, y_pred_outer_test : list of lists
        Indexed as ``[outer fold][inner fold]``: the left-out rows of ``X``,
        the left-out rows of ``y``, and the predictions for those rows.
    """
    assert isinstance(X, pd.core.frame.DataFrame)
    assert isinstance(y, pd.core.frame.DataFrame)
    all_idx = np.arange(len(X))
    outer_groups = np.array_split(all_idx, n_outer_folds)

    X_outer_test = []
    y_outer_test = []
    y_pred_outer_test = []
    for outer_test_idx in outer_groups:
        # Remove the held-out rows by VALUE. np.delete interprets its second
        # argument as positions, which only coincides with row numbers when
        # the array being filtered is exactly 0..n-1.
        outer_train_idx = np.setdiff1d(all_idx, outer_test_idx)
        inner_groups = np.array_split(outer_train_idx, n_inner_folds)
        X_inner_test = []
        y_inner_test = []
        y_pred_inner_test = []
        for inner_test_idx in inner_groups:
            # Same here: outer_train_idx holds row numbers (e.g. 6..11), so
            # positional deletion would drop the wrong rows or none at all
            # and leak the test rows into the training set.
            inner_train_idx = np.setdiff1d(outer_train_idx, inner_test_idx)
            est = sklb.clone(estimator)
            est.fit(X.iloc[inner_train_idx, :], y.iloc[inner_train_idx])
            X_inner_test.append(X.iloc[inner_test_idx, :])
            y_inner_test.append(y.iloc[inner_test_idx])
            # Predict from the features of the left-out rows, not from
            # their targets.
            y_pred_inner_test.append(est.predict(X.iloc[inner_test_idx, :]))
        X_outer_test.append(X_inner_test)
        y_outer_test.append(y_inner_test)
        y_pred_outer_test.append(y_pred_inner_test)
    return X_outer_test, y_outer_test, y_pred_outer_test

In [26]:
# Try the nested routine on the toy data: 2 outer folds with 3 inner folds
# each, i.e. one swarm search per (outer, inner) pair.
Xt, yt, yp = nested_crossval_predict(pso, X, y, n_outer_folds=2, n_inner_folds=3)

Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999999533554 Best params: {'a': 1.000134533849222}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999987687652 Best params: {'a': 1.0006911959710292}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999965249714 Best params: {'a': 0.99883879220041583}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999996994716 Best params: {'a': 0.99947248973435077}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999999967247 Best params: {'a': 1.0001151388934604}
Stopping search: Swarm best objective change less than 0.0001
Best err: -0.999998590241 Best params: {'a': 0.99929043289442021}


In [28]:
# Inspect the predictions, nested as [outer fold][inner fold].
yp

[[array([ 6.0008072 ,  7.00094174]),
  array([ 8.00552957,  9.00622076]),
  array([  9.98838792,  10.98722671])],
 [array([ 0.        ,  0.99947249]),
  array([ 2.00023028,  3.00034542]),
  array([ 3.99716173,  4.99645216])]]