In [1]:
import numpy as np

## Bootstrap

In [2]:
class Bootstrap:
    '''Draws bootstrap resamples (rows sampled with replacement) of a dataset.'''

    def create_dataset(self, X, y, seed=None):
        '''Create one bootstrapped copy of ``(X, y)``.

        ``len(X)`` row positions are drawn uniformly with replacement and the
        same positions are applied to both inputs, so every sampled row of
        ``X`` stays paired with its entry in ``y``.

        Parameters
        ----------
        X, y : objects that support ``len()`` (``X``) and indexing with an
            integer NumPy array (both), e.g. NumPy arrays.
        seed : int or None, optional
            When not ``None``, NumPy's *global* random state is re-seeded with
            this value before sampling, which makes the draw repeatable (and
            also affects later ``np.random`` calls made by the caller).
            ``None`` leaves the global state untouched.

        Returns
        -------
        tuple
            ``(X[indices], y[indices])`` where ``indices`` are the sampled
            row positions.
        '''
        # Compare against None explicitly: a truthiness check would treat the
        # perfectly valid seed 0 as "no seed" and silently skip seeding.
        if seed is not None:
            np.random.seed(seed)
        n_rows = len(X)
        indices = np.arange(n_rows)
        indices = np.random.choice(indices, size=n_rows, replace=True)
        return X[indices], y[indices]

## Create Data

In [3]:
# Instantiate the bootstrap sampler; it holds no state of its own.
bs = Bootstrap()

In [4]:
# Toy feature matrix: 50 integers drawn uniformly from [0, 10), laid out as
# 10 rows by 5 columns. The draw is unseeded, so values differ between runs.
X = np.random.randint(low=0, high=10, size=50).reshape((10, 5))
X

array([[0, 1, 9, 1, 4],
       [7, 8, 8, 3, 7],
       [4, 0, 0, 2, 1],
       [0, 3, 6, 7, 9],
       [9, 8, 7, 7, 9],
       [8, 2, 3, 8, 3],
       [6, 1, 7, 2, 8],
       [7, 2, 1, 4, 0],
       [0, 3, 1, 2, 4],
       [2, 7, 1, 4, 6]])

In [5]:
# Toy binary target: 10 independent Bernoulli(0.5) draws (binomial with a
# single trial each). Unseeded, so values differ between runs.
y = np.random.binomial(n=1, p=0.5, size=10)
y

array([1, 0, 0, 0, 0, 1, 1, 0, 0, 1])

### Example

In [6]:
# Draw a single bootstrap resample of the toy data (no seed, so not repeatable).
bs = Bootstrap()
# NOTE(review): this rebinds X and y to the resample, so the original data is
# no longer reachable and every later cell operates on the resampled arrays.
# Re-running this cell resamples the previous resample again.
X, y = bs.create_dataset(X,y)

In [7]:
X

array([[8, 2, 3, 8, 3],
       [7, 8, 8, 3, 7],
       [7, 2, 1, 4, 0],
       [0, 3, 6, 7, 9],
       [4, 0, 0, 2, 1],
       [0, 3, 1, 2, 4],
       [6, 1, 7, 2, 8],
       [7, 2, 1, 4, 0],
       [2, 7, 1, 4, 6],
       [8, 2, 3, 8, 3]])

In [8]:
y

array([1, 0, 0, 0, 0, 0, 1, 0, 1, 1])

## Bagging

In [9]:
class Bagging(Bootstrap):
    '''Refits an estimator on repeated bootstrap resamples and records the
    fitted coefficients of each run.'''

    def __init__(self):
        self.estimator = None
        # Defined up front so simulate() works when no seed is ever supplied.
        self.seed = None

    def simulate(self, estimator, X, y, n_datasets, seed=None):
        '''Fit ``estimator`` on ``n_datasets`` bootstrap resamples of ``(X, y)``.

        After the call, ``self.coefficients_`` is a list holding one copy of
        ``estimator.coef_`` per resample, in the order the fits were run.

        Parameters
        ----------
        estimator : object exposing ``fit(X, y)`` and, once fitted, ``coef_``.
            The same object is refit in place for every resample.
        X, y : data forwarded to ``create_dataset`` for resampling.
        n_datasets : int
            Number of bootstrap resamples to draw and fit.
        seed : int or None, optional
            Seed for the first resample; it is increased by one after every
            resample so each draw is different but repeatable. When ``None``,
            the seed stored on the instance by a previous call (if any) is
            reused; if there is none, the resamples are drawn unseeded.
        '''
        # ``is not None`` rather than truthiness so that seed=0 is accepted.
        if seed is not None:
            self.seed = seed
        # The estimator does not change between iterations, so bind it once.
        self.estimator = estimator
        self.coefficients_ = []
        for _ in range(n_datasets):
            X_, y_ = self.create_dataset(X, y, seed=self.seed)
            self.estimator.fit(X_, y_)
            # Store a copy: the estimator is refit on the next iteration, and an
            # estimator that updates coef_ in place would otherwise overwrite
            # the values already collected.
            self.coefficients_.append(np.copy(self.estimator.coef_))
            # Advance the seed only when seeding is in use; None stays None.
            if self.seed is not None:
                self.seed += 1

### Example

In [10]:
# Base estimator for the bagging example: an ordinary least-squares linear
# regression, whose fitted `coef_` is what Bagging.simulate collects.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()

In [11]:
# Fit the regression on 20 bootstrap resamples. seed=42 seeds the first
# resample and simulate() increments the seed by one for each later one.
# NOTE(review): X and y here are the arrays rebound by the earlier bootstrap
# example cell, not the originally generated data.
bg = Bagging()
bg.simulate(lr, X, y, n_datasets=20, seed=42)

In [12]:
# List of fitted coefficient arrays, one per bootstrap resample.
bg.coefficients_

[array([ 0.18075536, -0.29220824, -0.01403197,  0.10158558,  0.14698876]),
 array([ 0.32520325,  0.06504065, -0.62601626,  0.04878049,  0.57723577]),
 array([ 0.30775764, -0.051812  , -0.41732729,  0.0319932 ,  0.46630804]),
 array([ 0.05967078,  0.12962963, -0.18106996,  0.17078189,  0.01028807]),
 array([-0.06756757, -0.01351351, -0.59459459,  0.47297297,  0.12162162]),
 array([ 0.30741632,  0.24042869, -0.03135932,  0.02195831,  0.22248969]),
 array([ 0.30469716, -0.0723115 , -0.38071693,  0.02904821,  0.44684796]),
 array([ 0.13432836, -0.10447761, -0.24626866,  0.20895522,  0.36567164]),
 array([-0.25, -0.25,  0.5 ,  0.25, -0.25]),
 array([ 0.05633803,  0.03024027,  0.04391052,  0.08699254,  0.03024027]),
 array([ 0.37016575, -0.01104972, -0.63812155, -0.00828729,  0.64640884]),
 array([ 0.16489362, -0.03723404, -0.23404255,  0.07446809,  0.33510638]),
 array([ 0.1964996 , -0.0779634 , -0.3194113 ,  0.06404137,  0.39538584]),
 array([ 0.26560065, -0.06956628, -0.39629475, -0.00567

### Mean of Coefficients

In [13]:
# Stack the per-resample coefficient vectors into a 2-D array and average
# down the resample axis, giving one mean value per coefficient.
np.asarray(bg.coefficients_).mean(axis=0)

array([ 0.17752574, -0.00287019, -0.30199819,  0.11538191,  0.31047679])

### Standard Error of Coefficients

In [14]:
# Spread of each coefficient across the bootstrap resamples.
# NOTE(review): the default ddof=0 gives the population standard deviation;
# bootstrap standard errors are commonly computed with ddof=1 - confirm which
# is intended.
np.asarray(bg.coefficients_).std(axis=0)

array([ 0.15152587,  0.15747603,  0.28881898,  0.1160515 ,  0.2182342 ])