-
Notifications
You must be signed in to change notification settings - Fork 0
/
GypsyBoost.py
107 lines (76 loc) · 3.53 KB
/
GypsyBoost.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
"""
This module implements gradient boosting regression for all scikit-learn
estimators or user-defined algorithms. User-defined algorithm should implement
method fit() and method predict() in scikit-learn fashion.
Example:
gb = GypsyBoost(loss_function, DecisionTreeRegressor())
for loss in gb.grow_ensemble(10, X_train, y_train):
if abs(prev_loss - loss) < stop_criterion:
break
y_pred = GBLR.predict(X_test)
"""
import autograd.numpy as np
from autograd import elementwise_grad
from copy import copy
from sklearn.model_selection import train_test_split
from scipy.optimize import minimize
class GypsyBoost:
    """Gradient boosting regression over an arbitrary base estimator.

    The loss gradient is derived automatically with autograd, so any loss
    written in terms of ``autograd.numpy`` operations can be boosted.  The
    base estimator only needs scikit-learn style ``fit``/``predict`` methods.
    """

    def __init__(self, loss_function, estimator):
        """
        Args:
            loss_function (callable): loss(y_true, y_pred) built from
                autograd.numpy operations (must be differentiable).
            estimator: regressor exposing fit(X, y) and predict(X).

        Raises:
            AssertionError: if loss_function is not callable or estimator
                has no fit() method.
        """
        # Guard clause instead of wrapping the whole body in an if/else.
        if not (callable(loss_function) and hasattr(estimator, 'fit')):
            raise AssertionError('wrong arguments have been passed')
        self.grad_loss = elementwise_grad(loss_function)
        self.loss = loss_function
        self.estimator = estimator
        self.X_train = None
        self.y_train = None
        self.ens_pred = None  # cached ensemble prediction on X_train
        self.ensemble = []    # fitted copies of the base estimator
        self.gammas = []      # per-estimator step sizes

    def grow_ensemble(self, n_estimators, X, y, validation=0.1, shuffle=True, ordering=False):
        """Grow the boosting ensemble one estimator per round (a generator).

        After each round it yields the mean loss on a held-out validation
        split so the caller can early-stop.  Assumes one call per instance:
        repeated calls keep extending the same ensemble (as the original
        implementation did) — NOTE(review): confirm that is intended.

        Args:
            n_estimators (int): number of boosting rounds.
            X (numpy array): (n, m) feature matrix.
            y (numpy array): (n,) target vector.
            validation (float): fraction of data held out for the yielded
                validation loss.
            shuffle (bool): whether to shuffle before the split.
            ordering (bool): if True, fit each estimator on a consecutive
                chunk of the training data instead of the whole set.

        Yields:
            float: mean validation loss after the current round.
        """
        self.X_train, X_val, self.y_train, y_val = train_test_split(
            X, y, test_size=validation, shuffle=shuffle)
        residual = self.y_train.copy()
        if ordering:
            chunk_length = self.X_train.shape[0] // n_estimators
        for i in range(n_estimators):
            regressor = self.estimator
            if ordering:
                start = i * chunk_length
                # The last chunk absorbs the division remainder.
                if i == n_estimators - 1:
                    end = self.X_train.shape[0]
                else:
                    end = start + chunk_length
                regressor.fit(self.X_train[start:end], residual[start:end])
            else:
                regressor.fit(self.X_train, residual)
            if self.ensemble:
                # Line-search the step size of the freshly fitted estimator.
                # .x is a size-1 ndarray; index it rather than float()-ing
                # the array (deprecated in NumPy >= 1.25).
                gamma = float(minimize(self.__loss_wrap, 1, method='L-BFGS-B').x[0])
            else:
                gamma = 1.0
            self.gammas.append(gamma)
            self.ensemble.append(copy(regressor))
            # Incremental update of the cached ensemble prediction: same
            # left-to-right summation as re-predicting with every stored
            # estimator, but O(1) predict calls per round instead of O(i).
            step = gamma * regressor.predict(self.X_train)
            self.ens_pred = step if self.ens_pred is None else self.ens_pred + step
            # Next round fits the negative loss gradient (pseudo-residuals).
            residual = -1 * self.grad_loss(self.y_train, self.ens_pred)
            yield np.mean(self.loss(y_val, self.predict(X_val)))

    def __loss_wrap(self, gamma):
        """Objective for the line search in grow_ensemble(): mean loss of
        (current ensemble prediction + gamma * latest estimator)."""
        f_step = self.estimator.predict(self.X_train)
        return np.mean(self.loss(self.y_train, self.ens_pred + gamma * f_step))

    def predict(self, X):
        """Predict with the weighted ensemble built by grow_ensemble().

        Args:
            X (numpy array): (n, m) feature matrix.

        Returns:
            numpy array: (n,) predictions.
        """
        prediction = np.zeros(X.shape[0])
        for estimator, gamma in zip(self.ensemble, self.gammas):
            prediction += gamma * estimator.predict(X)
        return prediction