In [3]:
import numpy as np
import pandas as pd


class GradientBoostingRegressor:
    """Gradient boosting regressor."""

    def fit(self, X, y):
        """Fit the model to the data.

        Args:
            X: array-like of shape (n_samples, n_features)
            y: array-like of shape (n_samples,)

        Returns:
            GradientBoostingRegressor: The fitted model.
        """
        self.base_pred_ = np.mean(y)
        return self

    def predict(self, X):
        """Predict the target of new data.

        Args:
            X: array-like of shape (n_samples, n_features)

        Returns:
            y: array-like of shape (n_samples,)
            The predict values.
            
        """
        
        predictions = self.base_pred_
        return np.array([predictions] * len(X))


In [84]:
import numpy as np
from typing import Tuple


def mse(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, np.ndarray]:
    """Mean squared error loss function and gradient."""

    loss = np.mean((y_pred - y_true)**2)
    grad = 2 * (y_pred - y_true)
    
    return loss, (grad - loss)


def mae(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, np.ndarray]:
    """Mean absolute error loss function and gradient."""
    
    loss = np.mean(abs(y_pred - y_true))
    grad = np.where(y_pred == y_true, y_true, np.where((y_pred - y_true) > 0 , 1, -1))
    
    return loss, grad

In [124]:
from typing import Tuple
import numpy as np

from sklearn.tree import DecisionTreeRegressor


class GradientBoostingRegressor:
    def __init__(
        self,
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        min_samples_split=2,
        loss="mse",
        verbose=False,
    ):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.loss = loss
        self.verbose = verbose
        self.trees_ = []

    def _mse(self, y_true, y_pred):

        loss = np.mean((y_pred - y_true)**2)
        grad = y_pred - y_true
        if self.verbose == True:
            print(loss)

        return loss, grad

    def fit(self, X, y):
        """
        Fit the model to the data.

        Args:
            X: array-like of shape (n_samples, n_features)
            y: array-like of shape (n_samples,)

        Returns:
            GradientBoostingRegressor: The fitted model.
        """
        if self.loss == 'mse':

            for estim in range(self.n_estimators):
                if estim == 0:
                    self.base_pred_ = np.mean(y)
                    y_pred = -1 * \
                        (y, self._mse(y, np.array([self.base_pred_] * len(y))))

                model = DecisionTreeRegressor(max_depth=self.max_depth,
                                              min_samples_split=self.min_samples_split).fit(X, y_pred)
                self.trees_.append(model)
                _, self.grad = self._mse(model.predict(X), y_pred)
                y_pred = y_pred * self.learning_rate
        else:
            for estim in range(self.n_estimators):
                if estim == 0:
                    self.base_pred_ = np.mean(y)
                    y_pred = -1 * \
                        (y, self.loss(y, np.array([self.base_pred_] * len(y))))

                model = DecisionTreeRegressor(max_depth=self.max_depth,
                                              min_samples_split=self.min_samples_split).fit(X, y_pred)
                self.trees_.append(model)
                _, self.grad = self.loss(model.predict(X), y_pred)
                y_pred = y_pred * self.learning_rate

        return self

    def predict(self, X):
        """Predict the target of new data.

        Args:
            X: array-like of shape (n_samples, n_features)

        Returns:
            y: array-like of shape (n_samples,)
            The predict values.

        """
        predictions = np.zeros(X.shape[0])

        y = self.base_pred_ + predictions
        print(y)
        for model in self.trees_:
            y = y + model.predict(X)

        return y

In [6]:
from typing import Tuple
import numpy as np

from sklearn.tree import DecisionTreeRegressor


class GradientBoostingRegressor:
    def __init__(
        self,
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        min_samples_split=2,
        loss="mse",
        verbose=False,
        subsample_size=0.5,
        replace=False

    ):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.verbose = verbose
        self.subsample_size = subsample_size
        self.replace = replace
        self.loss = loss
        if loss == 'mse':
            self.loss = self._mse
        self.trees_ = []

    def _mse(self, y_true, y_pred):

        loss = np.mean((y_pred - y_true)**2)
        grad = y_pred - y_true
        if self.verbose is True:
            print(loss)

        return loss, grad

    def _subsample(self, X, y):

        mask = np.random.choice(range(len(y)), int(len(y) * self.subsample_size),
                                replace=self.replace)
        sub_x = X[mask]
        sub_y = y[mask]

        return sub_x, sub_y

    def fit(self, X, y):
        """
        Fit the model to the data.

        Args:
            X: array-like of shape (n_samples, n_features)
            y: array-like of shape (n_samples,)

        Returns:
            GradientBoostingRegressor: The fitted model.
        """
        self.base_pred_ = np.mean(y)
        y_pred = np.full(len(y), self.base_pred_)
        grad = -1 * self.loss(y, y_pred)[1]

        for estim in range(self.n_estimators):
            model = DecisionTreeRegressor(max_depth=self.max_depth,
                                          min_samples_split=self.min_samples_split).fit(X, grad)
            self.trees_.append(model)

            X_sub, y_sub = self._subsample(X, y_pred)

            y_pred = y_pred + model.predict(X) * self.learning_rate

            grad = -1 * self.loss(y, y_pred)[1]

        return self

    def predict(self, X):
        """Predict the target of new data.

        Args:
            X: array-like of shape (n_samples, n_features)

        Returns:
            y: array-like of shape (n_samples,)
            The predict values.

        """
        predictions = np.zeros(X.shape[0])

        y = self.base_pred_ + predictions
        for model in self.trees_:
            y = y + model.predict(X) * self.learning_rate

        return y

In [3]:
import pandas as pd
data = pd.read_csv('/home/artur/my_projects/karpov-projects/intern/dec tree/COUNT_SKU_2023_09_07.csv')
y = data['delay_days'].values
X = data.iloc[:, :-1].values

In [4]:
y

array([ -4,   7,   0,  -8,  43,   0,   3,   0,   0,  12,   8,  32,  35,
         1,   0,  12,   0,  28,  15,   1,  22,   4,  10,   5,   0,   0,
        39,   4,  18,  27,   1,   6,  19,  20,  10,  24,   0,   0,  10,
         0,  37,   0,  60,   1,   5,   0,   1,   0,  85,  24,  15,  13,
         8,  27,   5,   0,   6,   1,   2,   3,   0,  10,   0,   3,   0,
         0,  24,   0,   0,   1,  11,   0,  23,   0,   0,  11,  12,   0,
        35,  41,  59,  24,   0,   0,   0,   1,  71,   4,   8,  13,   0,
         6,   1,   1,   3,  45,   6,  31,   4,   3,   2,  17,  11,   8,
         1,   1,   0,   0,  33,   3,   0,   0,   0,  14,   0,   8,   1,
         8,  18,  69,   5,   0,  34, 100,  20,  23,  26,  34,  23,  19,
         1,  22,   0,  59,   0,   0,   2,  17,  10,   6,   0,   0,   0,
         0, 153,   0,   0,   9,   0,   0,  59,   5,   7,  91,   0,  15,
        64,  13,   1,  14,   7,   0,   0,   4,  19,  23,   5,  48,   3,
         2,  11,   0,  58,   3,  23,  46,   5,   2,   1,   6,   

In [7]:
GradientBoostingRegressor().fit(X,y).predict(X[0].reshape(1,-1))

array([10.06825865])