# In [None]:
from dataclasses import dataclass
from typing import List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor


# In [None]:
@dataclass
class _Node:
    """One node of a regression tree grown by ``ScratchGBRegressor``.

    Every node carries the weight ``w`` computed from the gradient statistics
    of the rows that reached it. Internal nodes additionally carry the split
    (``feat``, ``thr``) and both children.
    """

    is_leaf: bool = False          # True when prediction stops at this node
    w: float = 0.0                 # node weight, -G / (H + reg_lambda)
    feat: Optional[int] = None     # column index tested by an internal node
    thr: Optional[float] = None    # rows with X[:, feat] <= thr go to ``left``
    # Annotated so that the children are real dataclass fields: they are
    # accepted by __init__ and take part in __repr__ / __eq__.
    left: Optional["_Node"] = None
    right: Optional["_Node"] = None


class ScratchGBRegressor:
    """Gradient-boosted regression trees for squared error, using NumPy only.

    Each boosting round grows one tree on the gradients ``g = -2 * (y - y_hat)``
    and constant hessians ``h = 2`` of the squared error, then adds
    ``learning_rate`` times the tree's output to the running prediction.

    Parameters
    ----------
    n_estimators : int
        Number of boosting rounds (trees).
    learning_rate : float
        Factor applied to every tree's output.
    max_depth : int
        Depth at which nodes are forced to become leaves.
    subsample : float
        Probability with which each row is kept for a given round.
    colsample : float
        Fraction of columns drawn (without replacement) at every node.
    reg_lambda : float
        Value added to the hessian sum in leaf weights and split gains.
    gamma : float
        Constant subtracted from every split gain.
    random_state : None, int or anything ``np.random.default_rng`` accepts
        Seed for row and column sampling.

    Attributes
    ----------
    trees : list of _Node
        Roots of the fitted trees, in boosting order.
    mean_ : float
        Mean of the training targets; the initial prediction.
    history_ : list of float
        Training MSE after each boosting round.
    """

    def __init__(self, n_estimators=200, learning_rate=0.1,
                 max_depth=3, subsample=1.0, colsample=1.0,
                 reg_lambda=1.0, gamma=0.0, random_state=None):
        self.M = n_estimators
        self.lr = learning_rate
        self.depth = max_depth
        self.sub = subsample
        self.col = colsample
        self.lam = reg_lambda
        self.gamma = gamma
        self.rng = np.random.default_rng(random_state)
        self.trees: List[_Node] = []
        self.mean_ = 0
        self.history_ = []

    @staticmethod
    def _grad_hess(y, y_hat):
        """Return flattened gradient and hessian of ``(y - y_hat) ** 2``."""
        g = -2 * (y - y_hat)
        h = np.full_like(g, 2.0)
        return g.ravel(), h.ravel()

    def _best_split(self, x, g, h):
        """Find the best threshold on a single feature column.

        Parameters
        ----------
        x, g, h : 1-D arrays of equal length
            Feature values, gradients and hessians of the rows in the node.

        Returns
        -------
        (gain, threshold)
            The largest gain and the midpoint between the two neighbouring
            sorted values that achieves it (the first one on ties).
            ``(-inf, None)`` when the column offers no split, i.e. fewer than
            two rows or all values identical.
        """
        if len(x) < 2:
            return -np.inf, None

        order = np.argsort(x)
        xs = x[order]
        # A threshold can only sit between two *different* neighbouring values.
        distinct = xs[1:] != xs[:-1]
        if not distinct.any():
            return -np.inf, None

        g_cum = np.cumsum(g[order])
        h_cum = np.cumsum(h[order])
        G, H = g_cum[-1], h_cum[-1]
        # Position k splits the sorted rows into [0..k] (left) and [k+1..] (right).
        GL, HL = g_cum[:-1], h_cum[:-1]
        GR, HR = G - GL, H - HL
        gain = (
            GL ** 2 / (HL + self.lam)
            + GR ** 2 / (HR + self.lam)
            - G ** 2 / (H + self.lam)
            - self.gamma
        )
        # Positions between equal values (or with an undefined gain) never win.
        gain = np.where(distinct & ~np.isnan(gain), gain, -np.inf)

        k = int(np.argmax(gain))  # first maximum, like a strict ">" scan
        if not gain[k] > -np.inf:
            return -np.inf, None
        return gain[k], 0.5 * (xs[k + 1] + xs[k])

    def _build(self, rows, depth, X, g, h):
        """Recursively grow the subtree for the row indices ``rows``.

        Parameters
        ----------
        rows : 1-D integer array
            Indices into ``X`` / ``g`` / ``h`` of the rows in this node.
        depth : int
            Depth of this node; the root is 0.
        X : 2-D array
            Full feature matrix.
        g, h : 1-D arrays
            Gradients and hessians for all rows of ``X``.

        Returns
        -------
        _Node
            Root of the subtree.
        """
        G, H = g[rows].sum(), h[rows].sum()
        denom = H + self.lam
        # An empty node with reg_lambda == 0 would otherwise evaluate 0 / 0.
        w = -G / denom if denom != 0 else 0.0
        node = _Node(w=w)
        if depth == self.depth or len(rows) < 2:
            node.is_leaf = True
            return node

        n_features = X.shape[1]
        # At least one column, and never more than exist (colsample > 1).
        n_pick = min(n_features, max(1, int(self.col * n_features)))
        feats = self.rng.choice(n_features, n_pick, replace=False)

        best_gain, best_feat, best_thr = -np.inf, None, None
        g_node, h_node = g[rows], h[rows]
        for f in feats:
            gain, thr = self._best_split(X[rows, f], g_node, h_node)
            if gain > best_gain:
                best_gain, best_feat, best_thr = gain, f, thr

        # No candidate improves the objective: keep the node as a leaf.
        if best_gain <= 0 or best_thr is None:
            node.is_leaf = True
            return node

        node.feat, node.thr = best_feat, best_thr
        goes_left = X[rows, best_feat] <= best_thr
        node.left = self._build(rows[goes_left], depth + 1, X, g, h)
        node.right = self._build(rows[~goes_left], depth + 1, X, g, h)
        return node

    def _pred_tree(self, node, X):
        """Return the output of the tree rooted at ``node`` for each row of ``X``."""
        if node.is_leaf:
            return np.full(X.shape[0], node.w)
        goes_left = X[:, node.feat] <= node.thr
        out = np.empty(X.shape[0])
        out[goes_left] = self._pred_tree(node.left, X[goes_left])
        out[~goes_left] = self._pred_tree(node.right, X[~goes_left])
        return out

    def fit(self, X, y):
        """Fit the ensemble on ``X`` (n_samples, n_features) and ``y`` (n_samples,).

        Any previously fitted trees and history are discarded. The random
        generator is not re-seeded, so a refit continues its stream.

        Returns
        -------
        ScratchGBRegressor
            ``self``.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` disagree on the number of samples.
        """
        X = np.asarray(X)
        # Float column vector: with integer targets np.full_like would truncate
        # the mean and the in-place update below could not be cast back.
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} targets"
            )

        # Start from scratch so that refitting does not stack ensembles.
        self.trees = []
        self.history_ = []
        self.mean_ = y.mean()
        y_hat = np.full_like(y, self.mean_)

        for _ in range(self.M):
            keep = self.rng.random(len(y)) < self.sub
            g, h = self._grad_hess(y, y_hat)
            tree = self._build(np.flatnonzero(keep), 0, X, g, h)
            self.trees.append(tree)
            # The tree is grown on the subsample but applied to every row.
            y_hat += self.lr * self._pred_tree(tree, X).reshape(-1, 1)
            self.history_.append(float(np.mean((y - y_hat) ** 2)))
        return self

    def predict(self, X):
        """Return predictions for ``X``: ``mean_`` plus the scaled tree outputs."""
        X = np.asarray(X)
        y = np.full(X.shape[0], self.mean_, dtype=float)
        for t in self.trees:
            y += self.lr * self._pred_tree(t, X)
        return y


# --- Data: synthetic regression problem with outliers injected into 30 targets ---
# Seeds NumPy's global RNG, which the np.random calls below draw from.
# NOTE(review): make_regression gets no random_state, so it presumably uses
# this global state as well - confirm against the scikit-learn docs.
np.random.seed(42)
X,y=make_regression(n_samples=1000,n_features=5,noise=12)
y[np.random.choice(len(y),30,replace=False)] += np.random.normal(0,150,30)

X_tr,X_te,y_tr,y_te=train_test_split(X,y,test_size=.3,random_state=0)

# --- From-scratch model; history_ holds its training MSE per boosting round ---
my=ScratchGBRegressor(n_estimators=200,learning_rate=.1,max_depth=3,
                      subsample=.8,colsample=.8,reg_lambda=1.,random_state=0)
my.fit(X_tr,y_tr)

# --- XGBoost reference with the matching hyper-parameters ---
# eval_metric is set on the estimator: passing it to fit() is deprecated
# since XGBoost 1.6 and rejected by newer releases.
xgb=XGBRegressor(n_estimators=200,learning_rate=.1,max_depth=3,
                 subsample=.8,colsample_bytree=.8,reg_lambda=1.,
                 tree_method="exact",objective="reg:squarederror",
                 eval_metric="rmse",random_state=0,verbosity=0)
xgb.fit(X_tr,y_tr,eval_set=[(X_tr,y_tr)],verbose=False)
xgb_hist=np.square(xgb.evals_result()['validation_0']['rmse'])  # MSE = RMSE²

# --- Training-MSE curves of both models over the boosting rounds ---
plt.figure(figsize=(8,6))
plt.plot(my.history_, label='ScratchGB – train MSE')
plt.plot(xgb_hist,       label='XGBoost   – train MSE')
plt.xlabel('Iteration'); plt.ylabel('MSE'); plt.title('Convergence')
plt.legend(); plt.grid(); plt.tight_layout()
plt.show()