In [23]:
import numpy as np
import matplotlib.pyplot as plt
from src.cyc_gbm import CycGBM, tune_kappa
from sklearn.model_selection import KFold
from scipy.optimize import minimize
from src.distribution import Distribution

In [24]:
n = 100
expected_loss = 641.9173857564037
rng = np.random.default_rng(seed=10)
X0 = np.arange(0, n)
X1 = np.arange(0, n)
rng.shuffle(X1)
mu = 10 * (X0 > 0.3 * n) + 5 * (X1 > 0.5 * n)
sigma = np.exp(1 + 1 * (X0 < 0.4 * n))
max_depth = 2
min_samples_leaf = 20

X = np.stack([X0, X1]).T
y = rng.normal(mu, sigma)

kappas = [100, 10]
eps = 0.1

In [25]:
gbm = CycGBM(kappa=kappas, eps=eps, max_depth = max_depth, min_samples_leaf=min_samples_leaf)
gbm.train(X, y)
z_hat = gbm.predict(X)

loss = gbm.dist.loss(z_hat, y).sum()
print(loss)

187.46122289939993


In [26]:
# Conversion to old structure
zOpt = gbm.z0
BOpts = kappas
BOpt = max(kappas)
d = 2
epsilon = [eps]*d

In [27]:
# Distribution dependent
# Loss function
def lossEval(z,y):
    loss = z[1] + 0.5*np.exp(-2*z[1])*(y-z[0])**2
    return loss

# Gradients
def grad0(z,y):
    grad = -np.exp(-2*z[1])*(y-z[0])
    return grad

def grad1(z,y):
    grad = 1-np.exp(-2*z[1])*(y-z[0])**2
    return grad

# Gradient list
grads = [grad0,grad1]

In [28]:
# Setup
from sklearn.tree import DecisionTreeRegressor as regressionTree
# Tree fitting function
def trainTree(z, y, x, grad, max_depth, min_samples_leaf):
    tree = regressionTree(max_depth = max_depth, min_samples_leaf = min_samples_leaf)
    g = grad(z,y)
    tree.fit(x,-g)
    return tree

# Dimension indicator for score adding
e = [np.stack([[0]]*j+[[1]] +[[0]]*(d-j-1)) for j in range(0,d)]

# Local tree training function
def trainThisTree(z, y, x, grad, max_depth = max_depth, min_samples_leaf = min_samples_leaf):
    return trainTree(z,y,x,grad,max_depth,min_samples_leaf)

In [29]:
# Training and validation data
xTrain, yTrain = X, y
xValid, yValid = X, y
nTrain, nValid = len(yTrain), len(yValid)

# Initiate score
zTrain = zOpt.repeat(nTrain).reshape((d,nTrain))
zValid = zOpt.repeat(nValid).reshape((d,nValid))

# Build trees
for b in range(1,BOpt):
    # For all parameter dimensions
    for j in range(0,d):
        if b<=BOpts[j]:
            # Fit a tree
            tree = trainThisTree(zTrain,yTrain,xTrain,grads[j])

            # Score updates
            zTrainPlus = tree.predict(xTrain)
            zValidPlus = tree.predict(xValid)

            # Look at all unique node values
            nodeValues = np.unique(zTrainPlus)
            for k,nodeValue in enumerate(nodeValues):
                # Indices with this node value
                indicesTrain = zTrainPlus==nodeValue
                indicesValid = zValidPlus==nodeValue
                # Optimize step size
                gammaOpt = minimize(lambda gamma: lossEval(zTrain[:,indicesTrain] + e[j]*gamma,yTrain[indicesTrain]).sum(),
                                    jac = lambda gamma: (grads[j](zTrain[:,indicesTrain] + e[j]*gamma,yTrain[indicesTrain])).sum(),
                                    x0 = nodeValue)['x'][0]
                # Manipulate node values
                zTrainPlus[indicesTrain] = gammaOpt
                zValidPlus[indicesValid] = gammaOpt

            # Update score
            zTrain[j] += epsilon[j]*zTrainPlus
            zValid[j] += epsilon[j]*zValidPlus



In [32]:
lossEval(zValid, yValid).sum()


186.88283733605098