In [14]:
import numpy as np
import os, sys

sys.path.append(os.path.abspath(".."))

from earth._basis_function_fast import BasisMatrix
from src.earth.earth_slow import EARTH
from src.earth.earth import EARTH as EARTH_fast

In [15]:
# Single-feature design matrix (6 samples, 1 column), sorted in descending
# order and containing repeated values (3 and 1).
# The former `np.arange(6)[::-1]` assignment was removed: it was a dead store,
# overwritten on the very next line before being used.
X = np.reshape([5, 4, 3, 3, 1, 1], (-1, 1))
X

array([[5],
       [4],
       [3],
       [3],
       [1],
       [1]])

In [16]:
# Toy target: 2*k - 1 for k = 0..5, plus 1 for the samples with index > 2.
# NOTE(review): the following cell rebinds `y`; on a top-to-bottom run this
# value is displayed here and then replaced before anything else reads it.
y = np.arange(6) * 2 - 1 + (np.arange(6) > 2).astype(int)
y

array([-1,  1,  3,  6,  8, 10])

In [17]:
# Target that is an exact linear function of the single feature: 2*x + 3*x = 5*x.
y = (
    2 * X[:, 0] + 3 * X[:, 0]
)  # Linear combination of the feature column (no noise term is added here)
y

array([25, 20, 15, 15,  5,  5])

In [37]:
# Fit the EARTH class imported from src.earth.earth_slow with M_max=1 and
# predict on the training inputs; the recorded predictions reproduce y.
ea = EARTH(M_max=1)
ea.fit(X,y)
ea.predict(X)

array([25., 20., 15., 15.,  5.,  5.])

In [39]:
# Coefficients stored on the fitted model.
ea.coeffs

array([5., 0., 5.])

In [38]:
# Inspect the `svt` attribute of basis function 1.
# NOTE(review): presumably a list of (sign, variable index, knot) triples — confirm
# against the basis-function class.
ea.basis[1].svt

[(-1, 0, np.int64(1))]

In [19]:
# Same data fitted with the EARTH class from src.earth.earth (imported as
# EARTH_fast), here with M_max=6. Note that this rebinds `ea`.
# NOTE(review): the recorded predictions do not reproduce y, unlike the
# EARTH(M_max=1) fit shown before this cell.
ea = EARTH_fast(M_max=6)
ea.fit(X,y)
ea.predict(X)

going fast
No improvement in LOF after 5 terms


array([11.31578947,  9.05263158,  6.78947368,  6.78947368,  7.73684211,
        7.73684211])

In [20]:
# Build a basis matrix with one split whose last argument is 5, the largest
# value in X; in the recorded output the added (last) column is all zeros.
# NOTE(review): the original comment read "Knot not a end" — intent unclear,
# confirm whether "knot at end" or "knot not at end" was meant.
# NOTE(review): add_split_end arguments are presumably
# (parent basis index, variable index, knot) — confirm against BasisMatrix.
# `ts` holds every value of the single feature column (the candidate knots).
ts = X[0:,0]
bm = BasisMatrix(X)
bm.add_split_end(0, 0, 5)
B = bm.bx
B

array([[1., 5., 0.],
       [1., 4., 0.],
       [1., 3., 0.],
       [1., 3., 0.],
       [1., 1., 0.],
       [1., 1., 0.]])

In [21]:
# Transposed basis matrix: one row per column of B.
B.transpose()

array([[1., 1., 1., 1., 1., 1.],
       [5., 4., 3., 3., 1., 1.],
       [0., 0., 0., 0., 0., 0.]])

In [22]:
# c = B^T y, the right-hand side of the normal equations for this basis.
B.transpose() @ y

array([ 85., 305.,   0.])

In [41]:
# Reference solution: for every candidate knot t, rebuild the basis matrix from
# scratch and solve the least-squares problem directly.
for t in ts:
    print(t)
    bm = BasisMatrix(X)
    bm.add_split_end(0, 0, t)
    Bi = bm.bx
    # lstsq returns (solution, residual sums, rank, singular values); the
    # residual array comes back empty when Bi is rank-deficient.
    coeffs, ssr, _, _ = np.linalg.lstsq(Bi, y, rcond=None)
    print(coeffs, ssr)
    # Fitted values for this knot.
    print(Bi@coeffs)

5
[2.0476084e-15 5.0000000e+00 0.0000000e+00] []
[25. 20. 15. 15.  5.  5.]
4
[5.36569686e-15 5.00000000e+00 5.27355937e-15] [6.99338614e-30]
[25. 20. 15. 15.  5.  5.]
3
[1.18466502e-14 5.00000000e+00 4.77395901e-15] [1.50740173e-32]
[25. 20. 15. 15.  5.  5.]
3
[1.18466502e-14 5.00000000e+00 4.77395901e-15] [1.50740173e-32]
[25. 20. 15. 15.  5.  5.]
1
[1.66666667 3.33333333 1.66666667] []
[25. 20. 15. 15.  5.  5.]
1
[1.66666667 3.33333333 1.66666667] []
[25. 20. 15. 15.  5.  5.]


In [24]:
# Brute-force c = Bi^T y for every candidate knot t, rebuilding the basis each
# time. In the recorded output only the last entry changes with t.
for t in ts:
    print(t)
    bm = BasisMatrix(X)
    bm.add_split_end(0, 0, t)
    Bi = bm.bx
    ci = Bi.transpose() @ y
    print(ci)

5
[ 85. 305.   0.]
4
[ 85. 305.  25.]
3
[ 85. 305.  70.]
3
[ 85. 305.  70.]
1
[ 85. 305. 220.]
1
[ 85. 305. 220.]


In [26]:
# Brute-force V = Bi^T Bi for every candidate knot t, rebuilding the basis each
# time. In the recorded output only the last row/column changes with t.
for t in ts:
    print(t)
    bm = BasisMatrix(X)
    bm.add_split_end(0, 0, t)
    Bi = bm.bx
    vi = Bi.transpose() @ Bi
    print(vi)

5
[[ 6. 17.  0.]
 [17. 61.  0.]
 [ 0.  0.  0.]]
4
[[ 6. 17.  1.]
 [17. 61.  5.]
 [ 1.  5.  1.]]
3
[[ 6. 17.  3.]
 [17. 61. 14.]
 [ 3. 14.  5.]]
3
[[ 6. 17.  3.]
 [17. 61. 14.]
 [ 3. 14.  5.]]
1
[[ 6. 17. 11.]
 [17. 61. 44.]
 [11. 44. 33.]]
1
[[ 6. 17. 11.]
 [17. 61. 44.]
 [11. 44. 33.]]


In [11]:
# Normal equations: V a = c with V = B^T B and c = B^T y.
V = B.transpose() @ B
c = B.transpose() @ y
# Direct solve left disabled.
# NOTE(review): if the last column of B is all zeros (as displayed for the
# knot-at-5 basis), V is singular and inv() would fail — confirm that is why.
# np.linalg.inv(V) @ c

In [12]:
def get_c_and_all_cns(
    X: np.ndarray, B: np.ndarray, y: np.ndarray, m: int
) -> tuple[np.ndarray, list]:
    """
    Compute ``c = B^T y`` and the last entry of ``c`` for every candidate knot.

    The candidate knots are the values ``X[:, m]`` taken in row order. The
    values are assumed (not checked) to be sorted in descending order; ties are
    allowed. Under that assumption entry ``k`` of the returned list is

        c[-1] + sum over rows j with X[j, m] > t_k of
                y[j] * B[j, m] * (X[j, m] - t_k),      t_k = X[k, m],

    accumulated incrementally instead of rebuilding the basis for each knot.

    Note that ``m`` selects both the column of ``X`` that supplies the knots and
    the column of ``B`` used as the multiplying (parent) basis function.

    Parameters
    ----------
    X : 2-D array of inputs, one row per sample.
    B : 2-D basis matrix, one row per sample; its last column is the one whose
        inner product with ``y`` is tracked.
    y : 1-D array of targets.
    m : column index used for both ``X`` and ``B``.

    Returns
    -------
    (c, cns) : ``c`` is ``B^T y`` for the basis as given; ``cns`` is a list with
        one value per row of ``X`` (``cns[0]`` is ``c[-1]``).
    """
    xv = X[:, m]
    bm = B[:, m]
    c = B.transpose() @ y
    lowest = xv[0]
    # Rows whose x equals the current knot contribute nothing yet (x - t == 0).
    # Their terms are parked in the queues and folded into the running sums as
    # soon as a strictly smaller knot shows up.
    queue1 = 0
    queue2 = 0
    a1 = 0  # running sum of B[j, m] * x[j] * y[j] over rows above the knot
    a2 = 0  # running sum of B[j, m] * y[j] over rows above the knot
    cns = [c[-1]]
    for i in range(1, len(xv)):
        t = xv[i]
        k = i - 1  # the row that has just moved strictly above / onto the knot
        if t < lowest:
            lowest = t
            a1 += bm[k] * xv[k] * y[k] + queue1
            a2 += bm[k] * y[k] + queue2
            cns.append(cns[0] + a1 - a2 * t)
            queue1 = queue2 = 0
        else:
            # Tied knot: same value as before, remember row k for later.
            print("not lowest", t)
            queue1 += bm[k] * xv[k] * y[k]
            queue2 += bm[k] * y[k]
            cns.append(cns[-1])
    return c, cns


# Run the incremental computation on the notebook data, using column 0 of both
# X and B, and display the per-knot values.
c, cns = get_c_and_all_cns(X, B, y, 0)
cns

not lowest 3
not lowest 1


[np.float64(0.0),
 np.float64(-1.0),
 np.float64(-1.0),
 np.float64(-1.0),
 np.float64(17.0),
 np.float64(17.0)]

In [13]:
def update_c(c, u: float, t: float, xv, y, m, B) -> np.ndarray:
    """
    Update the last entry of ``c`` when the knot moves from ``u`` to ``t``.

    Adds to ``c[-1]``

        sum over rows with t <= x < u of  y * B[:, m] * (x - t)
        + (u - t) * sum over rows with x >= u of  y * B[:, m]

    which is the change in ``sum(y * B[:, m] * max(x - knot, 0))`` when the
    knot is lowered from ``u`` to ``t``.

    Parameters
    ----------
    c : 1-D array; modified in place (pass a copy to keep the original).
    u : current knot value.
    t : new knot value.
    xv : 1-D array of the split variable, one value per sample.
    y : 1-D array of targets.
    m : column of ``B`` that multiplies the hinge.
    B : 2-D basis matrix, one row per sample.

    Returns
    -------
    The same array object ``c`` after the update.
    """
    mask_1 = (xv >= t) & (xv < u)  # rows between the new and the old knot
    mask_2 = xv >= u  # rows at or above the old knot
    c[-1] = (
        c[-1]
        + np.sum(y[mask_1] * B[mask_1, m] * (xv[mask_1] - t))
        + (u - t) * np.sum(y[mask_2] * B[mask_2, m])
    )
    return c


# Move the knot from u=5 to t=1. A copy is passed because update_c writes to
# c[-1] in place, so `c` itself stays untouched.
update_c(c.copy(), u=5, t=1, xv=X[:, 0], y=y, m=0, B=B)

array([27., 44., 17.])

In [14]:
# Column 2 of V, i.e. the inner products of every column of B with column 2.
V[:,2]

array([0., 0., 0.])

In [26]:
def get_V_and_all_VNNs(
    X: np.ndarray, B: np.ndarray, m: int, i: int
) -> tuple[np.ndarray, list]:
    """
    Compute ``V = B^T B`` and the entry ``V[i, -1]`` for every candidate knot.

    The candidate knots are the values ``X[:, m]`` taken in row order. The
    values are assumed (not checked) to be sorted in descending order; ties are
    allowed. Under that assumption entry ``k`` of the returned list is

        V[i, -1] + sum over rows j with X[j, m] > t_k of
                   B[j, m] * B[j, i] * (X[j, m] - t_k),      t_k = X[k, m],

    accumulated incrementally instead of rebuilding the basis for each knot.

    Note that ``m`` selects both the column of ``X`` that supplies the knots and
    the column of ``B`` used as the multiplying (parent) basis function.

    Parameters
    ----------
    X : 2-D array of inputs, one row per sample.
    B : 2-D basis matrix, one row per sample.
    m : column index used for both ``X`` and ``B``.
    i : column of ``B`` whose inner product with the last column is tracked.

    Returns
    -------
    (V, vis) : ``V`` is ``B^T B`` for the basis as given; ``vis`` is a list with
        one value per row of ``X`` (``vis[0]`` is ``V[i, -1]``).
    """
    V = B.transpose() @ B
    xv = X[:, m]
    bm = B[:, m]
    bi = B[:, i]
    lowest = xv[0]
    # Rows whose x equals the current knot contribute nothing yet (x - t == 0).
    # Their terms are parked in the queues and folded into the running sums as
    # soon as a strictly smaller knot shows up.
    queue1 = 0
    queue2 = 0
    a1 = 0  # running sum of B[j, m] * B[j, i] * x[j] over rows above the knot
    a2 = 0  # running sum of B[j, m] * B[j, i] over rows above the knot
    vis = [V[i, -1]]
    # A separate loop index is used so the parameter `i` is not shadowed.
    for row in range(1, len(xv)):
        t = xv[row]
        print(row, t)  # progress trace, kept so the notebook output is unchanged
        j = row - 1
        if t < lowest:
            lowest = t
            a2 += bm[j] * bi[j] + queue2
            a1 += bm[j] * bi[j] * xv[j] + queue1
            vis.append(vis[0] + a1 - a2 * t)
            # The queued terms are now part of a1/a2; clear them so that a later
            # group of tied values does not add them a second time.
            queue1 = queue2 = 0
        else:
            # Tied knot: same value as before, remember row j for later.
            queue1 += bm[j] * bi[j] * xv[j]
            queue2 += bm[j] * bi[j]
            vis.append(vis[-1])
    return V, vis

# Run the incremental computation on the notebook data: knots from column 0,
# tracking the inner product of column 1 of B with the last column.
# Note that this rebinds the notebook-level `V`.
V, vis = get_V_and_all_VNNs(X, B, 0, 1)
vis

1 4
2 3
3 3
4 1
5 1


[np.float64(0.0),
 np.float64(5.0),
 np.float64(14.0),
 np.float64(14.0),
 np.float64(44.0),
 np.float64(44.0)]

In [26]:
def update_V(u, t, m, xv, B, V):
    """
    Refresh the last row and column of ``V`` for a knot moved from ``u`` to ``t``.

    ``V`` is modified in place and also returned. For every column ``col``
    except the last, ``V[col, -1]`` (and its mirror ``V[-1, col]``) receives

        sum over rows with t <= x < u of  B[:, col] * B[:, m] * (x - t)
        + (u - t) * sum over rows with x >= u of  B[:, col] * B[:, m]

    and the corner ``V[-1, -1]`` receives

        sum over rows with t <= x < u of  B[:, m]**2 * (x - t)**2
        + sum over rows with x >= u of    B[:, m]**2 * (t**2 - u**2)
        + 2 * (u - t) * sum over rows with x >= u of  B[:, m]**2 * x
    """
    upper = xv >= u  # rows at or above the old knot
    band = (xv >= t) & (xv < u)  # rows between the new and the old knot
    gap = u - t

    # Off-diagonal entries of the last row/column, kept symmetric.
    last = V.shape[0] - 1
    for col in range(last):
        band_term = np.sum((B[band, col]) * B[band, m] * (xv[band] - t))
        upper_term = np.sum((B[upper, col]) * B[upper, m])
        V[col, -1] = V[col, -1] + band_term + gap * upper_term
        V[-1, col] = V[col, -1]

    # Diagonal (corner) entry.
    sq_band = B[band, m] ** 2
    sq_upper = B[upper, m] ** 2
    corner_shift = (
        np.sum(sq_band * ((xv[band] - t) ** 2))
        + np.sum(sq_upper * (t**2 - u**2))
        + 2 * gap * np.sum(sq_upper * xv[upper])
    )
    V[-1, -1] = V[-1, -1] + corner_shift
    return V


# Move the knot from u=5 to t=1. A copy is passed because update_V writes into
# V in place. The recorded result equals the brute-force B^T B printed for t=1.
update_V(u=5, t=1, m=0, xv=X[:, 0], B=B, V=V.copy())

array([[ 6., 17., 11.],
       [17., 61., 44.],
       [11., 44., 33.]])