In [1]:
from timer import Timer
import numpy as np
import pandas as pd

from highly_adaptive_regression import HighlyAdaptiveLassoCV, HighlyAdaptiveRidgeCV as BasisHARCV
from kernel_ridge import HighlyAdaptiveRidgeCV, RadialBasisKernelRidgeCV, MixedSobolevRidgeCV 
from kernel_ridge import kernels, KernelRidge

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.dummy import DummyRegressor

import pandas as pd
import glob
import os
import altair as alt

In [2]:
def dgp(n,d):
    X = np.random.uniform(size=(n,d))
    # Y = ((X[:,0] > 1/3) + (X[:,0] > 2/3))/2
    Y = X[:,0] + np.random.normal(size=(n,), scale=0.2)
    # Y = np.sum(X, axis=1)/d
    return X,Y

In [3]:
LEARNERS = {
    'Mean':DummyRegressor(strategy="mean"),
    'Ridge Regression':Ridge(alpha=1e-3),
    'Random Forest':RandomForestRegressor(n_estimators=2000, n_jobs=-1),
    'Radial Basis KRR':RadialBasisKernelRidgeCV(
        gammas=[0.001, 0.01, 0.1, 1, 10], 
        eps=1e-10,
        max_alpha_coef_norm = 10,
    ),
    'Mixed Sobolev KRR':MixedSobolevRidgeCV(
        eps=1e-6,
        max_alpha_coef_norm = 100,
    ),
    # 'HAR':HighlyAdaptiveRidgeCV(eps=1e-10, order=0),
    # 'HAL':HighlyAdaptiveLassoCV(),
    # 'Basis HAR':BasisHARCV(eps=1e-10),
}

In [4]:
# D_LIST = [5*k for k in np.arange(1,9)]
D_LIST = [5, 8, 10, 20, 30]
N_TRAIN = [20, 100, 200]
N_TEST = 1000
N_REPS = 5

results = []
for n in N_TRAIN:
    for d in D_LIST:
        for rep in range(N_REPS):
            X_full, Y_full = dgp(n+N_TEST, d)
            X, X_, Y, Y_ = train_test_split(X_full, Y_full, test_size=N_TEST)
            learner_timer = Timer()
            for name, learner in LEARNERS.items():
                with learner_timer.task("time fitting"):
                    learner.fit(X,Y)
                with learner_timer.task("time predicting"):
                    mse = np.mean((learner.predict(X_) - Y_)**2)

                results += [{
                    'n': n,
                    'd': d,
                    'learner': name,
                    'mse': mse,
                    **learner_timer.durations,
                }]











KeyboardInterrupt: 

In [18]:
from kernel_ridge import ClippedMinMaxScaler
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse

N_TEST = 1000

X_full, Y_full = dgp(100+N_TEST, 30)
X, X_, Y, Y_ = train_test_split(X_full, Y_full, test_size=N_TEST)

mskrr = MixedSobolevRidgeCV(
    alphas = [[1e-8, 1e-4, 1e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1, 5, 1e1, 5e1, 1e2, 1e3, 1e4, 1e8]]
)
# mskrr = KernelRidge(kernels.MixedSobolev())
mskrr.fit(X,Y)

clipper = ClippedMinMaxScaler()
K = mskrr.kernels[0](clipper.fit_transform(X))
eigs = np.linalg.eig(K)[0]
print(np.min(eigs), np.max(eigs))

print(np.sqrt(np.mean((mskrr.predict(X_) - Y_)**2)))

pd.DataFrame({
    'alpha':[m.alpha for m in mskrr.models], 
    'train': [mse(Y, m.predict(X, k=m.K)) for m in mskrr.models], 
    'cv': [m.cv(Y, KFold()) for m in mskrr.models], 
    'test': [mse(Y_, m.predict(X_)) for m in mskrr.models],
})

28.796815875717144 205.78450251538922
0.326356668577497
28.796815875717144 205.78450251538922
0.326356668577497


In [10]:
from sklearn.model_selection import KFold
KFold()

KFold(n_splits=5, random_state=None, shuffle=False)

In [11]:
kernels.HighlyAdaptiveRidge()

HighlyAdaptiveRidge: {'depth': inf, 'order': 0}

In [73]:
df = (
    pd.DataFrame(results)
    .groupby(['n', 'd', 'learner'], as_index=False)  
    .agg({
        'mse': np.mean,  
        'time fitting': np.mean,
        'time predicting': np.mean,
    })
    # mutate mse to rmse by taking sqrt
    .assign(**{
        'rmse': lambda df: np.sqrt(df['mse']),
    })
)

alt.Chart(df).mark_line().encode(
    x='d', 
    y='mse',
    color = 'learner',
    column='n',
).properties(
    width=200,
    height=200,
).configure_axis(
    labelFontSize=14,
    titleFontSize=16
).configure_legend(
    labelFontSize=14,
    titleFontSize=16
).configure_header(
    labelFontSize=16,  # Increases the font size of the facet labels
    titleFontSize=18   # Increases the font size of the facet titles
)



In [21]:
df = (
    pd.DataFrame([*results, *more_results])
    .groupby(['n', 'd', 'learner'], as_index=False)  
    .agg({
        'mse': np.mean,  
        'time fitting': np.mean,
        'time predicting': np.mean,
    })
    # mutate mse to rmse by taking sqrt
    .assign(**{
        'rmse': lambda df: np.sqrt(df['mse']),
    })
)

chart = alt.Chart(df).mark_line().encode(
    x='d', 
    y='mse',
    color = 'learner',
    column='n',
).properties(
    width=200,
    height=200,
).configure_axis(
    labelFontSize=14,
    titleFontSize=16
).configure_legend(
    labelFontSize=14,
    titleFontSize=16
).configure_header(
    labelFontSize=16,  # Increases the font size of the facet labels
    titleFontSize=18   # Increases the font size of the facet titles
)
chart.save('results/plots/dim.pdf')
chart



In [56]:
# np.mean((LEARNERS['HAR'].predict(X) - Y)**2)
i = 0
x = X[i:i+1,:]
x_ = X_[0:1,:]
x_ = np.zeros((1,d))
x_[0,0] = 1
y = Y[i]
[LEARNERS['HAR'].predict(x*(1-eps) + x_*eps) for eps in np.linspace(0,1,20)]

[array([0.48603397]),
 array([0.49614828]),
 array([0.49596053]),
 array([0.49551239]),
 array([0.49516332]),
 array([0.49496077]),
 array([0.49474899]),
 array([0.49457063]),
 array([0.49444688]),
 array([0.49432755]),
 array([0.49413921]),
 array([0.49408724]),
 array([0.49403761]),
 array([0.49402392]),
 array([0.4940092]),
 array([0.49400462]),
 array([0.49400304]),
 array([0.4940023]),
 array([0.49400196]),
 array([0.49400182])]

In [57]:
kernel = kernels.HighlyAdaptiveRidge()
K = kernel(X)
k = kernel(X,X_)

In [60]:
np.linalg.eig(K)[0]

array([1.71010212e+09, 1.47345499e+09, 1.33592926e+09, 1.29630484e+09, 1.28666493e+09, 1.27908859e+09, 1.26827219e+09, 1.25690520e+09, 1.25062349e+09, 1.24150660e+09, 1.22671676e+09, 1.21737199e+09,
       1.21285337e+09, 1.20748665e+09, 1.20120250e+09, 1.19783708e+09, 1.18430920e+09, 1.18350227e+09, 1.17831834e+09, 1.17478422e+09, 1.17313671e+09, 1.17160685e+09, 1.16814177e+09, 1.16621162e+09,
       1.16169131e+09, 1.16228200e+09, 1.15919342e+09, 1.15275593e+09, 1.15040962e+09, 1.14822848e+09, 1.14767290e+09, 1.04202195e+09, 1.05692496e+09, 1.14600212e+09, 1.14444396e+09, 1.14343448e+09,
       1.14205731e+09, 1.14133121e+09, 1.13825343e+09, 1.13450207e+09, 1.13341819e+09, 1.13255993e+09, 1.13055339e+09, 1.12864894e+09, 1.12741923e+09, 1.12569649e+09, 1.12454802e+09, 1.12381401e+09,
       1.12247261e+09, 1.06386979e+09, 1.06602829e+09, 1.06656783e+09, 1.11998694e+09, 1.11950146e+09, 1.11824686e+09, 1.11792020e+09, 1.11759291e+09, 1.11664454e+09, 1.11599849e+09, 1.11564227e+09,
     

In [25]:
n = X.shape[0]
alpha = 1e3

K_ = np.vstack([
    np.hstack([
        K + alpha*np.eye(n), np.ones((n,1))
    ]),
    np.hstack([
        np.ones((1,n)), np.zeros((1,1))
    ])
])

H = np.linalg.solve(
    K_.T, 
    np.vstack([K, np.ones((1,n))])
)
np.diag(H)

array([0.78691723, 0.85035828, 0.83963016, 0.68360725, 0.71067461, 0.70277325,
       0.80411362, 0.92706831, 0.78254164, 0.78501977, 0.72281755, 0.90374592,
       0.76374014, 0.75797403, 0.88658112, 0.93538776, 0.83149068, 0.74862816,
       0.83436758, 0.83816175, 0.70338428, 0.80174393, 0.81012508, 0.84590675,
       0.91539449, 0.81035468, 0.85327938, 0.59817944, 0.88326098, 0.88077188,
       0.82603212, 0.71127013, 0.8667963 , 0.77457104, 0.8544529 , 0.78878767,
       0.78077796, 0.81379869, 0.85464466, 0.96377521, 0.92036088, 0.84757023,
       0.72853716, 0.90389241, 0.82082519, 0.7838606 , 0.76158042, 0.74859731,
       0.85754414, 0.89265877, 0.77001316, 0.91182893, 0.70848276, 0.89593773,
       0.79943741, 0.80648752, 0.81731983, 0.76517184, 0.75505112, 0.87763439,
       0.75470649, 0.77101551, 0.7562484 , 0.91799292, 0.76245211, 0.82122909,
       0.69151305, 0.62972069, 0.84298703, 0.80517437, 0.86360632, 0.78715275,
       0.80562601, 0.78067766, 0.75771464, 0.7715561