In [37]:
import numpy as np
import pandas as pd
import sys
sys.path.append("..")

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, GridSearchCV, ParameterGrid
import sklearn

from figs_ensembles import FIGSExt
from imodels.tree.figs import FIGSRegressor

In [2]:
n = 100
p = 5
X = np.random.randn(n, p)
y = X[:, 0] ** 2 + 3 * X[:, 1]

In [3]:
figs_model = FIGSExt(max_rules=20, k1=10, k2=10)
figs_model.fit(X, y)

FIGSExt(k1=10, k2=10, max_rules=20)

In [5]:
figs_model.trees_[1].value

-2.633449014410871e-15

In [6]:
figs_orig_model = FIGSRegressor(max_rules=20)
figs_orig_model.fit(X, y)

FIGSRegressor(max_rules=20)

## Linear model simulation

In [10]:
n = 200
d = 20

X = np.random.randn(n, d)
beta = np.zeros(d)
beta[[0,1,2,3,4]] = 1
sigma = 0.1
y = X @ beta + np.random.randn(n) * sigma

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

In [41]:
k1_grid = np.arange(5) * 2
k2_grid = np.arange(5) * 2
max_rules_grid = np.arange(5) * 10

params = {"k1" : k1_grid,
          "k2" : k2_grid,
          "max_rules" : max_rules_grid}

grid_search = GridSearchCV(FIGSExt(),
                           params,
                           scoring="neg_mean_squared_error",
                           cv=5)

In [42]:
grid_search.fit(X, y)

GridSearchCV(cv=5, estimator=FIGSExt(),
             param_grid={'k1': array([0, 2, 4, 6, 8]),
                         'k2': array([0, 2, 4, 6, 8]),
                         'max_rules': array([ 0, 10, 20, 30, 40])},
             scoring='neg_mean_squared_error')

In [55]:
results = grid_search.cv_results_
results_df = pd.DataFrame(results)

In [63]:
results_df[results_df["param_max_rules"]==30][["param_k1", "param_k2", "mean_test_score"]]

Unnamed: 0,param_k1,param_k2,mean_test_score
3,0,0,-1.252018
8,0,2,-1.278903
13,0,4,-1.274491
18,0,6,-1.264378
23,0,8,-1.245081
28,2,0,-0.953235
33,2,2,-0.955085
38,2,4,-0.955243
43,2,6,-0.955258
48,2,8,-0.955258
