# Regression test

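Test the two-stage ridge regressor on a standard regression dataset (Boston housing), and compare its cross-validated score against plain `Ridge` and `BayesianRidge` baselines.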

In [12]:
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import Ridge, BayesianRidge
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from twostageridge import TwoStageRidge

In [13]:
# Load the Boston housing data and split it into features X and target y.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs an older scikit-learn -- confirm the
# pinned version before re-running.
data = load_boston()
X = data.data
y = data.target

In [14]:
# Search grid: the same five log-spaced candidates for each of the two
# TwoStageRidge regularisers, addressed through the pipeline step name.
params = {
    "twostageridge__regulariser1": [0.01, 0.1, 1., 10., 100.],
    "twostageridge__regulariser2": [0.01, 0.1, 1., 10., 100.],
}

# Standardise the features, then fit the two-stage ridge with column 12 passed
# as `treatment_col`.
model = make_pipeline(StandardScaler(), TwoStageRidge(treatment_col=12))

In [15]:
# Grid search with cv=5 over both regularisers. `fit` returns the fitted
# search object, so ending the cell with `gs` displays it.
gs = GridSearchCV(model, param_grid=params, cv=5).fit(X, y)
gs

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('twostageridge',
                                        TwoStageRidge(treatment_col=12))]),
             param_grid={'twostageridge__regulariser1': [0.01, 0.1, 1.0, 10.0,
                                                         100.0],
                         'twostageridge__regulariser2': [0.01, 0.1, 1.0, 10.0,
                                                         100.0]})

In [16]:
# Mean cross-validated score of the best parameter combination.
gs.best_score_

0.41214332864614134

In [17]:
# Regulariser values that gave the best cross-validated score.
# NOTE(review): the selected regulariser1 (0.01) is the smallest value in the
# searched grid, so a wider grid may be worth trying.
gs.best_params_

{'twostageridge__regulariser1': 0.01, 'twostageridge__regulariser2': 10.0}

In [18]:
# Baseline: plain ridge regression behind the same scaler, with its penalty
# tuned over the same five candidate values and cv=5.
model = make_pipeline(StandardScaler(), Ridge())
gs = GridSearchCV(
    model,
    param_grid={"ridge__alpha": [0.01, 0.1, 1., 10., 100.]},
    cv=5,
).fit(X, y)
gs.best_score_

0.4820711004593853

In [19]:
# Ridge penalty that gave the best cross-validated score.
# NOTE(review): the selected alpha (100.0) is the largest value in the searched
# grid, so a wider grid may score higher.
gs.best_params_

{'ridge__alpha': 100.0}

In [20]:
# Baseline: Bayesian ridge behind the same scaler. No hyper-parameters are
# searched for this model, so cross-validate it directly instead of running a
# grid search over an empty grid (which also refits on the full data for
# nothing). The mean of the fold scores with cv=5 is the same quantity that
# `GridSearchCV(...).best_score_` reports.
model = make_pipeline(StandardScaler(), BayesianRidge())
cross_val_score(model, X, y, cv=5).mean()

0.3790560205589541

In [21]:
# Wrap the feature matrix in a DataFrame whose column labels are the string
# forms of the positional indices ("0", "1", ...). pandas is imported in the
# notebook's top import cell rather than here, so all dependencies are
# declared in one place.
Xdf = pd.DataFrame(X, columns=[str(n) for n in range(X.shape[1])])

In [22]:
# Repeat the two-stage ridge grid search with the DataFrame as input.
# NOTE(review): StandardScaler outputs a NumPy array by default, so
# TwoStageRidge presumably never receives the DataFrame in this pipeline --
# confirm whether DataFrame handling is meant to be exercised here.
model = make_pipeline(StandardScaler(), TwoStageRidge(treatment_col=12))
gs = GridSearchCV(model, param_grid=params, cv=5).fit(Xdf, y)
gs

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('twostageridge',
                                        TwoStageRidge(treatment_col=12))]),
             param_grid={'twostageridge__regulariser1': [0.01, 0.1, 1.0, 10.0,
                                                         100.0],
                         'twostageridge__regulariser2': [0.01, 0.1, 1.0, 10.0,
                                                         100.0]})

In [23]:
# Sanity check: the DataFrame exposes a `columns` attribute.
hasattr(Xdf, 'columns')

True

In [24]:
# Look up the positional index of the column labelled "12".
ind = list(Xdf.columns).index("12")

In [25]:
# Show the position looked up for column "12".
ind

12

In [26]:
# Check the type of the looked-up position.
type(ind)

int

In [27]:
# View the DataFrame's values as a NumPy array.
Xdf.to_numpy()

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])