In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error
from datasets.prostate import X_train, y_train, X_test, y_test
from models import (
    BestSubsetRegression,
    PrincipalComponentsRegression,
    PartialLeastSquares,
)

In [2]:
def fit_result(reg, X, y):
    """Fit ``reg`` on ``(X, y)`` and summarise the fit as a labelled Series.

    The estimator is evaluated against the module-level ``X_test`` /
    ``y_test`` (imported from ``datasets.prostate``), not against the
    data passed in.

    Parameters
    ----------
    reg : estimator exposing ``fit``, ``predict``, ``intercept_`` and
        ``coef_`` (``coef_`` must support ``.tolist()``).
    X : training features; must expose ``.columns`` (used as row labels).
    y : training targets.

    Returns
    -------
    pd.Series
        Indexed by ``'Intercept'``, the column names of ``X``,
        ``'Test Error'`` (mean squared error on the test set) and
        ``'Std Error'`` (sample standard deviation, ``ddof=1``, of the
        squared test residuals divided by ``sqrt(y_test.size)``).
    """
    reg.fit(X, y)
    predictions = reg.predict(X_test)

    # Test-set MSE and the standard error of that mean.
    test_error = mean_squared_error(y_test, predictions)
    squared_residuals = np.power(y_test - predictions, 2)
    std_error = np.std(squared_residuals, ddof=1) / np.sqrt(y_test.size)

    values = [reg.intercept_, *reg.coef_.tolist(), test_error, std_error]
    labels = ['Intercept', *X.columns.tolist(), 'Test Error', 'Std Error']
    return pd.Series(data=values, index=labels)

In [3]:
# Render floats with three decimals. The fully qualified key is required:
# the bare 'precision' pattern is ambiguous on pandas >= 1.4 (it also
# matches 'styler.format.precision') and raises OptionError.
pd.set_option('display.precision', 3)

# One column per method; every estimator is fitted on the same training
# split and summarised by fit_result. Column order follows the list order.
# NOTE(review): the hyperparameters below are hard-coded and their
# provenance is not shown in this cell -- presumably tuned elsewhere; confirm.
pd.DataFrame(data={
    label: fit_result(estimator, X_train, y_train)
    for label, estimator in [
        ('LS',          LinearRegression()),
        ('Best Subset', BestSubsetRegression(k=2)),
        ('Ridge',       Ridge(alpha=24)),
        ('Lasso',       Lasso(alpha=.223)),
        ('PCR',         PrincipalComponentsRegression(n_components=7)),
        ('PLS',         PartialLeastSquares(n_components=2, scale=False)),
    ]
})

Unnamed: 0,LS,Best Subset,Ridge,Lasso,PCR,PLS
Intercept,2.465,2.477,2.464,2.469,2.497,2.452
lcavol,0.676,0.736,0.42,0.533,0.548,0.417
lweight,0.262,0.315,0.238,0.179,0.287,0.343
age,-0.141,0.0,-0.048,0.0,-0.154,-0.026
lbph,0.209,0.0,0.162,0.0,0.213,0.219
svi,0.304,0.0,0.226,0.078,0.313,0.242
lcp,-0.287,0.0,-0.001,0.0,-0.062,0.078
gleason,-0.021,0.0,0.041,0.0,0.226,0.011
pgg45,0.266,0.0,0.132,0.0,-0.048,0.083
Test Error,0.521,0.492,0.49,0.487,0.449,0.527
