### Bias-Variance Decomposition (Regression)

This notebook experiments with decomposing the prediction error of several regression algorithms into its bias and variance components.

In [10]:
from mlxtend.data import boston_housing_data
from mlxtend.evaluate import bias_variance_decomp
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor

In [11]:
# Single seed shared by the split, the stochastic models and the decomposition,
# so the whole cell is reproducible and the seed can be changed in one place.
RANDOM_SEED = 123

# dataset
X, y = boston_housing_data()  # load features, labels

# split to train/test: 30% of the samples are held out for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=RANDOM_SEED,
    shuffle=True,
)

# regression models
lr = LinearRegression()
tree = DecisionTreeRegressor(random_state=RANDOM_SEED)
forest = RandomForestRegressor(random_state=RANDOM_SEED)
# The base model is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 (and the old name
# removed in 1.4), whereas the first positional slot works across versions.
bag = BaggingRegressor(
    tree,
    n_estimators=100,
    random_state=RANDOM_SEED,
)

# decomposition (only the linear model is evaluated in this cell; `tree`,
# `forest` and `bag` are defined here but not passed to bias_variance_decomp)
# NOTE(review): per the mlxtend docs, with loss='mse' the returned "bias" is
# the squared-bias term, so expected loss ~= bias + variance -- confirm against
# the installed mlxtend version.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
    lr,
    X_train,
    y_train,
    X_test,
    y_test,
    loss='mse',
    random_seed=RANDOM_SEED,
)

# prints
print('Average expected loss: %.3f' % avg_expected_loss)
print('Average bias: %.3f' % avg_bias)
print('Average variance: %.3f' % avg_var)

Average expected loss: 29.891
Average bias: 28.609
Average variance: 1.282
