### Bias-Variance Decomposition (Classification)

This notebook experiments with decomposing the prediction error of several classifiers into its bias and variance components.

In [20]:
from mlxtend.data import iris_data
from mlxtend.evaluate import bias_variance_decomp
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier

In [22]:
# Load the iris features (X) and class labels (y) shipped with mlxtend.
X, y = iris_data()

# Hold out 30% of the samples for testing. The split is stratified on y so
# both partitions keep the class proportions, and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
    shuffle=True,
    stratify=y
)

# Candidate classifiers. Only one of them is decomposed per run (see the
# call below); the others are kept here so they can be swapped in.
tree = DecisionTreeClassifier(random_state=123)
forest = RandomForestClassifier(random_state=123)
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name
# was removed in 1.4, while the first positional slot works on both sides.
bag = BaggingClassifier(tree, n_estimators=100, random_state=123)
# `multi_class` is deprecated in recent scikit-learn releases; with the
# 'newton-cg' solver and a 3-class target the default already fits the
# multinomial formulation, so the argument is omitted.
log = LogisticRegression(solver='newton-cg')
nbayes = GaussianNB()
knn = KNeighborsClassifier()

# Decompose the 0-1 loss of the k-nearest-neighbours classifier into its
# average expected loss, bias and variance on the held-out test set.
# To study a different model, replace `knn` with one of the estimators above.
# random_seed is fixed so repeated runs report the same numbers.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
    knn,
    X_train,
    y_train,
    X_test,
    y_test,
    loss='0-1_loss',
    random_seed=123
)

# Report the three components rounded to three decimals.
print('Average expected loss: %.3f' % avg_expected_loss)
print('Average bias: %.3f' % avg_bias)
print('Average variance: %.3f' % avg_var)

Average expected loss: 0.055
Average bias: 0.044
Average variance: 0.034
