In [2]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.22.0-py2.py3-none-any.whl (1.4 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m578.7 kB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
Installing collected packages: mlxtend
Successfully installed mlxtend-0.22.0


We will use the Iris dataset together with mlxtend's `bias_variance_decomp` function to estimate the bias and variance of a given model.

# Decision Tree

In [3]:
from mlxtend.evaluate import bias_variance_decomp
from sklearn.tree import DecisionTreeClassifier
from mlxtend.data import iris_data
from sklearn.model_selection import train_test_split

# Load the Iris data and hold out 30% of it as a stratified, seeded test split.
X, y = iris_data()
X_train_ds, X_test_ds, y_train_ds, y_test_ds = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
    shuffle=True,
    stratify=y,
)

# Model under study: a single decision tree, seeded for reproducibility.
tree = DecisionTreeClassifier(random_state=123)

# Estimate the average expected 0-1 loss of the tree and its bias / variance
# components over 1000 rounds with a fixed seed.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
    tree, X_train_ds, y_train_ds, X_test_ds, y_test_ds,
    loss='0-1_loss',
    random_seed=123,
    num_rounds=1000,
)

# Report the three averaged quantities, rounded to four decimals.
# The first line ends with an explicit "\n" escape (previously a stray
# literal "n" was printed after the number).
print(f'Average Expected Loss: {round(avg_expected_loss, 4)}\n')
print(f'Average Bias: {round(avg_bias, 4)}')
print(f'Average Variance: {round(avg_var, 4)}')

Average Expected Loss: 0.0607n
Average Bias: 0.0222
Average Variance: 0.0393


# Bagging

In [5]:
from mlxtend.evaluate import bias_variance_decomp
from sklearn.tree import DecisionTreeClassifier
from mlxtend.data import iris_data
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier

# Load the Iris data and hold out 30% of it as a stratified, seeded test split.
X, y = iris_data()
X_train_ds, X_test_ds, y_train_ds, y_test_ds = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
    shuffle=True,
    stratify=y,
)

# Model under study: a bagging ensemble of 100 seeded decision trees.
tree = DecisionTreeClassifier(random_state=123)
bag = BaggingClassifier(
    estimator=tree,
    n_estimators=100,
    random_state=123,
)

# Estimate the average expected 0-1 loss of the ensemble and its bias /
# variance components over 1000 rounds with a fixed seed.
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(
    bag, X_train_ds, y_train_ds, X_test_ds, y_test_ds,
    loss='0-1_loss',
    random_seed=123,
    num_rounds=1000,
)

# Report the three averaged quantities, rounded to four decimals.
# The first line ends with an explicit "\n" escape (previously a stray
# literal "n" was printed after the number).
print(f'Average Expected Loss: {round(avg_expected_loss, 4)}\n')
print(f'Average Bias: {round(avg_bias, 4)}')
print(f'Average Variance: {round(avg_var, 4)}')

Average Expected Loss: 0.0459n
Average Bias: 0.0222
Average Variance: 0.024


# Linear Models

In [6]:
from sklearn import linear_model
import numpy as np
from sklearn.metrics import mean_squared_error

def calculate_bias_variance(xTest, ytest, model):
    """Fit a linear model on a tiny built-in data set and print its test error.

    The selected model is trained on the hard-coded points x = 1, 2, 3 with
    targets y = 2, 4, 6, then used to predict ``xTest``. The fitted
    coefficients, every actual/predicted pair and two summary numbers are
    printed.

    Note: the number printed as "Variance" is the variance of the per-sample
    squared errors, and the number printed as "Bias" is the mean squared
    error of the predictions. Neither is a formal bias/variance decomposition.

    Parameters
    ----------
    xTest : array-like
        Test inputs handed to the fitted model's ``predict``.
    ytest : array-like
        Observed targets for ``xTest``; iterated row by row.
    model : int
        Which estimator to fit: 1 = ``LinearRegression``,
        2 = ``Ridge(alpha=0.1)``, 3 = ``Lasso(alpha=0.1)``.

    Returns
    -------
    None
        All results are printed.

    Raises
    ------
    ValueError
        If ``model`` is not 1, 2 or 3.
    """
    # Hard-coded training set: first slice holds the inputs, second the targets.
    train = np.array([[[1], [2], [3]], [[2], [4], [6]]])
    x_train = train[0, :]
    y_train = train[1, :]

    if model == 1:
        reg = linear_model.LinearRegression()
        label = 'Least Square'
    elif model == 2:
        reg = linear_model.Ridge(alpha=0.1)
        label = 'Ridged'
    elif model == 3:
        reg = linear_model.Lasso(alpha=0.1)
        label = 'Lasso'
    else:
        # Fail loudly instead of reaching predict() with no estimator bound.
        raise ValueError(f'model must be 1, 2 or 3, got {model!r}')

    reg.fit(x_train, y_train)
    # Leading "\n" puts a blank line between the reports of successive runs.
    print(f'\n{label} Coefficients: {reg.coef_}')

    preds = reg.predict(xTest)

    # Compare row by row: predictions may be 1-D (as with Lasso) while ytest
    # is a column vector, and subtracting the whole arrays would then
    # broadcast to an (n, n) matrix instead of n residuals.
    squared_errors = []
    for actual, predicted in zip(ytest, preds):
        print( "Actual=", actual, " Preds=", predicted)
        squared_errors.append((actual - predicted) ** 2)

    variance_value = np.var(squared_errors)
    print (f"Variance {round(variance_value, 2)}")
    print(f"Bias: {round(mean_squared_error(ytest,preds), 2)}")
    
# Held-out inputs (dateset_a) and their observed targets (dateset_b), shared
# by every model evaluated below.
# NOTE(review): "dateset" looks like a typo for "dataset"; the names are left
# unchanged in case other cells refer to them.
dateset_a = np.array([[4], [5], [6]])
dateset_b = np.array([[8.8], [14], [17]])

# Evaluate each model in turn on the same test data:
#   1 -> Least Square Coefficients
#   2 -> Ridged Coefficients
#   3 -> Lasso Coefficients
for model_id in (1, 2, 3):
    calculate_bias_variance(dateset_a, dateset_b, model_id)

nLeast Square Coefficients: [[2.]]
Actual= [8.8]  Preds= [8.]
Actual= [14.]  Preds= [10.]
Actual= [17.]  Preds= [12.]
Variance 101.15
Bias: 13.88
nRidged Coefficients: [[1.9047619]]
Actual= [8.8]  Preds= [7.80952381]
Actual= [14.]  Preds= [9.71428571]
Actual= [17.]  Preds= [11.61904762]
Variance 132.99
Bias: 16.1
nLasso Coefficients: [1.85]
Actual= [8.8]  Preds= 7.7
Actual= [14.]  Preds= 9.55
Actual= [17.]  Preds= 11.400000000000002
Variance 154.25
Bias: 17.46
