In [1]:
from mlxtend.evaluate import bias_variance_decomp
import pandas as pd
from sklearn.metrics import mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

In [2]:
# We will be using the diabetes dataset from the scikit-learn toy Datasets 

from sklearn.datasets import load_diabetes

loaded_data = load_diabetes(as_frame=True, scaled= True)
data = loaded_data['data']
target = loaded_data['target']
data.head(), target.head()

(        age       sex       bmi        bp        s1        s2        s3  \
 0  0.038076  0.050680  0.061696  0.021872 -0.044223 -0.034821 -0.043401   
 1 -0.001882 -0.044642 -0.051474 -0.026328 -0.008449 -0.019163  0.074412   
 2  0.085299  0.050680  0.044451 -0.005670 -0.045599 -0.034194 -0.032356   
 3 -0.089063 -0.044642 -0.011595 -0.036656  0.012191  0.024991 -0.036038   
 4  0.005383 -0.044642 -0.036385  0.021872  0.003935  0.015596  0.008142   
 
          s4        s5        s6  
 0 -0.002592  0.019907 -0.017646  
 1 -0.039493 -0.068332 -0.092204  
 2 -0.002592  0.002861 -0.025930  
 3  0.034309  0.022688 -0.009362  
 4 -0.002592 -0.031988 -0.046641  ,
 0    151.0
 1     75.0
 2    141.0
 3    206.0
 4    135.0
 Name: target, dtype: float64)

In [3]:
data.describe()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,-2.511817e-19,1.23079e-17,-2.245564e-16,-4.79757e-17,-1.3814990000000001e-17,3.9184340000000004e-17,-5.777179e-18,-9.04254e-18,9.293722000000001e-17,1.130318e-17
std,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905
min,-0.1072256,-0.04464164,-0.0902753,-0.1123988,-0.1267807,-0.1156131,-0.1023071,-0.0763945,-0.1260971,-0.1377672
25%,-0.03729927,-0.04464164,-0.03422907,-0.03665608,-0.03424784,-0.0303584,-0.03511716,-0.03949338,-0.03324559,-0.03317903
50%,0.00538306,-0.04464164,-0.007283766,-0.005670422,-0.004320866,-0.003819065,-0.006584468,-0.002592262,-0.001947171,-0.001077698
75%,0.03807591,0.05068012,0.03124802,0.03564379,0.02835801,0.02984439,0.0293115,0.03430886,0.03243232,0.02791705
max,0.1107267,0.05068012,0.1705552,0.1320436,0.1539137,0.198788,0.1811791,0.1852344,0.1335973,0.1356118


In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(data.values, target.values, test_size = 0.2, random_state = 2)

In [19]:
from sklearn.tree import DecisionTreeRegressor

cls = DecisionTreeRegressor()
cls.fit(X_train, y_train)

avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(cls, X_train, y_train, X_test, y_test, loss="mse")
print(f" Loss: {round(avg_expected_loss,4)}\n Bias: {round(avg_bias,4)}\n Variance: {round(avg_var, 4)}")

 Loss: 6675.4315
 Bias: 3706.9825
 Variance: 2968.4489


In [20]:
from sklearn.naive_bayes import BernoulliNB

cls = BernoulliNB()
cls.fit(X_train, y_train)

avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(cls, X_train, y_train, X_test, y_test, loss="mse")
print(f" Loss: {round(avg_expected_loss,4)}\n Bias: {round(avg_bias,4)}\n Variance: {round(avg_var, 4)}")

 Loss: 6744.8219
 Bias: 4179.0943
 Variance: 2565.7276


In [22]:
from sklearn.ensemble import RandomForestRegressor

cls = RandomForestRegressor()
cls.fit(X_train, y_train)

avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(cls, X_train, y_train, X_test, y_test, loss="mse")
print(f" Loss: {round(avg_expected_loss,4)}\n Bias: {round(avg_bias,4)}\n Variance: {round(avg_var, 4)}")

 Loss: 3787.0984
 Bias: 3492.7113
 Variance: 294.3871


In [21]:
# Let us start with linear regressor as the baseline predictor

from sklearn.linear_model import LinearRegression

cls = LinearRegression()
cls.fit(X_train, y_train)
avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(cls, X_train, y_train, X_test, y_test, loss="mse")
print(f" Loss: {round(avg_expected_loss, 4)}\n Bias: {round(avg_bias, 4)}\n Variance: {round(avg_var, 4)}")

 Loss: 3192.7336
 Bias: 3099.9357
 Variance: 92.7979


In [11]:
import tensorflow as tf
from tensorflow import keras 

model = keras.models.Sequential([
keras.layers.Dense(units = 1, input_shape = (10, )),
])

model.compile(optimizer = "adam", loss = "mse", metrics=["accuracy"])

avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(model, X_train, y_train, X_test, y_test, loss="mse")
print(f" Loss: {round(avg_expected_loss,4)}\n Bias: {round(avg_bias,4)}\n Variance: {round(avg_var, 4)}")

In [15]:
model1 = keras.models.Sequential([
keras.layers.Dense(units = 10, input_shape = (10, )),
keras.layers.Dense(units = 1)
])

model1.compile(optimizer= 'adam', loss = 'mse', metrics=["accuracy"])

avg_expected_loss, avg_bias, avg_var = bias_variance_decomp(model1, X_train, y_train, X_test, y_test, loss="mse")
print(f" Loss: {round(avg_expected_loss,4)}\n Bias: {round(avg_bias,4)}\n Variance: {round(avg_var, 4)}")

Epoch 1/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 29719.9492   
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 29084.0293 
Epoch 3/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 28217.6348 
Epoch 4/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0000e+00 - loss: 28946.5840 
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946us/step - accuracy: 0.0000e+00 - loss: 30427.9961
Epoch 6/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 29549.7969 
Epoch 7/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.0000e+00 - loss: 29016.6172 
Epoch 8/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.00

<keras.src.callbacks.history.History at 0x1aa639b3fd0>