In [17]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from typing import Tuple, List, Dict
import math
import seaborn as sns
import matplotlib.pyplot as plt
from ucimlrepo import fetch_ucirepo 
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

In [8]:
# fetch dataset 
bike_sharing = fetch_ucirepo(id=275) 
  
# data (as pandas dataframes) 
# leaving year out since we want the model to work for other years as well

bike_X: pd.DataFrame = bike_sharing.data.features.loc[:,['season', 'mnth', 'hr', 'holiday', 'weekday',
       'workingday', 'weathersit', 'temp', 'atemp', 'hum', 'windspeed']]
bike_y = bike_sharing.data.targets 
bike_X.head()

Unnamed: 0,season,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
0,1,1,0,0,6,0,1,0.24,0.2879,0.81,0.0
1,1,1,1,0,6,0,1,0.22,0.2727,0.8,0.0
2,1,1,2,0,6,0,1,0.22,0.2727,0.8,0.0
3,1,1,3,0,6,0,1,0.24,0.2879,0.75,0.0
4,1,1,4,0,6,0,1,0.24,0.2879,0.75,0.0


In [15]:
bike_y.head()

Unnamed: 0,cnt
0,16
1,40
2,32
3,13
4,1


In [9]:

# %%
bike_X = pd.get_dummies(bike_X,columns=['season','mnth','hr','weekday','weathersit'])
# %%
bike_train_X, bike_test_X, bike_train_y, bike_test_y = train_test_split(bike_X,bike_y,test_size=0.20,random_state=1234)

In [29]:
#linreg = LinearRegression().fit(bike_train_X,bike_train_y)
linreg1 = DecisionTreeRegressor(random_state=1234).fit(bike_train_X,bike_train_y)
train_pred_y = linreg1.predict(bike_train_X)
train_mse = mean_squared_error(bike_train_y,train_pred_y)
train_pred_y_var = np.var(train_pred_y)
train_pred_y_bias_sq = np.mean(np.mean(train_pred_y) - bike_train_y)**2
test_pred_y = linreg1.predict(bike_test_X)
test_mse = mean_squared_error(bike_test_y,test_pred_y)
test_pred_y_bias_sq = np.mean(np.mean(test_pred_y) - bike_test_y)**2
test_pred_y_var = np.var(test_pred_y)

print(f"Train MSE:{train_mse},Variance:{train_pred_y_var},Bias squared:{train_pred_y_bias_sq}")
print(f"Test MSE:{test_mse},Variance:{test_pred_y_var},Bias squared:{test_pred_y_bias_sq}")

Train MSE:3.5558152916636696,Variance:32925.97419983679,Bias squared:3.7494427215283933e-28
Test MSE:9807.832997698504,Variance:33023.93561720613,Bias squared:20.976189183498693


In [30]:
linreg2 = DecisionTreeRegressor(max_depth=10,min_samples_split=6, random_state=1234).fit(bike_train_X,bike_train_y)
train_pred_y = linreg2.predict(bike_train_X)
train_mse = mean_squared_error(bike_train_y,train_pred_y)
train_pred_y_var = np.var(train_pred_y)
train_pred_y_bias_sq = np.mean(np.mean(train_pred_y) - bike_train_y)**2
test_pred_y = linreg2.predict(bike_test_X)
test_mse = mean_squared_error(bike_test_y,test_pred_y)
test_pred_y_bias_sq = np.mean(np.mean(test_pred_y) - bike_test_y)**2
test_pred_y_var = np.var(test_pred_y)

print(f"Train MSE:{train_mse},Variance:{train_pred_y_var},Bias squared:{train_pred_y_bias_sq}")
print(f"Test MSE:{test_mse},Variance:{test_pred_y_var},Bias squared:{test_pred_y_bias_sq}")

Train MSE:8865.42641020334,Variance:24064.103604925112,Bias squared:3.7494427215283933e-28
Test MSE:11809.556955845463,Variance:23963.275045890958,Bias squared:2.5407974215667144


In [35]:
from mlxtend.evaluate import bias_variance_decomp
linreg1 = DecisionTreeRegressor(random_state=1234)
linreg2 = DecisionTreeRegressor(max_depth=10,min_samples_split=6, random_state=1234)
avg_expected_loss1, avg_bias1, avg_var1 = bias_variance_decomp(linreg1,bike_train_X.values,bike_train_y.values,bike_test_X.values,bike_test_y.values,loss="mse",random_seed=1234,num_rounds=25)
print(f"Test MSE:{avg_expected_loss1},Variance:{avg_var1},Bias squared:{avg_bias1}")
avg_expected_loss2, avg_bias2, avg_var2 = bias_variance_decomp(linreg2,bike_train_X.values,bike_train_y.values,bike_test_X.values,bike_test_y.values,loss="mse",random_seed=1234,num_rounds=25)
print(f"Test MSE:{avg_expected_loss2},Variance:{avg_var2},Bias squared:{avg_bias2}")

Test MSE:66433.30199452711,Variance:5537.608939487021,Bias squared:211673429.0593197
Test MSE:57297.734310032276,Variance:2803.001889117321,Bias squared:189423689.8951004
