## Importing Necessary Libraries and Dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor
from sklearn.metrics import r2_score,mean_squared_error
from warnings import filterwarnings

## Data Preprocessing/Preparation


In [2]:
load_diabetes()

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990842, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06832974, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286377, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04687948,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452837, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00421986,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [3]:
X=load_diabetes().data
y=load_diabetes().target

In [4]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=12)

In [5]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((353, 10), (89, 10), (353,), (89,))

## Model Building


In [6]:
#Lineat Regression
lr_model=LinearRegression()
lr_model.fit(X_train,y_train)

#Decision Tree
dt_model=DecisionTreeRegressor(criterion='mae', max_depth= 3, min_samples_split= 3)
dt_model.fit(X_train,y_train)

#Ada Boosting
ab_model=AdaBoostRegressor()
ab_model.fit(X_train,y_train)

#Gradient Boosting
gb_model=GradientBoostingRegressor()
gb_model.fit(X_train,y_train)

GradientBoostingRegressor()

## Hyperparameter Tweaking With GridSearch CV

In [7]:
#Decision Tree
from sklearn.model_selection import GridSearchCV
grid_model_dt=GridSearchCV(estimator=dt_model,param_grid={'criterion':["mse", "friedman_mse", "mae", "poisson"],
                                            'max_depth':[3,5,7,8,10],
                                            'min_samples_split':[2,3,4]})
grid_model_dt.fit(X_train,y_train)
print(grid_model_dt.best_params_)
print(grid_model_dt.best_score_)

{'criterion': 'mae', 'max_depth': 3, 'min_samples_split': 2}
0.33173853473620724


## Model Testing

##### Testing for Train Data


In [8]:
y_pred_train_lr=lr_model.predict(X_train)
y_pred_train_dt=dt_model.predict(X_train)
y_pred_train_ab=ab_model.predict(X_train)
y_pred_train_gb=gb_model.predict(X_train)

##### Testing for Test Data

In [9]:
y_pred_test_lr=lr_model.predict(X_test)
y_pred_test_dt=dt_model.predict(X_test)
y_pred_test_ab=ab_model.predict(X_test)
y_pred_test_gb=gb_model.predict(X_test)

## Model Evaluation

##### Training Data Prediction Evaluation

In [10]:
mse=mean_squared_error(y_train,y_pred_train_lr)
r2=r2_score(y_train,y_pred_train_lr)
print("For Linear Regression Model The MSE={} and R2 Score={}.".format(mse,r2))

mse=mean_squared_error(y_train,y_pred_train_dt)
r2=r2_score(y_train,y_pred_train_dt)
print("For Decision Tree Model The MSE={} and R2 Score={}.".format(mse,r2))

mse=mean_squared_error(y_train,y_pred_train_ab)
r2=r2_score(y_train,y_pred_train_ab)
print("For Ada Boosting Model The MSE={} and R2 Score={}.".format(mse,r2))

mse=mean_squared_error(y_train,y_pred_train_gb)
r2=r2_score(y_train,y_pred_train_gb)
print("For Gradient Boosting Model The MSE={} and R2 Score={}.".format(mse,r2))

For Linear Regression Model The MSE=2810.495571122642 and R2 Score=0.5424703261420212.
For Decision Tree Model The MSE=3111.0679886685552 and R2 Score=0.49353916909502626.
For Ada Boosting Model The MSE=2159.3948409635186 and R2 Score=0.6484651221414334.
For Gradient Boosting Model The MSE=890.4567772790881 and R2 Score=0.8550396581018666.


##### Testing Data Prediciton Evaluation

In [11]:
mse=mean_squared_error(y_test,y_pred_test_lr)
r2=r2_score(y_test,y_pred_test_lr)
print("For Linear Regression Model The MSE={} and R2 Score={}.".format(mse,r2))

mse=mean_squared_error(y_test,y_pred_test_dt)
r2=r2_score(y_test,y_pred_test_dt)
print("For Decision Tree Model The MSE={} and R2 Score={}.".format(mse,r2))

mse=mean_squared_error(y_test,y_pred_test_ab)
r2=r2_score(y_test,y_pred_test_ab)
print("For Ada Boosting The MSE={} and R2 Score={}.".format(mse,r2))

mse=mean_squared_error(y_test,y_pred_test_lr)
r2=r2_score(y_test,y_pred_test_lr)
print("For Gradient Boosting Model The MSE={} and R2 Score={}.".format(mse,r2))

For Linear Regression Model The MSE=3108.240119411571 and R2 Score=0.38526246112295304.
For Decision Tree Model The MSE=3713.0337078651687 and R2 Score=0.2656483683208285.
For Ada Boosting The MSE=3451.0753585156335 and R2 Score=0.31745763169199714.
For Gradient Boosting Model The MSE=3108.240119411571 and R2 Score=0.38526246112295304.


## Checking Processing Speed


In [12]:
X.shape,y.shape

((442, 10), (442,))

In [13]:
%%time
ab_model.fit(X_train,y_train)

Wall time: 42 ms


AdaBoostRegressor()

In [14]:
%%time
gb_model.fit(X_train,y_train)

Wall time: 53.9 ms


GradientBoostingRegressor()

In [15]:
X=np.repeat(X,repeats=300,axis=0)
y=np.repeat(y,repeats=300,axis=0)

In [16]:
X.shape,y.shape

((132600, 10), (132600,))

In [17]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20,random_state=12)

In [18]:
#Ada Boosting
ab_model=AdaBoostRegressor()
ab_model.fit(X_train,y_train)

#Gradient Boosting
gb_model=GradientBoostingRegressor()
gb_model.fit(X_train,y_train)

GradientBoostingRegressor()

In [19]:
%%time
ab_model.fit(X_train,y_train)

Wall time: 1.58 s


AdaBoostRegressor()

In [20]:
%%time
gb_model.fit(X_train,y_train)

Wall time: 5.47 s


GradientBoostingRegressor()

In [21]:
#Processing time with Jupyter ipynb. using CPU
condition=('Without Repeat (ms)', 'With Repeat (s)')
pd.DataFrame({'AB':[89.1,7.6],
             'GB':[234,30.5]},index=condition)

Unnamed: 0,AB,GB
Without Repeat (ms),89.1,234.0
With Repeat (s),7.6,30.5
