In [1]:
import pandas as pd
import numpy as np

## Load Data

In [2]:
# Pull PyCaret's bundled 'insurance' sample dataset; `data` is the frame
# handed to setup() in the next section.
from pycaret.datasets import get_data

data = get_data("insurance")

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## Setup Data for Model/Data Preparation

Steps performed by setup function:
- Train Test Split
- Scaling
- Transformation
- OneHotEncoding
- Missing Value Imputation
- Feature Engineering

In [3]:
# The star import brings PyCaret's regression API (setup, compare_models,
# create_model, tune_model, ...) into the notebook namespace.
from pycaret.regression import *

# session_id fixes the experiment seed (it shows up as random_state=123 in the
# estimators printed further down).
# Optional: pass use_gpu=True as an extra argument to request GPU training.
s = setup(
    data,
    target="charges",
    session_id=123,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,charges
2,Original Data,"(1338, 7)"
3,Missing Values,False
4,Numeric Features,2
5,Categorical Features,4
6,Ordinal Features,False
7,High Cardinality Features,False
8,High Cardinality Method,
9,Transformed Train Set,"(936, 14)"


In [4]:
# Inspect the training features as prepared by setup(): the categorical
# columns show up one-hot encoded (e.g. smoker_no, region_northeast).
get_config("X_train")

Unnamed: 0,age,bmi,sex_female,children_0,children_1,children_2,children_3,children_4,children_5,smoker_no,region_northeast,region_northwest,region_southeast,region_southwest
300,36.0,27.549999,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
904,60.0,35.099998,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
670,30.0,31.570000,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
617,49.0,25.600000,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
373,26.0,32.900002,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1238,37.0,22.705000,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1147,20.0,31.920000,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
106,19.0,28.400000,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
1041,18.0,23.084999,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


# 👉 Model Training & Selection

## Compare Models

 - Train all models (available in pycaret library) using default hyperparameters 
 - All Models are trained using KFold Cross Validation
 - The table reports each model's mean cross-validated metrics (averaged over the folds)

In [5]:
# Cross-validate the available regressors with default hyperparameters and
# keep the top-ranked model (first row of the comparison table) in `best`.
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
gbr,Gradient Boosting Regressor,2707.6623,23281021.7989,4805.264,0.8345,0.4406,0.3127,0.027
catboost,CatBoost Regressor,2844.4446,24943135.5224,4977.1926,0.8228,0.4707,0.3364,0.589
rf,Random Forest Regressor,2748.7705,25058324.2434,4990.5827,0.8199,0.4684,0.3308,0.188
lightgbm,Light Gradient Boosting Machine,2959.5584,25236477.0456,5013.0892,0.8171,0.5427,0.3685,0.084
ada,AdaBoost Regressor,4162.2323,28328260.0955,5316.6146,0.7985,0.6349,0.7263,0.011
et,Extra Trees Regressor,2805.9612,28735821.1695,5333.4655,0.7972,0.4901,0.3356,0.186
xgboost,Extreme Gradient Boosting,3302.3215,31739266.6,5615.5941,0.7701,0.5661,0.4218,0.334
llar,Lasso Least Angle Regression,4315.7901,38355972.3465,6173.8736,0.7311,0.6105,0.4415,0.006
ridge,Ridge Regression,4336.2304,38381492.8,6175.9537,0.7309,0.6193,0.4454,0.007
br,Bayesian Ridge,4333.6881,38381669.3629,6175.9476,0.7308,0.6151,0.445,0.006


## Check Best Model

In [6]:
# Show the hyperparameters of the top-ranked estimator.
print(best)

GradientBoostingRegressor(alpha=0.9, ccp_alpha=0.0, criterion='friedman_mse',
                          init=None, learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=100,
                          n_iter_no_change=None, presort='deprecated',
                          random_state=123, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)


In [7]:
# Check which estimator class compare_models() returned.
type(best)

sklearn.ensemble._gb.GradientBoostingRegressor

## Create Individual Model

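- The **create_model()** function trains the single model passed as a parameter and evaluates it with K-fold cross-validation (one row of metrics per fold, plus their mean and standard deviation)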

In [8]:
# Train a decision tree regressor ('dt') and report its metrics over
# 5 cross-validation folds instead of the default fold count.
dt = create_model("dt", fold=5)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,3138.2387,44004437.1489,6633.584,0.7407,0.552,0.3756
1,3107.5154,43203348.9249,6572.9254,0.5663,0.618,0.4905
2,3651.9177,50733227.3856,7122.7261,0.6531,0.5562,0.3964
3,3303.8003,47951619.677,6924.7108,0.7082,0.4818,0.2388
4,3272.3807,44769635.5993,6691.0116,0.7106,0.5592,0.3933
Mean,3294.7706,46132453.7471,6788.9916,0.6758,0.5534,0.3789
SD,193.7543,2808624.7794,205.0535,0.0616,0.0432,0.0807


In [9]:
# Show the (default) hyperparameters of the decision tree created above.
print(dt)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=123, splitter='best')


## Tune HyperParameters

- parameters are tuned for the passed model using RandomizedSearch on dynamically defined hyperparameter SearchSpace for each estimator 
- custom search space can be passed as parameter **custom_grid**
- To change the random-search behaviour of iterating over the search space, we can use the **search_library** parameter. Options are: 
    - **optuna** (will use methods defined by optuna to iterate over SearchSpace), 
    - **scikit-optimize** 
    - **scikit-learn**
    - **tune-sklearn**

In [10]:
# Tune the decision tree with Optuna as the search backend.
# The result gets its own name: binding it to `tuned_dt` would be a dead store,
# because the later scikit-learn / scikit-optimize runs overwrite `tuned_dt`
# before it is ever read. Keeping it separate lets the backends be compared.
tuned_dt_optuna = tune_model(dt, search_library="optuna")

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,2734.2377,21462762.6289,4632.7921,0.8718,0.407,0.3073
1,2880.1883,30051973.0576,5481.968,0.8253,0.449,0.2871
2,2645.7491,21946974.7265,4684.7598,0.7221,0.5009,0.3709
3,2772.5426,22227368.5512,4714.591,0.8144,0.4476,0.3636
4,2962.9698,24346002.8018,4934.1669,0.8189,0.464,0.3347
5,2575.2848,18451315.3339,4295.4994,0.8813,0.3481,0.2533
6,2766.8945,19659509.9272,4433.9046,0.8662,0.4411,0.3719
7,2489.9566,22155290.115,4706.9406,0.8782,0.4297,0.2566
8,2652.5491,19561567.5797,4422.8461,0.8737,0.4105,0.3316
9,2901.8996,27819730.9009,5274.4413,0.8208,0.4954,0.3475


In [11]:
# Tune the decision tree with the scikit-learn search backend.
# The result gets its own name: binding it to `tuned_dt` would be a dead store,
# because the scikit-optimize run below overwrites `tuned_dt` before it is
# ever read. Keeping it separate lets the backends be compared.
tuned_dt_sklearn = tune_model(dt, search_library="scikit-learn")

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,1710.0867,18253568.8962,4272.4196,0.891,0.3435,0.1349
1,2342.9618,33002910.7856,5744.816,0.8081,0.4462,0.1421
2,1992.6884,23279759.5944,4824.9103,0.7053,0.4672,0.158
3,2250.2711,25594847.875,5059.1351,0.7863,0.4246,0.2126
4,2157.4516,24978154.439,4997.815,0.8142,0.4363,0.1531
5,1991.3288,18794342.2788,4335.2442,0.8791,0.3399,0.1565
6,1688.3935,20093049.8225,4482.5272,0.8633,0.3137,0.121
7,2060.8145,26178263.6299,5116.4698,0.8561,0.4613,0.1332
8,2088.226,23545921.7229,4852.414,0.8479,0.3741,0.1592
9,2233.1985,27217915.9631,5217.0793,0.8247,0.4302,0.1662


In [12]:
# Tune the decision tree with the scikit-optimize search backend.
# This is the `tuned_dt` that the printing and ensembling cells below use.
tuned_dt = tune_model(dt, search_library="scikit-optimize")

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,1675.8165,17927404.4921,4234.0766,0.8929,0.3501,0.125
1,2276.0277,33091847.5232,5752.5514,0.8076,0.441,0.1329
2,1935.0588,23241585.5791,4820.9528,0.7057,0.4875,0.1412
3,2120.6005,25430767.7388,5042.8928,0.7877,0.3998,0.1682
4,2160.0397,25443788.3092,5044.1836,0.8108,0.4616,0.1577
5,1938.3144,19274854.2167,4390.3137,0.876,0.3389,0.1406
6,1715.335,20876047.1022,4569.0313,0.858,0.3132,0.1147
7,1976.3361,24043530.294,4903.4203,0.8678,0.4164,0.118
8,1955.4282,22710889.6489,4765.5944,0.8533,0.3463,0.121
9,2063.9119,25880610.0733,5087.2989,0.8333,0.4211,0.1301


In [13]:
# Show the hyperparameters selected by the tuning run bound to `tuned_dt`.
print(tuned_dt)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mae', max_depth=10,
                      max_features=0.9631803029863928, max_leaf_nodes=None,
                      min_impurity_decrease=2.9543258674895983e-06,
                      min_impurity_split=None, min_samples_leaf=5,
                      min_samples_split=3, min_weight_fraction_leaf=0.0,
                      presort='deprecated', random_state=123, splitter='best')


## Ensemble Model

In [14]:
# Wrap the tuned decision tree in an ensemble of 25 estimators
# (printed below as a BaggingRegressor).
bagged_tuned_dt = ensemble_model(tuned_dt, n_estimators=25)

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,1794.1205,18232946.5132,4270.0054,0.8911,0.3567,0.1419
1,2340.5536,30405691.1006,5514.1356,0.8232,0.4056,0.1657
2,1872.4326,20222683.1381,4496.9638,0.744,0.4541,0.166
3,2037.6146,20666488.0004,4546.0409,0.8274,0.3902,0.1927
4,2217.8189,25773675.1568,5076.778,0.8083,0.4658,0.1703
5,2022.749,17833269.7756,4222.9456,0.8853,0.3182,0.141
6,1668.6777,17697817.2298,4206.8774,0.8796,0.289,0.1363
7,2135.6614,25196120.8628,5019.5738,0.8615,0.4417,0.1474
8,1952.3761,20564678.8415,4534.8295,0.8672,0.3433,0.1627
9,2037.8192,24464511.6505,4946.1613,0.8424,0.4004,0.148


In [15]:
# Show the ensemble wrapper together with its base estimator's settings.
print(bagged_tuned_dt)

BaggingRegressor(base_estimator=DecisionTreeRegressor(ccp_alpha=0.0,
                                                      criterion='mae',
                                                      max_depth=10,
                                                      max_features=0.9631803029863928,
                                                      max_leaf_nodes=None,
                                                      min_impurity_decrease=2.9543258674895983e-06,
                                                      min_impurity_split=None,
                                                      min_samples_leaf=5,
                                                      min_samples_split=3,
                                                      min_weight_fraction_leaf=0.0,
                                                      presort='deprecated',
                                                      random_state=123,
                                                      splitter='best')

## Voting Ensemble

- Train individual models and combine their predictions (e.g. by a simple or weighted average) to obtain the final prediction and its metrics

In [16]:
# Train the three base learners quietly (verbose=False suppresses their
# per-fold score tables), then combine them into one voting ensemble.
dt, lasso, knn = [
    create_model(model_id, verbose=False)
    for model_id in ("dt", "lasso", "knn")
]
blend = blend_models([dt, lasso, knn])

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,4254.7569,39288481.7049,6268.0525,0.7653,0.5396,0.5098
1,4549.3478,47410270.3769,6885.5116,0.7243,0.5491,0.446
2,3669.0042,30960224.8227,5564.1913,0.608,0.6144,0.5503
3,4057.8715,33086472.715,5752.0842,0.7237,0.604,0.6271
4,4866.2837,49027079.8801,7001.934,0.6354,0.6001,0.5892
5,4085.1706,38313912.7579,6189.8233,0.7535,0.4622,0.3828
6,4026.8133,43261254.2573,6577.3288,0.7056,0.4937,0.4382
7,3990.9587,37547483.1804,6127.6001,0.7935,0.4378,0.3313
8,4554.1224,44496301.0838,6670.5548,0.7126,0.5864,0.6296
9,4411.3834,44324814.4875,6657.6884,0.7145,0.5542,0.4481


In [17]:
# Show the voting ensemble and the estimators it contains.
print(blend)

VotingRegressor(estimators=[('dt',
                             DecisionTreeRegressor(ccp_alpha=0.0,
                                                   criterion='mse',
                                                   max_depth=None,
                                                   max_features=None,
                                                   max_leaf_nodes=None,
                                                   min_impurity_decrease=0.0,
                                                   min_impurity_split=None,
                                                   min_samples_leaf=1,
                                                   min_samples_split=2,
                                                   min_weight_fraction_leaf=0.0,
                                                   presort='deprecated',
                                                   random_state=123,
                                                   splitter='best')),
                            ('la

## Stacking Ensemble

- The predictions of the base models are used as the input of another (meta) model, which produces the final prediction and metrics (no backpropagation is involved)

In [18]:
# Stack the same three base learners (decision tree, lasso, k-NN) that were
# trained for the voting ensemble; the result prints as a StackingRegressor.
stacker = stack_models([dt, lasso, knn])

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,3238.9721,24981487.0135,4998.1484,0.8508,0.4465,0.3617
1,3729.1062,35983103.2617,5998.5918,0.7908,0.5305,0.3502
2,3503.0224,30404562.7682,5514.0333,0.615,0.5763,0.4142
3,3039.4653,22225313.7487,4714.3731,0.8144,0.4638,0.4078
4,4066.2311,38028189.6221,6166.7001,0.7172,0.5441,0.426
5,3513.1981,28140892.9925,5304.7991,0.819,0.488,0.3699
6,3370.8115,31264927.806,5591.505,0.7873,0.4402,0.3302
7,3702.2301,31195043.4548,5585.2523,0.8285,0.4942,0.3036
8,3776.3036,28119756.9578,5302.8065,0.8184,0.5503,0.466
9,3840.9799,34641906.1381,5885.7375,0.7769,0.6165,0.4078


In [19]:
# Show the stacking ensemble, including its CV splitter and base estimators.
print(stacker)

StackingRegressor(cv=KFold(n_splits=10, random_state=RandomState(MT19937) at 0x20139F20DB0,
   shuffle=False),
                  estimators=[('dt',
                               DecisionTreeRegressor(ccp_alpha=0.0,
                                                     criterion='mse',
                                                     max_depth=None,
                                                     max_features=None,
                                                     max_leaf_nodes=None,
                                                     min_impurity_decrease=0.0,
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
                                                     presor...
                                     positive=Fa

# 👉 Analyse Model

In [20]:
# Open PyCaret's interactive plot-type selector (an ipywidgets control) for
# the best model found by compare_models().
evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [None]:
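# interpret_model(dt)  # Disabled: not working on the author's laptop. NOTE(review): interpret_model presumably needs the optional `shap` package - confirm it is installed before re-enabling.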