**Installation of Packages:**

In [None]:
!pip install catboost

In [None]:
!pip install pycaret

**Importing the Required libararies**

In [17]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV

from sklearn.neighbors import KNeighborsRegressor

from sklearn.tree import DecisionTreeRegressor

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from xgboost import XGBRegressor
from catboost import CatBoostRegressor

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score


**Reading the Insurance Dataset from PyCaret**

In [18]:
from pycaret.datasets import get_data
insurance = get_data('insurance')

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [19]:
insurance.head(10)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552
5,31,female,25.74,0,no,southeast,3756.6216
6,46,female,33.44,1,no,southeast,8240.5896
7,37,female,27.74,3,no,northwest,7281.5056
8,37,male,29.83,2,no,northeast,6406.4107
9,60,female,25.84,0,no,northwest,28923.13692


In [20]:
insurance.shape

(1338, 7)

In [21]:
insurance.columns


Index(['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'charges'], dtype='object')

In [22]:
insurance.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [23]:
insurance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [24]:
insurance.describe(include=['O'])

Unnamed: 0,sex,smoker,region
count,1338,1338,1338
unique,2,2,4
top,male,no,southeast
freq,676,1064,364


In [25]:
insurance['sex']=insurance['sex'].map({'male':0,'female':1})
insurance['smoker']=insurance['smoker'].map({'yes':1,'no':0})
insurance['region']=insurance['region'].map({'southeast':0,'southwest':1,'northeast':2,'northwest':3})

**Training and Test data split for Insurance Dataset**

In [26]:
y = insurance[['charges']]
insurance_x = insurance.drop(columns=['charges'], axis=True)

In [27]:
X_train,X_test,y_train,y_test = train_test_split(insurance_x, y, random_state=41, test_size=0.2)

**Multiple Regression Model Evaluations**

In [28]:
multi_models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "K-Nearest Neighbors": KNeighborsRegressor(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "AdaBoost": AdaBoostRegressor(),
    "XGBoost": XGBRegressor(),
    "CatBoost": CatBoostRegressor(verbose=False)
}

def mae_eval_model(y_test, y_train_pred):
  return mean_absolute_error(y_test, y_train_pred)

model_list=[]
r2_list=[]


## Training the Model
for i in range(len(list(multi_models))):
  model = list(multi_models.values())[i]
  model.fit(X_train, y_train)

  # Making Prediction
  y_test_pred = model.predict(X_test)

  # Evaluating the Test Dataset
  mae_model_test = mae_eval_model(y_test, y_test_pred)
  r2_model_test = r2_score(y_test, y_test_pred)

  # Appending the Results
  model_list.append({list(multi_models.keys())[i]:mae_model_test})
  r2_list.append({list(multi_models.keys())[i]:r2_model_test})


**Performance Metrics:**

In [29]:
for i in model_list:
  print(i)

{'Linear Regression': 4140.486874555646}
{'Ridge Regression': 4147.465455311316}
{'Lasso Regression': 4140.523239857567}
{'K-Nearest Neighbors': 7342.848892977612}
{'Decision Tree': 2871.767894216418}
{'Random Forest': 2463.45527451936}
{'Gradient Boosting': 2470.549364767739}
{'AdaBoost': 4081.7742314217926}
{'XGBoost': 2603.170227926836}
{'CatBoost': 2341.345559010927}


In [30]:
for i in r2_list:
  print(i)

{'Linear Regression': 0.6848380845442359}
{'Ridge Regression': 0.6857103347524061}
{'Lasso Regression': 0.6849072421758537}
{'K-Nearest Neighbors': 0.09822907169981254}
{'Decision Tree': 0.646990458893427}
{'Random Forest': 0.8089566244220651}
{'Gradient Boosting': 0.8216378505084782}
{'AdaBoost': 0.7632007974449078}
{'XGBoost': 0.8040768770125537}
{'CatBoost': 0.8222080658073795}


**Outputs**

In [31]:
df = pd.DataFrame({'y_test': y_test['charges'], 'y_test_pred': y_test_pred})

# Display the first 10 rows of the DataFrame
print(df.head(10))

           y_test   y_test_pred
1083   4076.49700   4190.363676
1237  12224.35085  11607.667630
519    3857.75925   5557.514796
79     6571.02435   6700.497836
930    2927.06470   3597.072239
393    9290.13950  10017.111758
1313  36397.57600  35360.797542
964   26467.09737  14896.092251
1310   6940.90985   8437.694729
880    3443.06400   4109.952558


In [32]:
y_test

Unnamed: 0,charges
1083,4076.49700
1237,12224.35085
519,3857.75925
79,6571.02435
930,2927.06470
...,...
1152,40941.28540
294,3906.12700
196,5649.71500
810,9414.92000


In [33]:
y_test_pred

array([ 4190.36367631, 11607.66762976,  5557.51479568,  6700.49783628,
        3597.07223869, 10017.11175841, 35360.79754196, 14896.09225121,
        8437.69472886,  4109.95255842, 36664.28361873, 16173.21367431,
        3150.11634978, 36213.98375189,  1093.18598475, 11468.20835177,
       13917.86414372,  6598.15466797, 15675.954191  , 23340.64589017,
        7751.46138015, 38841.82229507,  8595.09416276, 16582.33598254,
       14335.87701217, 12609.41472584,  6511.22564703,  3982.91970325,
        5462.2636094 , 16149.83523496,  9466.97775705,  1566.80821297,
        7389.58925062, 13338.92631035,  6590.36843949, 19229.05590788,
        9906.35788831, 14944.3283854 ,  1955.40563563, 24714.67002503,
       17789.32668782, 24745.58706127, 10642.07322361, 13987.56862962,
        8133.59441328,  4355.0684214 ,  5959.29673297, 40966.99751197,
       14072.05758153,  6248.46133551,  1338.61111903,  5978.70708925,
        7860.28592643,  7848.12392575,  5261.71360533, 11483.40915835,
      