In [1]:
# Importing Required Libraries
import pandas as pd
import pickle 
import numpy as np

# Importing Dataset 
dataset=pd.read_csv("insurance_pre.csv")
dataset=pd.get_dummies(dataset,drop_first=True)
dataset=dataset.astype(int)

# Input and Output dataset split
independent=dataset[["age","bmi","children","sex_male","smoker_yes"]]
dependent=dataset[["charges"]]

# Preprocessing Input and Output dataset
from sklearn.preprocessing import StandardScaler
sc_X=StandardScaler()
independent=sc_X.fit_transform(independent)

from sklearn.preprocessing import StandardScaler
sc_Y=StandardScaler()
dependent=sc_Y.fit_transform(dependent)

# Train & Test Data Split
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(independent,dependent,test_size=0.20,random_state=0)

# LG Boost Regressor Model Creation
from sklearn.model_selection import GridSearchCV
!pip install xgboost
import xgboost
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
param_grid = {'n_estimators': [100, 200], 'learning_rate': [0.05, 0.1], 'max_depth': [3, 6], 'min_child_weight': [1, 5], 'subsample': [0.8], 'colsample_bytree': [0.8], 'gamma': [0, 0.1], 'reg_alpha': [0.0, 0.01], 'reg_lambda': [0.0, 0.01], 'random_state': [0]}
grid=GridSearchCV(XGBRegressor(),param_grid,scoring='r2',refit=True,verbose=3,n_jobs=-1)
grid=grid.fit(X_train,Y_train)

# Evaluation Metrics
Y_pred=grid.predict(X_test)
from sklearn.metrics import r2_score
r_score=r2_score(Y_test,Y_pred)
best_score=grid.best_score_
print("The best hyper tunning parameter combination for XG Boost Regressor model is {}".format(grid.best_params_))
print("The best score for the  XG Boost Regressor is :",best_score)
print("The r2 score for the XG Boost Regressor is :",r_score)

Fitting 5 folds for each of 128 candidates, totalling 640 fits
The best hyper tunning parameter combination for XG Boost Regressor model is {'colsample_bytree': 0.8, 'gamma': 0.1, 'learning_rate': 0.05, 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 100, 'random_state': 0, 'reg_alpha': 0.01, 'reg_lambda': 0.01, 'subsample': 0.8}
The best score for the  XG Boost Regressor is : 0.8489386757481949
The r2 score for the XG Boost Regressor is : 0.8970759958216388


In [2]:
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27,0,16884,0,1
1,18,33,1,1725,1,0
2,28,33,3,4449,1,0
3,33,22,0,21984,1,0
4,32,28,0,3866,1,0
...,...,...,...,...,...,...
1333,50,30,3,10600,1,0
1334,18,31,0,2205,0,0
1335,18,36,0,1629,0,0
1336,21,25,0,2007,0,0


In [3]:
# Saving and Loading the Best Model
saved_model=pickle.dump(grid,open("FinalModel_InsurancePrediction.sav",'wb'))
loaded_model=pickle.load(open("FinalModel_InsurancePrediction.sav",'rb'))
# User Input
age_in=int(input("Your Age:"))
bmi_in=int(input("Your BMI:"))
children_in=int(input("How many children"))
gender_in=int(input("Your Gender:"))
smoker_in=int(input("Smoker or not if smoker enter '1' else '0':"))
pre_input=sc_X.transform([[61,29,0,0,1]])
result=loaded_model.predict(pre_input)
pre_output=sc_Y.inverse_transform(result.reshape(-1,1))
print("Insurance Charges :",pre_output)

Your Age: 61
Your BMI: 29
How many children 0
Your Gender: 0
Smoker or not if smoker enter '1' else '0': 1


Insurance Charges : [[28140.102]]


