In [1]:
import numpy as np
import pandas as pd

In [2]:
dataset = pd.read_csv('insurance_pre.csv')

In [3]:
dataset.head()

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [4]:
dataset = pd.get_dummies(dataset, drop_first=True)

In [5]:
dataset.head()

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,False,True
1,18,33.77,1,1725.5523,True,False
2,28,33.0,3,4449.462,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.88,0,3866.8552,True,False


In [6]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [7]:
independent = dataset[['age', 'bmi', 'children','sex_male', 'smoker_yes']]
dependent = dataset['charges']

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(independent, dependent, test_size=0.2, random_state=0)

In [10]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [11]:
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import GridSearchCV
# creating the parameter grid

# criterion{“squared_error”, “absolute_error”, “friedman_mse”, “poisson”}, default=”squared_error”
# max_features{“sqrt”, “log2”, None},
param_grid={'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson'],
                                        'max_features': ['sqrt', 'log2'],
                                        'n_estimators': [10, 100],
                                        }



grid  = GridSearchCV(RandomForestRegressor(), param_grid=param_grid, refit=True, verbose=3, n_jobs=-1)
# fitting the model for gid search
grid.fit(X_train, y_train)






Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [13]:
# print best parameter after tuning
# print(grid.best_params_)
re = grid.cv_results_
print(re)

grid_predictions = grid.predict(X_test)

# print report
from sklearn.metrics import r2_score
r2_score=r2_score(y_test, grid_predictions)

print("The R_score value for best parameter {}:".format(grid.best_params_), r2_score)

{'mean_fit_time': array([0.04173112, 0.32798524, 0.03101726, 0.29855323, 0.03269935,
       0.3358943 , 0.02847195, 0.34364033, 0.10906425, 0.95596576,
       0.10936399, 0.98837748, 0.0523891 , 0.35485983, 0.03882756,
       0.26067553]), 'std_fit_time': array([0.00734116, 0.02065213, 0.00308247, 0.01770908, 0.00387096,
       0.04179012, 0.00093898, 0.04835424, 0.02224307, 0.044615  ,
       0.02749932, 0.06150867, 0.01210361, 0.0310445 , 0.00569663,
       0.00512946]), 'mean_score_time': array([0.00331435, 0.01228991, 0.00250783, 0.01202731, 0.00211406,
       0.01555858, 0.00241351, 0.01147928, 0.00361762, 0.01261406,
       0.00351295, 0.01029744, 0.00472422, 0.01339412, 0.00200377,
       0.00732017]), 'std_score_time': array([0.00040336, 0.00102294, 0.00063734, 0.00155835, 0.0009741 ,
       0.00499989, 0.00038262, 0.00171529, 0.00247071, 0.00470623,
       0.00215619, 0.00131307, 0.00337379, 0.00404714, 0.00031584,
       0.00086658]), 'param_criterion': masked_array(data=['sq

In [14]:
table = pd.DataFrame.from_dict(re)

In [15]:
table.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.041731,0.007341,0.003314,0.000403,squared_error,sqrt,10,"{'criterion': 'squared_error', 'max_features':...",0.826807,0.804915,0.775407,0.816222,0.75625,0.79592,0.026236,14
1,0.327985,0.020652,0.01229,0.001023,squared_error,sqrt,100,"{'criterion': 'squared_error', 'max_features':...",0.852322,0.821864,0.801634,0.836053,0.772169,0.816808,0.027857,2
2,0.031017,0.003082,0.002508,0.000637,squared_error,log2,10,"{'criterion': 'squared_error', 'max_features':...",0.823933,0.79921,0.778199,0.820793,0.74673,0.793773,0.028722,16
3,0.298553,0.017709,0.012027,0.001558,squared_error,log2,100,"{'criterion': 'squared_error', 'max_features':...",0.84745,0.820267,0.805883,0.833632,0.77131,0.815708,0.026143,4
4,0.032699,0.003871,0.002114,0.000974,friedman_mse,sqrt,10,"{'criterion': 'friedman_mse', 'max_features': ...",0.82342,0.793809,0.800404,0.81533,0.762872,0.799167,0.020962,13


In [16]:
table.to_csv('gridsearch_randomr_forest_result.csv', index=False)


In [17]:
age_input = float(input("Age: "))
bmi_input = float(input("BMI: "))
children_input = float(input("Children:"))
sex_male_input = int(input("Sex Male 0 or 1:"))
smoker_yes_input = int(input("Smoker Yes 0 or  1:"))

                     

In [20]:
Future_Prediction=grid.predict([[age_input, bmi_input, children_input, sex_male_input, smoker_yes_input]]) # change the parameters according to your need
print("The future prediction for the charges is: ", Future_Prediction)

The future prediction for the charges is:  [14333.2481488]
