In [1]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

In [2]:
dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [3]:
dataset=pd.get_dummies(dataset,drop_first=True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [4]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [5]:
independent=dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,False,True
1,18,33.770,1,True,False
2,28,33.000,3,True,False
3,33,22.705,0,True,False
4,32,28.880,0,True,False
...,...,...,...,...,...
1333,50,30.970,3,True,False
1334,18,31.920,0,False,False
1335,18,36.850,0,False,False
1336,21,25.800,0,False,False


In [6]:
dependent = dataset[['charges']]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [7]:
param_grid={
    "n_estimators": [50, 100, 150, 200],
    "criterion" : ["squared_error", "absolute_error", "friedman_mse", "poisson"],
    "max_depth": [5, 10, None],
    "min_samples_split": [2, 5, 10]
}
grid=GridSearchCV(RandomForestRegressor(),param_grid,refit=True,verbose=3,n_jobs=-1)
grid.fit(independent,dependent)

Fitting 5 folds for each of 144 candidates, totalling 720 fits


  return fit_method(estimator, *args, **kwargs)


In [8]:
re=grid.cv_results_
print("The R_Score value for best parameter {}:".format(grid.best_params_))

The R_Score value for best parameter {'criterion': 'poisson', 'max_depth': 5, 'min_samples_split': 10, 'n_estimators': 150}:


In [9]:
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_depth,param_min_samples_split,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.175633,0.011340,0.013560,0.000830,squared_error,5,2,50,"{'criterion': 'squared_error', 'max_depth': 5,...",0.879496,0.797845,0.889575,0.837926,0.866990,0.854366,0.033153,35
1,0.340139,0.005958,0.022264,0.000564,squared_error,5,2,100,"{'criterion': 'squared_error', 'max_depth': 5,...",0.878786,0.797161,0.888257,0.840854,0.869757,0.854963,0.032970,32
2,0.467743,0.007677,0.030322,0.000234,squared_error,5,2,150,"{'criterion': 'squared_error', 'max_depth': 5,...",0.880767,0.798235,0.889012,0.840159,0.869522,0.855539,0.033084,24
3,0.616336,0.002805,0.041687,0.003829,squared_error,5,2,200,"{'criterion': 'squared_error', 'max_depth': 5,...",0.879801,0.798311,0.888968,0.840558,0.869924,0.855512,0.032902,25
4,0.159283,0.002449,0.013354,0.000479,squared_error,5,5,50,"{'criterion': 'squared_error', 'max_depth': 5,...",0.879140,0.798205,0.888955,0.839953,0.869058,0.855062,0.032819,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,2.321213,0.029512,0.101543,0.002380,poisson,,5,200,"{'criterion': 'poisson', 'max_depth': None, 'm...",0.858063,0.784899,0.868426,0.825490,0.850246,0.837425,0.029842,98
140,0.525529,0.013491,0.032069,0.001161,poisson,,10,50,"{'criterion': 'poisson', 'max_depth': None, 'm...",0.868595,0.788681,0.875813,0.824422,0.851375,0.841777,0.031906,80
141,1.026767,0.012080,0.057458,0.003919,poisson,,10,100,"{'criterion': 'poisson', 'max_depth': None, 'm...",0.869274,0.786659,0.872737,0.829583,0.852205,0.842091,0.031653,78
142,1.555018,0.011618,0.071743,0.004626,poisson,,10,150,"{'criterion': 'poisson', 'max_depth': None, 'm...",0.869384,0.788139,0.873443,0.831018,0.854174,0.843231,0.031308,72


In [10]:
result=grid.predict([[38, 27, 2, True, False]])
result



array([7066.00785212])