In [11]:
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

In [12]:
dataset = pd.read_csv("insurance_pre.csv")
dataset

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.900,0,yes,16884.92400
1,18,male,33.770,1,no,1725.55230
2,28,male,33.000,3,no,4449.46200
3,33,male,22.705,0,no,21984.47061
4,32,male,28.880,0,no,3866.85520
...,...,...,...,...,...,...
1333,50,male,30.970,3,no,10600.54830
1334,18,female,31.920,0,no,2205.98080
1335,18,female,36.850,0,no,1629.83350
1336,21,female,25.800,0,no,2007.94500


In [13]:
dataset=pd.get_dummies(dataset,drop_first=True)
dataset

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.900,0,16884.92400,False,True
1,18,33.770,1,1725.55230,True,False
2,28,33.000,3,4449.46200,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.880,0,3866.85520,True,False
...,...,...,...,...,...,...
1333,50,30.970,3,10600.54830,True,False
1334,18,31.920,0,2205.98080,False,False
1335,18,36.850,0,1629.83350,False,False
1336,21,25.800,0,2007.94500,False,False


In [14]:
dataset.columns

Index(['age', 'bmi', 'children', 'charges', 'sex_male', 'smoker_yes'], dtype='object')

In [16]:
independent=dataset[['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
independent

Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.900,0,False,True
1,18,33.770,1,True,False
2,28,33.000,3,True,False
3,33,22.705,0,True,False
4,32,28.880,0,True,False
...,...,...,...,...,...
1333,50,30.970,3,True,False
1334,18,31.920,0,False,False
1335,18,36.850,0,False,False
1336,21,25.800,0,False,False


In [6]:
dependent = dataset[['charges']]
dependent

Unnamed: 0,charges
0,16884.92400
1,1725.55230
2,4449.46200
3,21984.47061
4,3866.85520
...,...
1333,10600.54830
1334,2205.98080
1335,1629.83350
1336,2007.94500


In [17]:
param_grid={
    "criterion": ["squared_error", "friedman_mse", "absolute_error", "poisson"],
    "splitter" : ["best","random"],
    "max_features": ["sqrt", "log2",None]
}
grid=GridSearchCV(DecisionTreeRegressor(),param_grid,refit=True,verbose=3,n_jobs=-1)
grid.fit(independent,dependent)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


In [18]:
re=grid.cv_results_
print("The R_Score value for best parameter {}:".format(grid.best_params_))

The R_Score value for best parameter {'criterion': 'squared_error', 'max_features': 'log2', 'splitter': 'best'}:


In [19]:
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.012998,0.001089,0.00722,0.001485,squared_error,sqrt,best,"{'criterion': 'squared_error', 'max_features':...",0.683647,0.62271,0.774634,0.72559,0.722295,0.705775,0.05059,3
1,0.013848,0.004164,0.01118,0.002934,squared_error,sqrt,random,"{'criterion': 'squared_error', 'max_features':...",0.682203,0.491918,0.522156,0.711342,0.674141,0.616352,0.090616,24
2,0.020318,0.003245,0.008623,0.001417,squared_error,log2,best,"{'criterion': 'squared_error', 'max_features':...",0.747763,0.709497,0.748759,0.732051,0.718466,0.731307,0.015598,1
3,0.016396,0.002804,0.009673,0.002871,squared_error,log2,random,"{'criterion': 'squared_error', 'max_features':...",0.68353,0.449711,0.607768,0.73627,0.627973,0.62105,0.096707,22
4,0.021337,0.00241,0.008157,0.000409,squared_error,,best,"{'criterion': 'squared_error', 'max_features':...",0.718407,0.647404,0.726445,0.731345,0.667751,0.69827,0.034094,7
5,0.016639,0.002893,0.00846,0.00114,squared_error,,random,"{'criterion': 'squared_error', 'max_features':...",0.718339,0.597934,0.698992,0.654776,0.718906,0.677789,0.04624,16
6,0.015316,0.002658,0.00727,0.000922,friedman_mse,sqrt,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.699496,0.43303,0.681413,0.703446,0.709747,0.645426,0.106615,20
7,0.012233,0.002112,0.007337,0.001364,friedman_mse,sqrt,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.578582,0.528408,0.702063,0.628877,0.64652,0.61689,0.059283,23
8,0.014073,0.002681,0.007557,0.000598,friedman_mse,log2,best,"{'criterion': 'friedman_mse', 'max_features': ...",0.721285,0.626324,0.749468,0.650816,0.68469,0.686516,0.044884,14
9,0.013262,0.00297,0.008933,0.003455,friedman_mse,log2,random,"{'criterion': 'friedman_mse', 'max_features': ...",0.64154,0.530991,0.68409,0.665239,0.616909,0.627754,0.053385,21


In [20]:
result=grid.predict([[38, 27, 2, True, False]])
result



array([6455.86265])