# Decision Tree Regressor

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [4]:
data = pd.read_csv("insurance_pre.csv")
data.head(5)

Unnamed: 0,age,sex,bmi,children,smoker,charges
0,19,female,27.9,0,yes,16884.924
1,18,male,33.77,1,no,1725.5523
2,28,male,33.0,3,no,4449.462
3,33,male,22.705,0,no,21984.47061
4,32,male,28.88,0,no,3866.8552


In [9]:
df = pd.get_dummies(data,drop_first = True,dtype=int)

In [11]:
independent = df[['age', 'bmi', 'children','sex_male', 'smoker_yes']]

In [13]:
dependent = df[["charges"]]

In [15]:
independent.head(5)


Unnamed: 0,age,bmi,children,sex_male,smoker_yes
0,19,27.9,0,0,1
1,18,33.77,1,1,0
2,28,33.0,3,1,0
3,33,22.705,0,1,0
4,32,28.88,0,1,0


In [17]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(independent,dependent,test_size=0.30,random_state=0)

In [19]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [21]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {'criterion':['mse','mae','friedman_mse'],
'max_features': ['auto','sqrt','log2'],
'splitter':['best','random']}

In [23]:
grid = GridSearchCV(DecisionTreeRegressor(),param_grid,refit=True,verbose=3,n_jobs=-1)
grid.fit(X_train,y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [25]:
y_pred = grid.predict(X_test)

In [27]:
from sklearn.metrics import r2_score
r2score = r2_score(y_test,y_pred)
r2score

0.7158822100985474

In [30]:
print(grid.best_params_)

{'criterion': 'friedman_mse', 'max_features': 'log2', 'splitter': 'best'}


In [32]:
re=grid.cv_results_
table = pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.0,0.0,0.0,0.0,mse,auto,best,"{'criterion': 'mse', 'max_features': 'auto', '...",,,,,,,,5
1,0.0,0.0,0.0,0.0,mse,auto,random,"{'criterion': 'mse', 'max_features': 'auto', '...",,,,,,,,5
2,0.000602,0.000738,0.0,0.0,mse,sqrt,best,"{'criterion': 'mse', 'max_features': 'sqrt', '...",,,,,,,,5
3,0.0,0.0,0.0,0.0,mse,sqrt,random,"{'criterion': 'mse', 'max_features': 'sqrt', '...",,,,,,,,5
4,0.0,0.0,0.0,0.0,mse,log2,best,"{'criterion': 'mse', 'max_features': 'log2', '...",,,,,,,,5
5,0.001441,0.002882,0.0,0.0,mse,log2,random,"{'criterion': 'mse', 'max_features': 'log2', '...",,,,,,,,5
6,0.0,0.0,0.0,0.0,mae,auto,best,"{'criterion': 'mae', 'max_features': 'auto', '...",,,,,,,,5
7,0.002001,0.004001,0.0,0.0,mae,auto,random,"{'criterion': 'mae', 'max_features': 'auto', '...",,,,,,,,5
8,0.002001,0.004001,0.0,0.0,mae,sqrt,best,"{'criterion': 'mae', 'max_features': 'sqrt', '...",,,,,,,,5
9,0.000993,0.001034,0.0,0.0,mae,sqrt,random,"{'criterion': 'mae', 'max_features': 'sqrt', '...",,,,,,,,5


In [44]:
age_input=float(input("Age:"))
bmi_input=float(input("BMI:"))
children_input=float(input("Children:"))
sex_male_input=int(input("Sex Male 0 or 1:"))
smoker_yes_input=int(input("Smoker Yes 0 or 1:"))


Age: 19
BMI: 27.900
Children: 0
Sex Male 0 or 1: 0
Smoker Yes 0 or 1: 1


In [46]:
new_data = [[age_input, bmi_input, children_input, sex_male_input, smoker_yes_input]]
# IMPORTANT: Use transform(), NOT fit_transform()
new_data_scaled = sc.transform(new_data)  
print(new_data_scaled)

[[-1.46169465 -0.45767803 -0.89833872 -0.97676557  1.98149332]]


In [48]:
# Make prediction using trained model
Future_Prediction = grid.predict(new_data_scaled)

print("Future_Prediction={}".format(Future_Prediction))

Future_Prediction=[16884.924]
