# Decision Tree Regression

## Importing The Library

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk

## Importing The DataSet

In [2]:
data = pd.read_csv("insurance.csv")
data.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## Encoding categorical data

In [3]:
obj_column = list(data.select_dtypes(['object']).columns)
data_encoded = pd.get_dummies(data=data,columns=obj_column)
data_encoded = data_encoded[["sex_female",'sex_male','smoker_no','smoker_yes',\
                             'region_northeast','region_northwest','region_southeast',\
                             'region_southwest','age','bmi','children','charges']]
data_encoded.head()

Unnamed: 0,sex_female,sex_male,smoker_no,smoker_yes,region_northeast,region_northwest,region_southeast,region_southwest,age,bmi,children,charges
0,1,0,0,1,0,0,0,1,19,27.9,0,16884.924
1,0,1,1,0,0,0,1,0,18,33.77,1,1725.5523
2,0,1,1,0,0,0,1,0,28,33.0,3,4449.462
3,0,1,1,0,0,1,0,0,33,22.705,0,21984.47061
4,0,1,1,0,0,1,0,0,32,28.88,0,3866.8552


In [5]:
x = data_encoded.iloc[:, :-1].values
y = data_encoded.iloc[:, -1].values

## Splitting the dataset into the Training set and Test set

In [6]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 0)

## Training the Decision Tree Regression model on the Training set

In [7]:
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor()
regressor.fit(x_train,y_train)

DecisionTreeRegressor()

## Predicting the Test set results

In [8]:
y_pred = regressor.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[10797.34  9724.53]
 [ 8871.15  8547.69]
 [42983.46 45702.02]
 [12574.05 12950.07]
 [ 9566.99  9644.25]
 [21984.47  4500.34]
 [ 2196.47  2198.19]
 [10848.13 11436.74]
 [ 7281.51  7537.16]
 [ 6128.8   5425.02]
 [27375.9   6753.04]
 [ 8932.08 10493.95]
 [ 7345.08  7337.75]
 [ 4571.41  4185.1 ]
 [18246.5  18310.74]
 [10560.49 10702.64]
 [12323.94 12523.6 ]
 [ 5594.85  3490.55]
 [ 6548.2   6457.84]
 [34254.05 33475.82]
 [24667.42 23967.38]
 [12269.69 12643.38]
 [10797.34 23045.57]
 [24520.26 23065.42]
 [ 1391.53  1674.63]
 [ 6551.75  4667.61]
 [ 2680.95  3732.63]
 [ 6600.36  7682.67]
 [ 3757.84  3756.62]
 [ 8027.97  8413.46]
 [ 7201.7   8059.68]
 [47896.79 48970.25]
 [21797.   12979.36]
 [10713.64 20630.28]
 [15359.1  14571.89]
 [ 3943.6   4137.52]
 [ 8240.59  8347.16]
 [37165.16 51194.56]
 [39983.43 40003.33]
 [23082.96  1880.49]
 [21984.47  5458.05]
 [ 3062.51  2867.12]
 [19539.24 20149.32]
 [44400.41 47496.49]
 [36307.8  36149.48]
 [ 3579.83 26018.95]
 [10560.49 19749.38]
 [ 7046.72  6

## Evaluating the Model Performance

In [9]:
from sklearn.metrics import r2_score
r2_score(y_test,y_pred)

0.7704518851887988

In [15]:
regressor.predict([[1,0,0,1,1,0,1,1,25,26,3]])

array([17085.27])