In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from sklearn import tree
from sklearn.tree import DecisionTreeRegressor
from statsmodels.api import OLS
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs against an older scikit-learn release.
price = load_boston()

# Build one frame holding every feature column, the regression target (PRICE)
# and a column of ones.
# NOTE(review): `const` is not used by the cells visible here - presumably an
# intercept term for the statsmodels OLS import; confirm against later cells.
df = pd.DataFrame(data=price.data, columns=price.feature_names).assign(
    PRICE=price.target,
    const=1,
)

print(df.head(n=10))

      CRIM    ZN  INDUS  CHAS    NOX     RM    AGE     DIS  RAD    TAX  PTRATIO       B  LSTAT  PRICE  const
0  0.00632  18.0   2.31   0.0  0.538  6.575   65.2  4.0900  1.0  296.0     15.3  396.90   4.98   24.0      1
1  0.02731   0.0   7.07   0.0  0.469  6.421   78.9  4.9671  2.0  242.0     17.8  396.90   9.14   21.6      1
2  0.02729   0.0   7.07   0.0  0.469  7.185   61.1  4.9671  2.0  242.0     17.8  392.83   4.03   34.7      1
3  0.03237   0.0   2.18   0.0  0.458  6.998   45.8  6.0622  3.0  222.0     18.7  394.63   2.94   33.4      1
4  0.06905   0.0   2.18   0.0  0.458  7.147   54.2  6.0622  3.0  222.0     18.7  396.90   5.33   36.2      1
5  0.02985   0.0   2.18   0.0  0.458  6.430   58.7  6.0622  3.0  222.0     18.7  394.12   5.21   28.7      1
6  0.08829  12.5   7.87   0.0  0.524  6.012   66.6  5.5605  5.0  311.0     15.2  395.60  12.43   22.9      1
7  0.14455  12.5   7.87   0.0  0.524  6.172   96.1  5.9505  5.0  311.0     15.2  396.90  19.15   27.1      1
8  0.21124  12.5   

In [6]:
# Hold out 30% of the samples for evaluation; the fixed random_state keeps the
# partition identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    price.data,
    price.target,
    test_size=0.3,
    random_state=10,
)

In [7]:
# Baseline: ordinary least squares fitted on the training split and scored on
# the held-out split.
ols_reg = LinearRegression().fit(x_train, y_train)  # fit() returns the estimator itself
ols_pred = ols_reg.predict(x_test)

ols_mse = mean_squared_error(y_test, ols_pred)  # reused below for the RMSE line
print("MAE:", mean_absolute_error(y_test, ols_pred))
print("MSE:", ols_mse)
print("sqrt(MSE):", np.sqrt(ols_mse))

MAE: 3.70712713727181
MSE: 29.326596526123172
sqrt(MSE): 5.415403634644713


In [8]:
# Fit a regression tree with no depth limit and score it on the held-out split.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so equally good splits can otherwise be resolved differently on
# each run and the reported metrics would not be reproducible.
# Metrics recorded before the seed was added may differ from a fresh run.
reg_tree = DecisionTreeRegressor(random_state=10)
reg_tree.fit(x_train, y_train)
tree_pred = reg_tree.predict(x_test)

tree_mse = metrics.mean_squared_error(y_test, tree_pred)  # computed once, reused for RMSE
print("MAE:", metrics.mean_absolute_error(y_test, tree_pred))
print("MSE:", tree_mse)
print("sqrt(MSE):", np.sqrt(tree_mse))

MAE: 3.0085526315789473
MSE: 20.601644736842108
sqrt(MSE): 4.538903472959312


In [9]:
# Fit a regression tree capped at depth 4 and score it on the held-out split.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so equally good splits can otherwise be resolved differently on
# each run and the reported metrics would not be reproducible.
# Metrics recorded before the seed was added may differ from a fresh run.
reg_tree = DecisionTreeRegressor(max_depth=4, random_state=10)
reg_tree.fit(x_train, y_train)
tree_pred = reg_tree.predict(x_test)

tree_mse = metrics.mean_squared_error(y_test, tree_pred)  # computed once, reused for RMSE
print("MAE:", metrics.mean_absolute_error(y_test, tree_pred))
print("MSE:", tree_mse)
print("sqrt(MSE):", np.sqrt(tree_mse))

MAE: 3.0908089251522357
MSE: 21.23070009405348
sqrt(MSE): 4.607678384398533


In [11]:
# Fit a regression tree capped at depth 6 and score it on the held-out split.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so equally good splits can otherwise be resolved differently on
# each run and the reported metrics would not be reproducible.
# Metrics recorded before the seed was added may differ from a fresh run.
reg_tree = DecisionTreeRegressor(max_depth=6, random_state=10)
reg_tree.fit(x_train, y_train)
tree_pred = reg_tree.predict(x_test)

tree_mse = metrics.mean_squared_error(y_test, tree_pred)  # computed once, reused for RMSE
print("MAE:", metrics.mean_absolute_error(y_test, tree_pred))
print("MSE:", tree_mse)
print("sqrt(MSE):", np.sqrt(tree_mse))

MAE: 2.885423691440139
MSE: 19.77843636315064
sqrt(MSE): 4.447295398683411


In [12]:
# Fit a regression tree capped at depth 5 and score it on the held-out split.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so equally good splits can otherwise be resolved differently on
# each run and the reported metrics would not be reproducible.
# Metrics recorded before the seed was added may differ from a fresh run.
reg_tree = DecisionTreeRegressor(max_depth=5, random_state=10)
reg_tree.fit(x_train, y_train)
tree_pred = reg_tree.predict(x_test)

tree_mse = metrics.mean_squared_error(y_test, tree_pred)  # computed once, reused for RMSE
print("MAE:", metrics.mean_absolute_error(y_test, tree_pred))
print("MSE:", tree_mse)
print("sqrt(MSE):", np.sqrt(tree_mse))

MAE: 2.8815412771364866
MSE: 19.591104561378664
sqrt(MSE): 4.426183972834688
