In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree

# Load the car price table; the relative path is resolved against the
# current working directory.
data = pd.read_csv("CarPrice.csv")

# Target: the 'price' column, kept as a one-column DataFrame.
by = data[['price']]

# Features: every other column except 'CarName', which is dropped outright
# instead of being converted to category codes.
bx = data.drop(columns=['price', 'CarName'])

# Integer encoding of the categorical feature columns of bx.
#
# Columns with a hand-written lookup table are encoded strictly: a label that
# is missing from its table raises KeyError instead of being silently skipped.

#fueltype
fueltypeToInt = {"gas": 0, "diesel": 1}

#aspiration
aspirationToInt = {"std": 0, "turbo": 1}

#doornumber (spelled-out count -> numeric count)
doornumberToInt = {"two": 2, "four": 4}

#carbody
carbodyToInt = {"convertible": 0, "hatchback": 1, "sedan": 2, "wagon": 3, "hardtop": 4}

#drivewheel
drivewheelToInt = {"rwd": 0, "fwd": 1, "4wd": 2}

#enginelocation
enginelocationToInt = {"front": 0, "rear": 1}

#cylindernumber (spelled-out count -> numeric count)
cylindernumberToInt = {
    "two": 2, "three": 3, "four": 4, "five": 5,
    "six": 6, "eight": 8, "twelve": 12,
}

# Column name -> lookup table used to translate that column's labels.
explicit_encodings = {
    'fueltype': fueltypeToInt,
    'aspiration': aspirationToInt,
    'doornumber': doornumberToInt,
    'carbody': carbodyToInt,
    'drivewheel': drivewheelToInt,
    'enginelocation': enginelocationToInt,
    'cylindernumber': cylindernumberToInt,
}
for column_name, label_to_code in explicit_encodings.items():
    bx[column_name] = [label_to_code[label] for label in bx[column_name]]

# Columns without a hand-written table: let pandas assign one integer code
# per distinct category.
for column_name in ('enginetype', 'fuelsystem'):
    bx[column_name] = bx[column_name].astype('category').cat.codes

# Repeated hold-out evaluation of a depth-3 decision tree regressor.
repeats = 5
mses = []  # test-set mean squared error, one entry per repeat
r2s = []   # test-set R^2 score, one entry per repeat

scaler = MinMaxScaler()

for run in range(repeats):
    # A new random 75/25 split is drawn on every repeat (no seed is fixed,
    # so the reported numbers vary from run to run).
    bx_train, bx_test, by_train, by_test = train_test_split(
        bx, by, test_size=0.25
    )

    # The scaler is re-fitted on the training part only and then applied to
    # both parts, so the test rows never influence the scaling.
    fit_scalar = scaler.fit(bx_train)
    bx_train_scaled = fit_scalar.transform(bx_train)
    bx_test_scaled = fit_scalar.transform(bx_test)

    # by_train is a one-column DataFrame; flatten it to the 1-D target vector.
    dtm = DecisionTreeRegressor(max_depth=3)
    dtm.fit(bx_train_scaled, by_train.values.ravel())
    by_pred = dtm.predict(bx_test_scaled)

    mses.append(mean_squared_error(by_test, by_pred))
    r2s.append(r2_score(by_test, by_pred))

# Summary over all repeats: mean and spread of the MSE, and the mean R^2.
print("Mean MSE:", np.mean(mses))
print("Standard deviation:", np.std(mses))
print("R^2:", np.mean(r2s))

# Draw the most recently fitted decision tree (dtm).
plt.figure(figsize=(13, 13))
# Label the split nodes with the columns the tree was actually trained on.
# The scaled training matrix keeps the column order of bx, so bx.columns lines
# up with the tree's feature indices. The previous hard-coded list
# ('total_bill', 'tip', ...) named columns that do not exist in this dataset.
tree.plot_tree(
    dtm,
    feature_names=list(bx.columns),
    rounded=True,
    filled=True,
    proportion=True,
)

Mean MSE: 10688091.139380872
Standard deviation: 3132642.696166991
R^2: 0.8268847610364641
