#### 라이브러리 불러오기

In [9]:
import os
import re
import platform

import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from tensorflow.keras.layers import Dense

from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt

#### 설정

In [10]:
# Fixed random seed; passed as random_state to train_test_split so the split is reproducible.
SEED = 1234

#### 모델 평가함수 생성

In [11]:
# Small positive constant added to the denominator so that a target value of
# exactly 0 does not cause a division by zero in the percentage error.
epsilon = 1e-10

def user_mape(y_test, y_pred):
    """Return the mean percentage accuracy, ``mean((1 - |y - y_hat| / |y|) * 100)``.

    Despite the name, the value is "100 minus the percentage error", so a
    perfect prediction scores 100 and larger errors give smaller (possibly
    negative) values.

    Args:
        y_test: Ground-truth target values (scalar or array-like supporting
            element-wise arithmetic).
        y_pred: Predicted values, aligned with ``y_test``.

    Returns:
        The mean of the per-sample percentage accuracy.
    """
    # The magnitude of the target is used as the denominator so that negative
    # targets do not flip the sign of the relative error, and so that epsilon
    # always pushes the denominator away from zero.
    rel_err = np.abs(y_test - y_pred) / (np.abs(y_test) + epsilon)
    return np.mean((1 - rel_err) * 100)

def evaluation_func(y_test, y_pred, verbose=1):
    """Compute regression metrics for a set of predictions and optionally print them.

    Args:
        y_test: Ground-truth target values.
        y_pred: Predicted values, aligned with ``y_test``.
        verbose: Any value other than 0 prints a one-line metric summary.

    Returns:
        Tuple ``(mse, rmse, mae, mape, mape2)``. The R2 score is computed and
        printed but is not part of the returned tuple. ``mape`` comes from
        ``mean_absolute_percentage_error``; ``mape2`` is the result of
        ``user_mape``.
    """
    sq_err = mean_squared_error(y_test, y_pred)
    root_sq_err = np.sqrt(sq_err)
    abs_err = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    pct_err = mean_absolute_percentage_error(y_test, y_pred)
    pct_acc = user_mape(y_test, y_pred)

    if verbose != 0:
        # Same single-line report: six metrics, comma separated, 6 decimals each.
        labelled = [
            f'MSE: {sq_err:.6f}',
            f'RMSE: {root_sq_err:.6f}',
            f'MAE: {abs_err:.6f}',
            f'R2_SCORE: {r2:.6f}',
            f'MAPE: {pct_err:.6f}',
            f'MAPE2: {pct_acc:.6f}',
        ]
        print(', '.join(labelled))

    return sq_err, root_sq_err, abs_err, pct_err, pct_acc

#### 데이터 불러오기

In [12]:
# Data directory is ./data relative to the current working directory.
PATH_DATA = os.path.join(os.getcwd(), 'data')
# Full path of the Boston housing spreadsheet inside the data directory.
PATH_XLS = os.path.join(PATH_DATA, '11-boston-housing.xls')

In [13]:
# Load the spreadsheet at PATH_XLS into a DataFrame.
df_boston = pd.read_excel(PATH_XLS)

#### 데이터 전처리

In [14]:
# Count missing values per column; the result is shown as the cell output.
df_boston.isna().sum()

CRIM         0
ZN           0
INDUS        0
CHAS         0
NOX          0
RM           0
AGE          0
DIS          0
RAD          0
TAX          0
PTRATIO      0
B            0
LSTAT        0
MEDV         0
CAT. MEDV    0
dtype: int64

In [15]:
# Separate the feature columns from the prediction target.
# NOTE(review): the column listing shows a 'CAT. MEDV' column, which by its
# name looks derived from MEDV; it stays in df_X here. Confirm whether it
# leaks the target and should be excluded from the features as well.
target_col = 'MEDV'
df_y = df_boston[target_col]
df_X = df_boston.drop(columns=target_col)

# Hold out 30% of the rows for testing; SEED keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df_X, df_y, test_size=0.3, random_state=SEED
)

#### 모델링

In [16]:
# Baseline: decision tree with default hyperparameters and no tuning.
# Only random_state is set, so that repeated runs build the same tree and
# report the same metrics (the tree's split search is otherwise randomized).
model_dtr = DecisionTreeRegressor(random_state=SEED).fit(X_train, y_train)
pred_dtr = model_dtr.predict(X_test)
# The returned metric tuple is not needed here; the call is made for its printout.
_ = evaluation_func(y_test, pred_dtr)

MSE: 16.089737, RMSE: 4.011201, MAE: 2.903947, R2_SCORE: 0.825453, MAPE: 0.149169, MAPE2: 85.083054
