In [13]:
from tensorflow import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import MinMaxScaler
import statsmodels.api as sm 

# 결측치 탐지
import missingno as msno

In [4]:
(x_train, y_train), (x_test, y_test) = keras.datasets.boston_housing.load_data()

## 종속변수 확인

In [6]:
y_train[:5]

array([15.2, 42.3, 50. , 21.1, 17.7])

In [7]:
x_train.dtype

dtype('float64')

## 데이터 프레임으로 만들면 편함

In [8]:
train_df = pd.DataFrame(x_train)

In [9]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72
1,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11
2,4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26
3,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01
4,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65


In [11]:
train_df.isna().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
dtype: int64

In [12]:
train_df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
count,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0,404.0
mean,3.745111,11.480198,11.104431,0.061881,0.557356,6.267082,69.010644,3.740271,9.440594,405.898515,18.47599,354.783168,12.740817
std,9.240734,23.767711,6.811308,0.241238,0.117293,0.709788,27.940665,2.030215,8.69836,166.374543,2.200382,94.111148,7.254545
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,188.0,12.6,0.32,1.73
25%,0.081437,0.0,5.13,0.0,0.453,5.87475,45.475,2.0771,4.0,279.0,17.225,374.6725,6.89
50%,0.26888,0.0,9.69,0.0,0.538,6.1985,78.5,3.1423,5.0,330.0,19.1,391.25,11.395
75%,3.674808,12.5,18.1,0.0,0.631,6.609,94.1,5.118,24.0,666.0,20.2,396.1575,17.0925
max,88.9762,100.0,27.74,1.0,0.871,8.725,100.0,10.7103,24.0,711.0,22.0,396.9,37.97


- 단위가 많이 다르다~

In [14]:
x_sub, x_val, y_sub, y_val = train_test_split(
    x_train,
    y_train,
    test_size = 0.2,
    random_state = 34
)

In [15]:
x_sub.shape, x_val.shape, x_test.shape

((323, 13), (81, 13), (102, 13))

In [17]:
mm = MinMaxScaler()
scaled_sub = mm.fit_transform(x_sub)
scaled_val = mm.transform(x_val)
scaled_test = mm.transform(x_test)

# 모델 설계

In [18]:
model = keras.Sequential()

# 입력층
model.add(keras.Input(shape=(13,)))

# 은닉층
model.add(keras.layers.Dense(32, activation = "relu")) 
model.add(keras.layers.Dense(8, activation = "relu")) 

# 출력층
# model.add(keras.layers.Dense(1, activation="linear")) # linear 이거랑 비워두는거랑 같다!
model.add(keras.layers.Dense(1, activation="relu")) # 음수가 싫어요 => relu 써도 된다!

In [19]:
es_cb = keras.callbacks.EarlyStopping(
    patience= 8,
    restore_best_weights= True
)

In [20]:
model.compile(optimizer="adam", loss = "mae", metrics = ["root_mean_squared_error", "r2_score"])

In [21]:
model.summary()

In [22]:
model.fit(scaled_sub, y_sub, epochs=200, validation_data =(scaled_val, y_val)
         , callbacks= [es_cb], batch_size=12
         )

Epoch 1/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 22.1788 - r2_score: -5.6110 - root_mean_squared_error: 24.1160 - val_loss: 20.4666 - val_r2_score: -5.9940 - val_root_mean_squared_error: 22.1441
Epoch 2/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 20.9400 - r2_score: -5.0270 - root_mean_squared_error: 23.0262 - val_loss: 18.8363 - val_r2_score: -5.1057 - val_root_mean_squared_error: 20.6901
Epoch 3/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 19.0285 - r2_score: -4.2016 - root_mean_squared_error: 21.3914 - val_loss: 16.5391 - val_r2_score: -3.9904 - val_root_mean_squared_error: 18.7052
Epoch 4/200
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 16.4166 - r2_score: -3.1762 - root_mean_squared_error: 19.1673 - val_loss: 13.4349 - val_r2_score: -2.6887 - val_root_mean_squared_error: 16.0818
Epoch 5/200
[1m27/27[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1c4e7a7b620>

- val_r2_score: 0.5030... 으어어... 성능이 안좋다...

# 모델 성능 평가

In [23]:
model.evaluate(scaled_test,y_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2.9352 - r2_score: 0.7870 - root_mean_squared_error: 4.2106 


[2.935208797454834, 4.210590362548828, 0.7870223522186279]

In [24]:
y_pred = model.predict(scaled_test).flatten()

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [27]:
for i in range(10):
    print(f"실제가격 {y_test[i]} , 예상가격 {y_pred[i]}")

실제가격 7.2 , 예상가격 11.035684585571289
실제가격 18.8 , 예상가격 17.72910499572754
실제가격 19.0 , 예상가격 20.969379425048828
실제가격 27.0 , 예상가격 29.121356964111328
실제가격 22.2 , 예상가격 22.757389068603516
실제가격 24.5 , 예상가격 19.617252349853516
실제가격 31.2 , 예상가격 28.42461395263672
실제가격 22.9 , 예상가격 22.59920883178711
실제가격 20.5 , 예상가격 18.23196792602539
실제가격 23.2 , 예상가격 20.03685760498047
