<a href="https://colab.research.google.com/github/chaerui7967/ko_Apartment_Deal/blob/master/DNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 서울 집값 예측

In [None]:
import tensorflow as tf

# Display the installed TensorFlow version as the cell output
# (the saved output of this cell shows '2.6.0').
tf.__version__

'2.6.0'

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset CSV under
# /content/drive can be read by the following cells.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Location of the dataset CSV on the mounted Google Drive.
DATA_PATH = '/content/drive/My Drive/Colab Notebooks/project/seoul_5.csv'

# Load the full dataset into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [None]:
# Peek at the first row to check the column layout.
df.head(n=1)

Unnamed: 0,gu_l,dong_l,used_y,square,per_cost_man,Price per squar,floor,top10,YN_r,YN_top10,s_c,Hospital,cost
0,0,8,32,77.75,1922.829582,2122.127392,7,0,1,1,1,0.0,149500


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Target is the 'cost' column; every remaining column is used as a feature.
y = df['cost']
X = df.drop(columns=['cost'])

# Hold out 20% of the rows as the test set (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=99
)

## Standardization

In [None]:
# Column-wise statistics are taken from X_train only, then applied to both
# splits so the test data is scaled with the training statistics.
mean, std = X_train.mean(axis=0), X_train.std(axis=0)

# z-score each feature: subtract the training mean, divide by the training std.
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std

## Train & Validation Split

In [None]:
# Carve a validation set (20%) out of the standardized training data,
# using the same seed value as the train/test split.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.2, random_state=99
)

# Show the resulting shapes as the cell output.
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

((73050, 12), (18263, 12), (73050,), (18263,))

## DNN
### 1) Model Define

In [None]:
from tensorflow.keras import models, layers

# Fully connected regression network: 12 input features -> one predicted value.
seoul = models.Sequential(
    [
        layers.Dense(36, activation='relu', input_shape=(12,)),
        # Hidden block: linear layer, batch normalization, then ReLU and dropout.
        layers.Dense(60),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.Dropout(0.2),
        # Second hidden block, without batch normalization.
        layers.Dense(60),
        layers.Activation('relu'),
        layers.Dropout(0.2),
        layers.Dense(36, activation='relu'),
        # Single linear output unit for the regression target.
        layers.Dense(1),
    ],
    name='Regression',
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
seoul.summary()

Model: "Regression"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 36)                468       
_________________________________________________________________
dense_12 (Dense)             (None, 60)                2220      
_________________________________________________________________
batch_normalization_5 (Batch (None, 60)                240       
_________________________________________________________________
activation_5 (Activation)    (None, 60)                0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 60)                0         
_________________________________________________________________
dense_13 (Dense)             (None, 60)                3660      
_________________________________________________________________
activation_6 (Activation)    (None, 60)                0

### 2) Model Compile

In [None]:
# Regression setup: optimise mean squared error with RMSprop and report
# mean absolute error as the tracked metric.
seoul.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

### 3) Model Fit

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop training once the validation MAE has not reached a new minimum
# for 50 consecutive epochs; verbose=1 logs when that happens.
es = EarlyStopping(
    monitor='val_mae',
    patience=50,
    mode='min',
    verbose=1,
)

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Write the model to 'best_seoul.h5' whenever the validation MAE reaches a
# new minimum; epochs that do not improve it are not saved.
mc = ModelCheckpoint(
    'best_seoul.h5',
    monitor='val_mae',
    save_best_only=True,
    mode='min',
    verbose=1,
)

In [None]:
# Mini-batch size used for training.
# The previous value of 1 meant the BatchNormalization layer normalised every
# sample against itself during training (batch variance of 0, so the layer's
# training output collapses to its learned offset), and it required one
# optimizer step per training row in every epoch.
BATCH_SIZE = 32

# Train for at most 500 epochs, validating on the held-out validation split.
# `es` ends training early when val_mae stops improving and `mc` saves the
# best-scoring model to disk. The returned History object is kept for plotting.
Hist_seoul = seoul.fit(X_train, y_train,
                       epochs = 500,
                       batch_size = BATCH_SIZE,
                       callbacks = [es, mc],
                       validation_data = (X_valid, y_valid))

Epoch 1/500

Epoch 00001: val_mae improved from inf to 88480.38281, saving model to best_seoul.h5
Epoch 2/500

Epoch 00002: val_mae improved from 88480.38281 to 88420.28906, saving model to best_seoul.h5
Epoch 3/500

Epoch 00003: val_mae improved from 88420.28906 to 88359.48438, saving model to best_seoul.h5
Epoch 4/500

Epoch 00004: val_mae improved from 88359.48438 to 88298.77344, saving model to best_seoul.h5
Epoch 5/500

Epoch 00005: val_mae improved from 88298.77344 to 88238.14844, saving model to best_seoul.h5
Epoch 6/500

Epoch 00006: val_mae improved from 88238.14844 to 88178.03125, saving model to best_seoul.h5
Epoch 7/500

Epoch 00007: val_mae improved from 88178.03125 to 88117.07031, saving model to best_seoul.h5
Epoch 8/500

Epoch 00008: val_mae improved from 88117.07031 to 88057.15625, saving model to best_seoul.h5
Epoch 9/500

Epoch 00009: val_mae improved from 88057.15625 to 87996.41406, saving model to best_seoul.h5
Epoch 10/500

Epoch 00010: val_mae improved from 87996

### 4) Model Evaluate

In [None]:
# Score the trained network on the held-out test split. With the model
# compiled as loss='mse' and metrics=['mae'], `evaluate` returns the loss (MSE)
# followed by the MAE.
# The model built in this notebook is `seoul`; the name `boston` used here
# previously is not defined anywhere in the notebook.
test_mse_score, test_mae_score = seoul.evaluate(X_test, y_test)

print('MAE is :',test_mae_score)

### 5) Visualization

In [None]:
# One x position per recorded epoch, numbered from 1.
epochs = range(1, 1 + len(Hist_seoul.history['val_mae']))

# Raw validation MAE per epoch.
plt.figure(figsize=(9, 6))
plt.plot(epochs, Hist_seoul.history['val_mae'])
plt.gca().set(title='Validation MAE',
              xlabel='Epochs',
              ylabel='Mean Absolute Error')
plt.grid()
plt.show()

In [None]:
def smooth_curve(points, factor=0.9):
  """Return an exponentially smoothed copy of ``points``.

  The first value is kept as-is. Every later value is blended with the
  previous smoothed value: ``previous * factor + point * (1 - factor)``.

  Args:
    points: Iterable of numeric values to smooth.
    factor: Weight given to the previous smoothed value (default 0.9).

  Returns:
    A new list with one smoothed value per input point (empty for empty input).
  """
  result = []
  for value in points:
    if not result:
      # Nothing to blend with yet: the first point seeds the series.
      result.append(value)
      continue
    last = result[-1]
    result.append(last * factor + value * (1 - factor))
  return result

# Smooth the validation MAE curve, leaving out the first 5 recorded epochs.
mae_history = smooth_curve(Hist_seoul.history['val_mae'][5:])

# Smoothed validation MAE; x positions are renumbered from 1 after the skip.
plt.figure(figsize=(9, 6))
plt.plot(range(1, 1 + len(mae_history)), mae_history)
plt.gca().set(title='Validation MAE',
              xlabel='Epochs',
              ylabel='Mean Absolute Error')
plt.grid()
plt.show()