In [0]:
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.callbacks import TensorBoard
import pandas as pd
import numpy as np
from datetime import datetime
import tensorflow as tf

In [0]:
# model
class MLP(tf.keras.Model):
    def __init__(self):
        super(MLP, self).__init__()
        self.dense1 = tf.keras.layers.Dense(512, activation="relu")
        self.dense2 = tf.keras.layers.Dense(512, activation="relu")
        self.dropout1 = tf.keras.layers.Dropout(0.1)
        self.dense3 = tf.keras.layers.Dense(1028, activation="relu")
        self.dense4 = tf.keras.layers.Dense(1, activation="relu")

    def call(self, x):
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dropout1(x)
        x = self.dense3(x)
        return self.dense4(x)

In [0]:
# 0 ~ 1 の範囲に正則化
def min_max(x):
    min_value = x.min()
    max_value = x.max()
    result = (x - min_value)/(max_value - min_value)
    return result

def create_feature(data):
    feature = {}
    cat_cols = ['駅名', '建物構造', '間取り']
    for cat_col in cat_cols:
        cat_data = data[cat_col].values
        cat_categories = data[cat_col].unique().tolist()
        cat_indices = []
        for cat_datum in cat_data:
            cat_indices.append(cat_categories.index(cat_datum))
        feature[cat_col] = np.eye(len(cat_categories))[cat_indices]

    feature["所在階"] = min_max(data["所在階"].str.replace('B', '-').astype(int).values) #地下表記を数字に変換
    feature["徒歩分"] = min_max(data["徒歩分"].values)
    feature["専有面積"] = min_max(data["専有面積"].values)
    feature["部屋数"] = min_max(data["部屋数"].values)
    feature["築年"] = data["築年"].values
    feature["築年"] = pd.to_datetime(data["築年"].astype(str), format="%Y%M", errors="coerce")
    feature["築年"] = min_max((pd.to_datetime("2019-10-01") - feature["築年"]).dt.days.values.astype(int))

    feature_vec = np.hstack((
        feature["駅名"],
        feature["建物構造"],
        feature["間取り"],
        feature["徒歩分"][:, np.newaxis],
        feature["専有面積"][:, np.newaxis],
        feature["部屋数"][:, np.newaxis],
        feature["築年"][:, np.newaxis],
        feature["所在階"][:, np.newaxis],
    ))

    return feature_vec

In [0]:
data = pd.read_csv("dummy_data.csv")

In [0]:
#train test split
X = create_feature(data)
Y = data["成約価格"].values
x_train, x_test, y_train, y_test= train_test_split(X, Y, test_size=5000, random_state=0)
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=5000, random_state=0)

In [0]:
# train
model = MLP()
tensorboard = TensorBoard(log_dir="logs")
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    "./checkpoint/MLP-{epoch:04d}.ckpt",
    verbose=1,
    save_weights_only=True,
    period=500
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(lr=1e-1),
    loss='mean_absolute_percentage_error',
    metrics=[
        "mean_absolute_percentage_error", 
 #       "mean_absolute_error",
 #       "mean_squared_error"
    ]
)
model.fit(
    x_train, 
    y_train, 
    epochs=100,
    batch_size=512,
    validation_data=(x_valid, y_valid),
    callbacks=[tensorboard, checkpoint]
)

Train on 30000 samples, validate on 5000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

<tensorflow.python.keras.callbacks.History at 0x7f2ad96010f0>

In [0]:
train_mape, train_acc = model.evaluate(x_train,  y_train)
val_mape, val_acc = model.evaluate(x_valid,  y_valid)
print(f'train mape : {train_mape:.3f}%' )

print(f'valid mape : {val_mape:.3f}%' )

train mape : 6.573%
valid mape : 7.625%


In [0]:
test_mape, test_acc = model.evaluate(x_test,  y_test)
print(f'test mape : {test_mape:.3f}%' )

test mape : 7.382%


In [0]:
model.dropout1 = tf.keras.layers.Dropout(0.0)
data = data.append(pd.DataFrame.from_dict({
    "駅名": ["新橋"],
    "建物構造": ["SRC"],
    "徒歩分": [10],
    "専有面積": [30],
    "間取り": ["R"],
    "部屋数": [1],
    "築年": ["198001"],
    "所在階": ["5"],
    "成約価格": [-1]
})).reset_index(drop=True)
X = create_feature(data)
model.predict_on_batch(X[-1:])

array([[19491340.]], dtype=float32)

In [0]:
model.dropout1 = tf.keras.layers.Dropout(0.0)
data = data.append(pd.DataFrame.from_dict({
    "駅名": ["新橋"],
    "建物構造": ["SRC"],
    "徒歩分": [10],
    "専有面積": [30],
    "間取り": ["R"],
    "部屋数": [1],
    "築年": ["198001"],
    "所在階": ["5"],
    "成約価格": [-1]
})).reset_index(drop=True)
X = create_feature(data)
model.predict_on_batch(X[-1:])

array([[19491340.]], dtype=float32)