In [1]:
import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.models import Sequential, Model

from tensorflow.keras.layers import (
    Input,
    Dense,
    Reshape,
    Flatten,
    Dropout,
    BatchNormalization,
    Activation,
    ZeroPadding2D,
    LeakyReLU,
    UpSampling2D,
    Conv2D,
    Convolution2D,
    MaxPooling2D,
    Concatenate,
    GaussianNoise,
    GaussianDropout,
    Lambda,
    GlobalAveragePooling2D,
)

from tensorflow.keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical

import h5py
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import os
import pathlib

import time

import math

In [2]:
# Report the library versions and runtime capabilities of this session.
print("Tensorflow version: ", tf.version.VERSION)  # tf2
print("Keras version: ", tf.keras.__version__)  # 2.2.4-tf

# tf.enable_eager_execution() is intentionally not called here; the flag
# printed below shows the actual state.
print("Is eager execution enabled: ", tf.executing_eagerly())

# tf.test.is_gpu_available() is deprecated; TensorFlow's own warning recommends
# tf.config.list_physical_devices('GPU'). A non-empty list means a GPU is visible.
print("Is there a GPU available: ", bool(tf.config.list_physical_devices("GPU")))

Tensorflow version:  2.1.0
Keras version:  2.2.4-tf
Is eager execution enabled:  True
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Is there a GPU available:  True


In [3]:
# TFRecord files holding the training and test examples; both are opened
# through create_dataset(), which reads them as GZIP-compressed records.
path_train_tfrecord = "fp56_train.tfrecord"
path_test_tfrecord = "fp56_test.tfrecord"

# model save dir

In [4]:
# Directory that receives the checkpoints and the training history.
dir_model = "vgg_cam/"

# Create it (and any missing parents) up front; an existing directory is fine.
_model_dir_path = pathlib.Path(dir_model)
_model_dir_path.mkdir(parents=True, exist_ok=True)

In [5]:
fp_dim = (56, 56, 6)  # shape of the "floorplan" feature stored in each record


def _parse_function(example_proto):
    """Decode one serialized tf.Example into a (floorplan, year) pair.

    Args:
        example_proto: A single serialized tf.Example, as yielded by a
            TFRecordDataset.

    Returns:
        Tuple (floorplan, year): the float32 floorplan tensor of shape
        ``fp_dim`` and the int64 year label.
    """
    # Feature specs. Missing values fall back to the given defaults
    # (all-zero floorplan, empty plan id, year -1).
    floorplan_spec = tf.io.FixedLenFeature(
        fp_dim, tf.float32, default_value=tf.zeros(fp_dim, tf.float32)
    )
    plan_id_spec = tf.io.FixedLenFeature([], tf.string, default_value="")
    year_spec = tf.io.FixedLenFeature([], tf.int64, default_value=-1)  # 0~9
    # Specs for "sido", "norm_area", "num_rooms" and "num_baths" were left
    # disabled in this notebook and are therefore not parsed.

    record = tf.io.parse_single_example(
        example_proto,
        {
            "floorplan": floorplan_spec,
            "plan_id": plan_id_spec,
            "year": year_spec,
        },
    )

    # plan_id is parsed but not handed on; only the model input and label are.
    return record["floorplan"], record["year"]

In [6]:
def _onehot_year(fp, year, depth=10):
    """Replace the integer year label with a one-hot vector.

    Args:
        fp: Floorplan tensor; passed through unchanged.
        year: Integer class index of the year bin (1970~4 -> 0, 2015~9 -> 9).
        depth: Length of the one-hot vector, i.e. the number of year bins.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        Tuple (fp, year_onehot).
    """
    year_onehot = tf.one_hot(year, depth)
    return (fp, year_onehot)

In [7]:
def create_dataset(filepath, compression_type="GZIP", num_parallel_calls=4):
    """Build a dataset of parsed (floorplan, year) pairs from TFRecord file(s).

    Args:
        filepath: Path of the TFRecord file to read; an array of paths works
            as well.
        compression_type: Compression of the record files, forwarded to
            tf.data.TFRecordDataset. Defaults to "GZIP".
        num_parallel_calls: Number of parallel loaders used when mapping the
            parser over the records. Defaults to 4.

    Returns:
        A dataset whose elements are what _parse_function returns. No
        shuffling or batching is applied here; callers add those.
    """
    dataset = tf.data.TFRecordDataset(filepath, compression_type=compression_type)

    # Decode every serialized example with the parser.
    dataset = dataset.map(_parse_function, num_parallel_calls=num_parallel_calls)

    # The year label is deliberately kept as an integer class id: the model is
    # trained with sparse_categorical_crossentropy, so _onehot_year is not
    # mapped over the dataset.
    return dataset

In [8]:
def VGG16_convolutions():
    """Build the convolutional trunk: Gaussian input noise plus five conv blocks.

    The input shape is derived from ``fp_dim`` and the backend's image data
    format. Every convolution is 3x3, ReLU-activated and "same"-padded, and is
    named ``conv<block>_<index>``. Blocks 1 and 2 are followed by a stride-1,
    "same"-padded max pool; blocks 3 and 4 by a stride-2 max pool; block 5 has
    no pooling after it.

    Returns:
        An uncompiled Sequential model ending at ``conv5_3``.
    """
    height, width, channels = fp_dim
    if K.image_data_format() == "channels_last":
        input_shape = (height, width, channels)
    else:
        input_shape = (channels, height, width)

    # (block number, filters, conv layers in the block, pooling kwargs or None)
    block_specs = [
        (1, 64, 2, {"strides": (1, 1), "padding": "same"}),
        (2, 128, 2, {"strides": (1, 1), "padding": "same"}),
        (3, 256, 3, {"strides": (2, 2)}),
        (4, 512, 3, {"strides": (2, 2)}),
        (5, 512, 3, None),
    ]

    model = Sequential()
    model.add(GaussianNoise(0.1, input_shape=input_shape))

    for block_no, filters, conv_count, pool_kwargs in block_specs:
        for conv_no in range(1, conv_count + 1):
            model.add(
                Conv2D(
                    filters,
                    (3, 3),
                    activation="relu",
                    name="conv{}_{}".format(block_no, conv_no),
                    padding="same",
                )
            )
        if pool_kwargs is not None:
            model.add(MaxPooling2D((2, 2), **pool_kwargs))

    return model

In [9]:
num_classes = 10  # size of the softmax output layer


def create_model():
    """Assemble and compile the classifier.

    The trunk from VGG16_convolutions() is extended with global average
    pooling, 50% dropout and a ``num_classes``-way softmax layer, then
    compiled with the "sgd" optimizer, sparse categorical cross-entropy
    (labels are integer class ids) and the accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    model = VGG16_convolutions()

    classifier_head = (
        GlobalAveragePooling2D(),
        Dropout(0.5),
        Dense(num_classes, activation="softmax"),
    )
    for layer in classifier_head:
        model.add(layer)

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="sgd",
        metrics=["accuracy"],
    )
    return model

In [10]:
# Checkpoint file-name template: epoch number, validation loss and validation
# accuracy are filled in when a file is written
# (e.g. "model-01-1.83-31.9%.hdf5").
_checkpoint_template = "model-{epoch:02d}-{val_loss:.2f}-{val_accuracy:.1%}.hdf5"

# save_weights_only is left unset, as in the original cell.
callback_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=dir_model + _checkpoint_template,
    verbose=1,
)

In [11]:
# Stop training once val_loss has not improved for 20 consecutive epochs.
# restore_best_weights=True rolls the model back to the weights of the best
# val_loss epoch when the stop triggers; without it the model keeps the weights
# of the last epoch, 20 epochs past the best one.
callback_stop = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=20, restore_best_weights=True
)

# run

In [12]:
# Pipeline settings shared by the two datasets below.
BATCH_SIZE = 8
SHUFFLE_BUFFER_SIZE = 1024

# Training data is shuffled (buffer of SHUFFLE_BUFFER_SIZE examples) and then
# batched; neither dataset is repeated.
train_dataset = create_dataset(path_train_tfrecord)
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

# Test data keeps its stored order and is only batched.
test_dataset = create_dataset(path_test_tfrecord)
test_dataset = test_dataset.batch(BATCH_SIZE)

train_dataset, test_dataset

(<BatchDataset shapes: ((None, 56, 56, 6), (None,)), types: (tf.float32, tf.int64)>,
 <BatchDataset shapes: ((None, 56, 56, 6), (None,)), types: (tf.float32, tf.int64)>)

In [13]:
# Build the compiled classifier and print its layer-by-layer summary.
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gaussian_noise (GaussianNois (None, 56, 56, 6)         0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 56, 56, 64)        3520      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 56, 56, 64)        36928     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 56, 56, 64)        0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 56, 56, 128)       73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 56, 56, 128)       147584    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 56, 56, 128)       0

In [14]:
# Checkpoint to resume from, and the epoch it was saved at.
# NOTE(review): callback_checkpoint writes file names that also contain the
# validation accuracy (e.g. "model-12-1.26-50.3%.hdf5"), so path_best must be
# set to the real file name before resuming.
path_best = dir_model + "model-15-1.36.hdf5"
epoch_best = 0  # 0 if starting from fresh

# A resume was requested but the checkpoint is missing: fall back to a fresh
# run and reset epoch_best, so that the epoch numbering derived from it later
# matches the epochs that were actually trained.
if epoch_best and not os.path.exists(path_best):
    print("Checkpoint not found, training from scratch: ", path_best)
    epoch_best = 0

if epoch_best:
    model.load_weights(path_best)

# initial_epoch=0 is fit()'s default, so a fresh run behaves exactly as a call
# without the argument; a resumed run continues counting from epoch_best.
history = model.fit(
    train_dataset,
    epochs=50,
    initial_epoch=epoch_best,
    validation_data=test_dataset,
    callbacks=[callback_checkpoint, callback_stop],
)

Epoch 1/50
   5075/Unknown - 1155s 228ms/step - loss: 1.8163 - accuracy: 0.2780
Epoch 00001: saving model to vgg_cam/model-01-1.83-31.9%.hdf5
Epoch 2/50
Epoch 00002: saving model to vgg_cam/model-02-1.48-39.9%.hdf5
Epoch 3/50
Epoch 00003: saving model to vgg_cam/model-03-1.50-39.2%.hdf5
Epoch 4/50
Epoch 00004: saving model to vgg_cam/model-04-1.42-43.8%.hdf5
Epoch 5/50
Epoch 00005: saving model to vgg_cam/model-05-1.47-44.3%.hdf5
Epoch 6/50
Epoch 00006: saving model to vgg_cam/model-06-1.34-47.0%.hdf5
Epoch 7/50
Epoch 00007: saving model to vgg_cam/model-07-1.37-47.8%.hdf5
Epoch 8/50
Epoch 00008: saving model to vgg_cam/model-08-1.31-48.0%.hdf5
Epoch 9/50
Epoch 00009: saving model to vgg_cam/model-09-1.29-47.8%.hdf5
Epoch 10/50
Epoch 00010: saving model to vgg_cam/model-10-1.23-50.5%.hdf5
Epoch 11/50
Epoch 00011: saving model to vgg_cam/model-11-1.18-52.3%.hdf5
Epoch 12/50
Epoch 00012: saving model to vgg_cam/model-12-1.26-50.3%.hdf5
Epoch 13/50
Epoch 00013: saving model to vgg_cam/mod

Epoch 29/50
Epoch 00029: saving model to vgg_cam/model-29-2.03-52.1%.hdf5
Epoch 30/50
Epoch 00030: saving model to vgg_cam/model-30-1.85-51.2%.hdf5
Epoch 31/50
Epoch 00031: saving model to vgg_cam/model-31-1.67-54.6%.hdf5
Epoch 32/50
Epoch 00032: saving model to vgg_cam/model-32-1.93-52.8%.hdf5
Epoch 33/50
Epoch 00033: saving model to vgg_cam/model-33-2.14-52.6%.hdf5
Epoch 34/50
Epoch 00034: saving model to vgg_cam/model-34-2.16-53.0%.hdf5
Epoch 35/50
Epoch 00035: saving model to vgg_cam/model-35-2.26-53.3%.hdf5
Epoch 36/50
Epoch 00036: saving model to vgg_cam/model-36-2.37-52.9%.hdf5
Epoch 37/50
Epoch 00037: saving model to vgg_cam/model-37-2.62-53.3%.hdf5


In [15]:
# Show the per-epoch metric lists recorded by model.fit().
history.history

{'loss': [1.8162978943348331,
  1.4635991291833268,
  1.3362321493349507,
  1.2741757292742564,
  1.2264919695851133,
  1.186024851397086,
  1.1507806714265016,
  1.119309566991013,
  1.0876969628553919,
  1.0570432796616984,
  1.0256374557767984,
  0.9991143535520004,
  0.9688518497502137,
  0.9401483448623548,
  0.9102872164239202,
  0.8780909352328626,
  0.8454820460959358,
  0.8092848961224338,
  0.7753078669178949,
  0.7370844810395072,
  0.6985614552739348,
  0.6580675742025299,
  0.6145530962747002,
  0.5768494394976398,
  0.5350521280238432,
  0.4967964432391811,
  0.46053187873514906,
  0.42668259218678334,
  0.39550364380784503,
  0.3682810392729146,
  0.34136750161174495,
  0.3096840356207254,
  0.28847593629735113,
  0.2692262731716117,
  0.24416366183208096,
  0.22865911675981732,
  0.21053600871395625],
 'accuracy': [0.27796936,
  0.4164491,
  0.45667273,
  0.47802848,
  0.4984482,
  0.51677424,
  0.5251983,
  0.5404453,
  0.5535987,
  0.5646337,
  0.5773683,
  0.589832,


In [16]:
# Tabulate the recorded metrics, one row per trained epoch. Rows are labelled
# with 1-based epoch numbers that continue after epoch_best.
_n_epochs_run = len(history.history["loss"])
_epoch_numbers = range(epoch_best + 1, epoch_best + _n_epochs_run + 1)

df_hist = pd.DataFrame(history.history, index=_epoch_numbers)
df_hist.index.name = "epoch"

In [17]:
# Display the metrics table built from history.history (indexed by epoch).
df_hist

Unnamed: 0_level_0,loss,accuracy,val_loss,val_accuracy
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.816298,0.277969,1.827609,0.319453
2,1.463599,0.416449,1.477872,0.399213
3,1.336232,0.456673,1.499651,0.391548
4,1.274176,0.478028,1.424726,0.437953
5,1.226492,0.498448,1.467642,0.442511
6,1.186025,0.516774,1.338576,0.46965
7,1.150781,0.525198,1.371165,0.478144
8,1.11931,0.540445,1.308885,0.479801
9,1.087697,0.553599,1.291435,0.477833
10,1.057043,0.564634,1.232405,0.505076


In [18]:
# Append this run's metrics to the history CSV kept next to the checkpoints.
path_hist = dir_model + "history.csv"

# Emit the header row only when the file is being created, so that appended
# runs do not repeat it.
_is_new_file = not os.path.exists(path_hist)
df_hist.to_csv(path_hist, mode="a", header=_is_new_file)