## Import scikit-learn and the yellow toolbox

In [47]:
# Local path to the "yellow" toolbox library folder (presumably where
# `ml_utils` is imported from). Machine-specific: change this.
yellow_follow = 'C:/Users/caspe/Desktop/yellow/lib'
import sys
# Register the folder only once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if yellow_follow not in sys.path:
    sys.path.append(yellow_follow)
import sqlite3
import pandas as pd
import ml_utils
import numpy as np
from math import floor
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

## Import TensorFlow

In [129]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

## Load datasets and scale them

In [140]:
# Local folder holding the buildings database (machine-specific, change this).
folder = "C:/Users/caspe/Desktop/paper2_revised/buildings_sync/"

in_path = folder + "buildings.sqlite"

# Read the filtered rows and always release the SQLite handle afterwards,
# even when the query raises.
db_cnx = sqlite3.connect(in_path)
try:
    df = pd.read_sql_query(
        "SELECT * FROM 'buildings' WHERE buildings.area_vol_ratio >= 1 AND buildings.vol_sum > 1;",
        db_cnx,
    )
finally:
    db_cnx.close()

# Shuffle the rows with a fixed seed instead of SQL `ORDER BY RANDOM()`, so
# that a Restart & Run All selects the same rows and reproduces the metrics.
# NOTE(review): this assumes SQLite returns the rows in a stable order for an
# unchanged database file; add an explicit ORDER BY on a key column if not.
df = df.sample(frac=1.0, random_state=42).reset_index(drop=True)

scaler = MinMaxScaler()
cols = ['area', 'perimeter']

# NOTE(review): the scaler is fitted on every row, i.e. before the train/test
# split further down, so the test rows influence the min/max used for scaling.
df_scaled = scaler.fit_transform(df[cols])

In [141]:
# Ready the training data: scaled feature matrix and the volume target.
# The scaled values are used directly as an array (columns follow `cols`).
x = np.asarray(df_scaled)
# Double brackets keep the target two-dimensional, shape (n_rows, 1).
y = df[['vol_sum']].to_numpy()

In [142]:
# Build a balance mask so that every volume class is equally represented.

# Bin edges handed to np.digitize; they split the target into six classes (0-5).
labels = [50, 150, 300, 500, 800]
# y is a single-column 2-D array, so take that column to get one class id per row.
truth_labels = np.digitize(y, labels)[:, 0]
freq = ml_utils.count_freq(truth_labels)
# The second column of `freq` appears to hold the per-class counts (see the
# table displayed below); the smallest count is passed on as `minority`.
minority = freq[:, 1].min()
balance_mask = ml_utils.minority_class_mask(truth_labels, minority)

## Investigate the classes

In [101]:
# Display the class frequency table returned by ml_utils.count_freq.
freq

array([[     0, 402273],
       [     1, 383556],
       [     2, 203936],
       [     3, 212904],
       [     4, 237773],
       [     5, 233828]], dtype=int64)

In [143]:
# Keep only the rows selected by the balance mask.
X, Y = x[balance_mask], y[balance_mask]

# Hold out 33% of the balanced rows for testing; the split seed is fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=42,
    test_size=0.33,
)

# Number of input columns of the feature matrix.
n_features = X.shape[1]

# Deep Learning step

In [144]:
# Define model: a small fully connected network mapping the scaled features
# to a single volume estimate.
# `tf.keras.Input` is referenced through `tf` because a bare `Input` is never
# imported in this notebook (NameError on a fresh kernel), and the shape is
# passed as a tuple, which is the form Keras documents for `shape`.
model_input = tf.keras.Input(shape=(n_features,), name="input")
# The intermediate tensors get their own name so that `model` only ever
# refers to the final Keras Model.
hidden = Dense(80, activation=tfa.activations.mish, kernel_initializer="he_normal")(model_input)
hidden = Dense(40, activation=tfa.activations.mish, kernel_initializer="he_normal")(hidden)
hidden = Dense(10, activation=tfa.activations.mish, kernel_initializer="he_normal")(hidden)
hidden = Dense(8, activation=tfa.activations.mish, kernel_initializer="he_normal")(hidden)

# ReLU on the output keeps the prediction non-negative.
predictions = Dense(1, activation="relu", dtype="float32")(hidden)

model = Model(inputs=[model_input], outputs=predictions)

In [145]:
# Define Optimizer: Adam driven by a triangular cyclical learning rate
# (1e-4 up to 1e-2), wrapped in Lookahead.
# NOTE(review): per the tensorflow_addons docs `step_size` is counted in
# training iterations (batches), so 6 gives a very short cycle - confirm this
# was not meant to be a number of epochs.
cyclical_lr = tfa.optimizers.TriangularCyclicalLearningRate(
    initial_learning_rate=1e-4,
    maximal_learning_rate=1e-2,
    step_size=6,
    scale_mode='cycle',
    name='TriangularCyclicalLearningRate',
)
adam = Adam(learning_rate=cyclical_lr, name="Adam")
optimizer = tfa.optimizers.Lookahead(adam)

In [146]:
# Metrics for testing model accuracy
def median_error(y_actual, y_pred):
    """Return the median (50th percentile) of the absolute errors.

    Args:
        y_actual: Ground-truth target values.
        y_pred: Model predictions for the same rows.

    Returns:
        The 50th percentile of ``|y_actual - y_pred|`` over the values passed in.
    """
    absolute_error = tf.math.abs(y_actual - y_pred)
    return tfp.stats.percentile(absolute_error, 50.0)

def abs_percentage(y_actual, y_pred):
    """Return the median absolute error relative to the true value.

    The result is a fraction (0.18 means 18 %), not a value scaled to 100.

    Args:
        y_actual: Ground-truth target values.
        y_pred: Model predictions for the same rows.

    Returns:
        The 50th percentile of ``|y_actual - y_pred| / (y_actual + 1e-10)``.
    """
    absolute_error = tf.abs(y_actual - y_pred)
    # The tiny constant keeps the denominator from being exactly zero.
    relative_error = absolute_error / (y_actual + 1e-10)
    return tfp.stats.percentile(relative_error, 50.0)

In [147]:
# Compile and fit model
# The metric order matters: model.evaluate reports the loss followed by these
# metrics in exactly this order.
tracked_metrics = [
    "mean_absolute_error",
    median_error,
    abs_percentage,
]
model.compile(optimizer=optimizer, loss='mean_absolute_error', metrics=tracked_metrics)

# Stop when the validation loss has not improved by at least 1.0 for 12
# consecutive epochs, and roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=1.0,
    patience=12,
    restore_best_weights=True,
)

# 30% of the training rows are set aside for validation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=1024,
    epochs=100,
    validation_split=0.3,
    callbacks=[early_stopping],
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


<tensorflow.python.keras.callbacks.History at 0x23defe9c850>

In [139]:
# Evaluate model on the held-out test set.
# NOTE(review): this cell is a duplicate of the evaluation cell further down,
# and its execution count (139) predates the training cell (147), so its
# recorded output belongs to an earlier run. Consider deleting it.
# model.evaluate returns the loss followed by the compiled metrics, in order.
loss, mean_absolute_error, median_absolute_error, absolute_percentage_error = model.evaluate(X_test, y_test, verbose=1)

# The labels name what is actually computed: the second and third metrics are
# medians (50th percentiles), not means, so they are not "MAE" / "MAPE".
print("Test accuracy:")
print(f"Mean Absolute Error (MAE): {round(mean_absolute_error, 5)}")
print(f"Median Absolute Error (MedAE): {round(median_absolute_error, 5)}")
print(f"Median Absolute Percentage Error (MdAPE): {round(absolute_percentage_error, 5)}")


Test accuracy:
Mean Absolute Error (MAE): 157.33722
Median Absolute Error (MAE): 43.92735
Absolute Percentage Error (MAPE): 0.18393


In [148]:
# Evaluate model on the held-out test set.
# model.evaluate returns the loss followed by the compiled metrics, in order.
loss, mean_absolute_error, median_absolute_error, absolute_percentage_error = model.evaluate(X_test, y_test, verbose=1)

# The labels name what is actually computed: the second and third metrics are
# medians (50th percentiles), not means, so they are not "MAE" / "MAPE".
print("Test accuracy:")
print(f"Mean Absolute Error (MAE): {round(mean_absolute_error, 5)}")
print(f"Median Absolute Error (MedAE): {round(median_absolute_error, 5)}")
print(f"Median Absolute Percentage Error (MdAPE): {round(absolute_percentage_error, 5)}")

Test accuracy:
Mean Absolute Error (MAE): 157.59041
Median Absolute Error (MAE): 44.04065
Absolute Percentage Error (MAPE): 0.18279
