In [112]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from livelossplot import PlotLossesKeras
import pickle

In [100]:
# --- Configuration ---------------------------------------------------------
# glob() yields matches in arbitrary (filesystem-dependent) order. Sorting makes
# the index-based train/test assignment below reproducible between runs/machines.
paths = sorted(glob("../outputs/withNewFeatures/dataByLocation*.csv"))
debug_limit = None  # set to an int to load only the first N files
if debug_limit is None:
    debug_limit = len(paths)
elif debug_limit < 2:
    print("Insufficient number of files to debug. Setting debug_limit to 2.")
    debug_limit = 2
test_train_ratio = 0.2
window_length = 10
min_drive_length = 5 * 60

# --- Build sliding windows per file -----------------------------------------
# Each list collects one (n_windows, window_length, n_columns) array per file;
# they are concatenated once after the loop.
all_windows = []
train_windows = []
test_windows = []
for i, path in enumerate(paths[:debug_limit]):
    df = pd.read_csv(path).dropna().drop_duplicates()
    # Fewer rows than one window (including zero rows) cannot be windowed:
    # sliding_window_view would raise, and a[-1] below would fail on no rows.
    if len(df) < window_length:
        continue
    a = df.to_numpy()
    # Span of column 0 between the first and last row is compared against
    # min_drive_length (assumes column 0 is the "Time" column - TODO confirm).
    if a[-1][0] - a[0][0] < min_drive_length:
        continue
    df = df.drop(columns=["Time"])
    a = df.to_numpy()
    windows = np.lib.stride_tricks.sliding_window_view(a, (window_length, a.shape[1]))
    windows = np.reshape(windows, (windows.shape[0], window_length, a.shape[1]))
    all_windows.append(windows)
    # Every int(1 / test_train_ratio)-th file (by position in `paths`, skipped
    # files included) goes to the test split; whole files are never divided.
    isTest = i % int(1/test_train_ratio) == 0
    (test_windows if isTest else train_windows).append(windows)

if not train_windows or not test_windows:
    raise ValueError(
        "Need at least one usable file in both the train and the test split; "
        f"got {len(train_windows)} train and {len(test_windows)} test file(s)."
    )

all_windows = np.concatenate(all_windows)
train_windows = np.concatenate(train_windows)
test_windows = np.concatenate(test_windows)

# --- Split each window into features and labels ------------------------------
for subset in [train_windows, test_windows]:
    # Features: every column from index 2 onward, for every row of the window.
    X = subset[:, :, 2:].astype(float)
    # Labels: first-row value minus last-row value of columns 0 and 1.
    # NOTE(review): this is start - end; confirm that sign is intended.
    y = (subset[:, 0, :2] - subset[:, -1, :2]).astype(float)
    if subset is train_windows:
        X_train = X
        y_train = y
    elif subset is test_windows:
        X_test = X
        y_test = y

print("Train windows:", X_train.shape)
print("Test windows:", X_test.shape)
print("Ratio =", X_test.shape[0] / (X_train.shape[0] + X_test.shape[0]))

def column_from_windows(windows, i):
    """Rebuild the 1-D series of feature ``i`` from overlapping windows.

    Keeps every step of the first window and only the last step of each
    following window, which undoes a stride-1 sliding window.

    Args:
        windows: array of shape (n_windows, window_length, n_features).
        i: index of the feature (last axis) to extract.

    Returns:
        1-D array of length ``n_windows + window_length - 1``, or an empty
        array when ``windows`` contains no windows.
    """
    if windows.shape[0] == 0:
        return np.array([])
    # The window length is taken from the array itself rather than from a
    # notebook-level global, so the helper works for any window size.
    first_window = windows[0, :, i]
    last_step_of_later_windows = windows[1:, -1, i]
    return np.concatenate([first_window, last_step_of_later_windows])

def scaler_for_windows(windows):
    """Fit a MinMaxScaler on the de-overlapped rows of ``windows``.

    Each feature column is rebuilt with ``column_from_windows`` so that rows
    shared by overlapping windows are not counted repeatedly. The fitted
    scaler is also pickled to ``window_scaler.pkl`` in the working directory.

    Args:
        windows: array of shape (n_windows, window_length, n_features).

    Returns:
        The fitted ``MinMaxScaler``.
    """
    feature_count = windows.shape[2]
    per_feature_series = [column_from_windows(windows, feature) for feature in range(feature_count)]
    # Stack as (n_features, n_rows), then flip to the (n_rows, n_features)
    # layout that the scaler is fitted on.
    rows = np.array(per_feature_series).T
    fitted = MinMaxScaler().fit(rows)
    with open("window_scaler.pkl", "wb") as handle:
        pickle.dump(fitted, handle)
    return fitted

def scaler_for_labels(labels):
    """Fit a MinMaxScaler on ``labels`` and pickle it to ``label_scaler.pkl``.

    Args:
        labels: 2-D array with one row per sample.

    Returns:
        The fitted ``MinMaxScaler``.
    """
    fitted = MinMaxScaler().fit(labels)
    with open("label_scaler.pkl", "wb") as handle:
        pickle.dump(fitted, handle)
    return fitted

def scale_windows(windows, scaler):
    """Apply ``scaler.transform`` to every row of every window.

    All windows are flattened to one (n_windows * window_length, n_features)
    matrix so the scaler is called once instead of once per window; the
    result is reshaped back to the input shape.

    Args:
        windows: array of shape (n_windows, window_length, n_features).
        scaler: object with a ``transform(rows)`` method that accepts and
            returns a 2-D (n_rows, n_features) array.

    Returns:
        A new float64 array with the same shape as ``windows``.
    """
    if windows.shape[0] == 0:
        # Nothing to transform; do not call the scaler with zero rows.
        return np.zeros(windows.shape)
    n_windows, steps, n_features = windows.shape
    rows = windows.reshape(n_windows * steps, n_features)
    scaled_rows = np.asarray(scaler.transform(rows), dtype=float)
    return scaled_rows.reshape(windows.shape)

def scale_labels(labels, scaler):
    """Return ``labels`` transformed by an already-fitted ``scaler``.

    Args:
        labels: 2-D array with one row per sample.
        scaler: object exposing ``transform``.

    Returns:
        Whatever ``scaler.transform(labels)`` returns.
    """
    return scaler.transform(labels)

def unwindow(windows):
    """Collapse overlapping stride-1 windows back into a 2-D row sequence.

    Keeps all rows of the first window followed by the last row of every
    later window. The window length is read from ``windows`` itself, so the
    function does not depend on any notebook-level variable.

    Args:
        windows: array of shape (n_windows, window_length, n_features).

    Returns:
        Array of shape (n_windows + window_length - 1, n_features), or
        (0, n_features) when there are no windows.
    """
    if windows.shape[0] == 0:
        return np.zeros((0, windows.shape[2]))
    return np.concatenate([windows[0], windows[1:, -1, :]], axis=0)

# Both scalers are fitted on the training split only and then reused for the
# test split, so no test-set statistics enter the scaling.
window_scaler, label_scaler = scaler_for_windows(X_train), scaler_for_labels(y_train)

# Scale the feature windows, then flatten them back to 2-D row sequences.
# NOTE(review): unwindow() keeps only the last row of every window after the
# first, and the windows of all files were concatenated before this point.
# When the flattened rows are re-windowed by timeseries_dataset_from_array in
# the next cell, the first window_length - 1 windows of each file after the
# first appear to mix rows from two files while keeping labels computed from
# the original per-file windows - confirm whether that is acceptable.
X_train, X_test = (
    unwindow(scale_windows(split, window_scaler)) for split in (X_train, X_test)
)
y_train, y_test = (scale_labels(split, label_scaler) for split in (y_train, y_test))

Train windows: (1559382, 10, 17)
Test windows: (307815, 10, 17)
Ratio = 0.16485405664212185


In [103]:
batch_size = 512

# Options shared by the training and the validation pipeline.
dataset_options = dict(batch_size=batch_size, shuffle=True, seed=123)

# Windows of `window_length` consecutive rows are cut from the flattened
# feature rows and paired with the per-window label arrays.
train_data = keras.utils.timeseries_dataset_from_array(
    X_train, y_train, window_length, **dataset_options
)

test_data = keras.utils.timeseries_dataset_from_array(
    X_test, y_test, window_length, **dataset_options
)

In [104]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention, then a feed-forward stage.

    Each stage normalises its input first and adds the un-normalised input
    back as a residual.

    Args:
        inputs: Keras tensor whose last dimension is the channel count.
        head_size: ``key_dim`` of the multi-head attention layer.
        num_heads: number of attention heads.
        ff_dim: filter count of the hidden kernel-size-1 convolution.
        dropout: rate used inside the attention layer and by both Dropout layers.

    Returns:
        Keras tensor with the same last dimension as ``inputs``.
    """
    # --- Self-attention stage ---
    normed_inputs = layers.LayerNormalization(epsilon=1e-6)(inputs)
    self_attention = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(normed_inputs, normed_inputs)
    attended = layers.Dropout(dropout)(attended)
    res = attended + inputs

    # --- Feed-forward stage (kernel-size-1 convolutions act per time step) ---
    normed_res = layers.LayerNormalization(epsilon=1e-6)(res)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(normed_res)
    hidden = layers.Dropout(dropout)(hidden)
    # Project back to the input channel count so the residual add is valid.
    channel_count = inputs.shape[-1]
    projected = layers.Conv1D(filters=channel_count, kernel_size=1)(hidden)
    return projected + res

def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
    output_dim=None,
):
    """Build the Transformer-encoder regression model.

    Args:
        input_shape: shape of one sample, without the batch dimension.
        head_size: ``key_dim`` of every attention layer.
        num_heads: attention heads per encoder block.
        ff_dim: hidden filter count of each block's feed-forward stage.
        num_transformer_blocks: number of stacked encoder blocks.
        mlp_units: iterable of widths for the Dense layers after pooling.
        dropout: dropout rate inside the encoder blocks.
        mlp_dropout: dropout rate after each Dense layer of the head.
        output_dim: width of the final linear layer. When ``None`` (default)
            it falls back to ``y_train.shape[1]`` from the notebook namespace,
            which is what the function always did before this parameter
            existed.

    Returns:
        An uncompiled ``keras.Model``.
    """
    if output_dim is None:
        # Backward-compatible default: read the label width from the global.
        output_dim = y_train.shape[1]

    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # NOTE(review): with data_format="channels_first" the pooling averages over
    # the last axis, which for a (steps, features) input is the feature axis -
    # confirm this is intended rather than pooling over time ("channels_last").
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(output_dim, activation="linear")(x)
    return keras.Model(inputs, outputs)

In [120]:
# One sample is `window_length` time steps by the number of feature columns.
feature_count = X_train.shape[-1]
input_shape = (window_length, feature_count)

# Architecture hyperparameters, collected in one place for easy tuning.
model_hyperparameters = dict(
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)

model = build_model(input_shape, **model_hyperparameters)

def inner_part_custom_metric(y_true, y_pred, i):
    """Per-sample absolute error of output column ``i``.

    The previous ``sqrt(square(d))`` is mathematically ``|d|``; ``tf.math.abs``
    gives the same value without the intermediate square, which can underflow
    or overflow for extreme differences.

    Note that this is an absolute error, not a root-mean-square error: when
    the per-sample values are averaged the result is the mean absolute error
    of that output, despite the ``RMSE_*`` names of the wrappers using it.

    Args:
        y_true: tensor of shape (batch, n_outputs) with the targets.
        y_pred: tensor of shape (batch, n_outputs) with the predictions.
        i: index of the output column to report.

    Returns:
        Tensor of shape (batch,) holding ``|y_pred - y_true|`` for column ``i``.
    """
    absolute_error = tf.math.abs(y_pred - y_true)
    return absolute_error[:, i]

def custom_metric_output_1():
    """Create the metric function for the first model output (column 0).

    The returned function is named ``RMSE_1``; it delegates to
    ``inner_part_custom_metric`` with output index 0.
    """
    output_index = 0

    def RMSE_1(y_true, y_pred):
        return inner_part_custom_metric(y_true, y_pred, output_index)

    return RMSE_1

def custom_metric_output_2():
    """Create the metric function for the second model output (column 1).

    The returned function is named ``RMSE_2``; it delegates to
    ``inner_part_custom_metric`` with output index 1.
    """
    output_index = 1

    def RMSE_2(y_true, y_pred):
        return inner_part_custom_metric(y_true, y_pred, output_index)

    return RMSE_2

# Train on MSE; report the overall RMSE plus one per-output metric for each
# of the two model outputs.
model.compile(
    loss="mean_squared_error",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=[keras.metrics.RootMeanSquaredError(),custom_metric_output_1(), custom_metric_output_2()],
)

# Saves the full model to "checkpoints" whenever val_loss improves.
checkpoint = keras.callbacks.ModelCheckpoint(
    "checkpoints",
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch"
)

# The checkpoint callback was previously created but never passed to fit(),
# so nothing was ever written to disk; it is registered here.
callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
    checkpoint,
    PlotLossesKeras(),
]

# validation_steps=100 means each epoch validates on 100 batches of test_data,
# not on the whole test set.
history = model.fit(
    train_data,
    validation_data=test_data,
    validation_steps=100,
    epochs=200,
    callbacks=callbacks
)

Epoch 1/200


2022-07-12 22:51:31.563719: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


  67/3046 [..............................] - ETA: 18:16 - loss: 0.1315 - root_mean_squared_error: 0.3626 - RMSE_1: 0.3363 - RMSE_2: 0.2846