In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from data_prep import initial_preprocess
from window_generator import WindowGenerator

In [8]:
# ========================================
#                 SETTINGS
# ========================================

# Data input file
# RAW_DATA_PATH and PREPROCESSED_DIR are handed positionally to
# initial_preprocess(); ROWS_TO_READ is forwarded as its `nrows` argument.
# NOTE(review): both paths are absolute and user-specific, so the notebook
# will not run unchanged on another machine.
RAW_DATA_PATH = '/users/facsupport/asharma/Data/pbj_full.csv'
PREPROCESSED_DIR = '/users/facsupport/asharma/Data/prep/'
ROWS_TO_READ = 50000

# Weights to split data set
# These are relative weights: the preprocessing cell divides by their sum
# before computing the split indices, so they do not have to add up to 1.
TRAINING_WEIGHT = 0.7
VALIDATION_WEIGHT = 0.2
TEST_WEIGHT = 0.1

# For model training
# MAX_EPOCHS is the `epochs` value given to model.fit(); early stopping on
# val_loss may end training sooner. VERBOSE_TRAINING is forwarded as Keras'
# `verbose` flag during training and validation-set evaluation.
MAX_EPOCHS = 20
VERBOSE_TRAINING = 1

# Window parameters
# Passed to WindowGenerator in the order (INPUT_WIDTH, LABEL_WIDTH, GAP_WIDTH).
# Presumably measured in time steps (rows) -- TODO confirm in window_generator.
INPUT_WIDTH = 7
GAP_WIDTH = 0
LABEL_WIDTH = 1

In [9]:
# ========================================
#               PREPROCESSING
# ========================================

# Run the project preprocessing step; it returns the data plus a second
# value (`info`) that is kept for later use.
df, info = initial_preprocess(
    RAW_DATA_PATH,
    PREPROCESSED_DIR,
    nrows=ROWS_TO_READ,
    fill_missing_shifts=True,
    normalize=True,
)

# Sequential (unshuffled) train/validation/test split.
# The weights are normalized by their sum, then turned into row offsets.
n = len(df)
weights_sum = TRAINING_WEIGHT + VALIDATION_WEIGHT + TEST_WEIGHT
train_fraction = TRAINING_WEIGHT / weights_sum
train_val_fraction = (TRAINING_WEIGHT + VALIDATION_WEIGHT) / weights_sum
split1, split2 = int(train_fraction * n), int(train_val_fraction * n)
train_df, val_df, test_df = df[:split1], df[split1:split2], df[split2:]

# Build the windowed datasets; 'hours' is the only label column.
window = WindowGenerator(
    train_df,
    val_df,
    test_df,
    INPUT_WIDTH,
    LABEL_WIDTH,
    GAP_WIDTH,
    label_columns=['hours'],
)

Loading preprocessed data from '/users/facsupport/asharma/Data/prep/pbj_nrows_50000_zeros_norm.csv'...
Failed.
Loading data...
Filling missing shifts...
Saving preprocessed data...
Preprocessing finished.


In [10]:
# ========================================
#                 MODELS
# ========================================

# Recurrent model: one LSTM layer followed by a single-unit linear head.
# NOTE(review): with return_sequences=True the model emits one prediction per
# input time step. With INPUT_WIDTH=7 and LABEL_WIDTH=1 that output may be
# broadcast against a single label step in the loss -- confirm against the
# shapes WindowGenerator actually produces.
lstm_model = tf.keras.models.Sequential()
# Shape [batch, time, features] => [batch, time, lstm_units]
lstm_model.add(tf.keras.layers.LSTM(64, return_sequences=True))
# Shape => [batch, time, 1]
lstm_model.add(tf.keras.layers.Dense(units=1))

# Feed-forward baseline: two 64-unit ReLU layers and a single-unit output.
# Dense layers act on the last axis only, so any time axis is carried through.
dense = tf.keras.Sequential()
dense.add(tf.keras.layers.Dense(units=64, activation='relu'))
dense.add(tf.keras.layers.Dense(units=64, activation='relu'))
dense.add(tf.keras.layers.Dense(units=1))

In [None]:
# ========================================
#                 TRAINING
# ========================================

def compile_and_fit(model, window, patience=3, verbose=0, max_epochs=None):
    """Compile `model` with MSE loss / Adam / MAE metric and fit it.

    Args:
        model: Keras model to compile and train (compiled in place).
        window: Object exposing `.train` and `.val` datasets; `.train` is
            used for fitting and `.val` as validation data.
        patience: Number of epochs without `val_loss` improvement that the
            EarlyStopping callback tolerates before stopping.
        verbose: Keras verbosity flag forwarded to `model.fit`.
        max_epochs: Upper bound on training epochs. When None (the default)
            the notebook-level MAX_EPOCHS setting is used, which matches the
            previous behavior.

    Returns:
        The object returned by `model.fit` (the Keras training history).
    """
    # Resolve the default at call time so edits to the settings cell are
    # picked up without re-defining this function.
    if max_epochs is None:
        max_epochs = MAX_EPOCHS

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,
        mode='min',
    )
    model.compile(
        loss=tf.losses.MeanSquaredError(),
        optimizer=tf.optimizers.Adam(),
        metrics=[tf.metrics.MeanAbsoluteError()],
    )

    history = model.fit(
        window.train,
        epochs=max_epochs,
        validation_data=window.val,
        callbacks=[early_stopping],
        verbose=verbose,
    )
    return history

# Train both models. Each training history is stored under its own name so
# the dense run is not overwritten by the LSTM run.
print()
print("Training dense model.")
dense_history = compile_and_fit(dense, window, verbose=VERBOSE_TRAINING)

print()
print("Training LSTM model.")
lstm_history = compile_and_fit(lstm_model, window, verbose=VERBOSE_TRAINING)

# Backward compatibility: `history` used to end up holding the LSTM run.
history = lstm_history



Training dense model.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Training LSTM model.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


In [None]:
# ========================================
#                 EVALUATION
# ========================================

# Each entry is the [loss, mean absolute error] list returned by
# model.evaluate() for a model compiled with a single metric.
val_performance = {}
performance = {}

print()
print("Evaluating dense model.")
val_performance['Dense'] = dense.evaluate(window.val, verbose=VERBOSE_TRAINING)
performance['Dense'] = dense.evaluate(window.test, verbose=0)

print()
print("Evaluating LSTM model.")
val_performance['LSTM'] = lstm_model.evaluate(window.val, verbose=VERBOSE_TRAINING)
performance['LSTM'] = lstm_model.evaluate(window.test, verbose=0)

# The error is converted back to hours by scaling with the standard deviation
# of the 'hours' column. `std` is not defined anywhere in the visible
# notebook, so on a fresh kernel the original line raised NameError.
# NOTE(review): the value presumably belongs to the normalization statistics
# produced by initial_preprocess() (possibly inside `info`) -- confirm and
# define `std` explicitly in the preprocessing cell.
try:
    hours_std = std['hours']
except NameError:
    hours_std = None
    print()
    print("Warning: `std` is not defined; reporting normalized errors only.")

print()
print("Overall validation performance:")
for model_name, (loss, mae) in val_performance.items():
    summary = "%s %.4f loss, %.4f mean abs error" % ((model_name + ":").ljust(17), loss, mae)
    if hours_std is not None:
        summary += " (%.4f hours)" % (mae * hours_std)
    print(summary)