In [None]:
import os

# TensorFlow's native (C++) logging reads TF_CPP_MIN_LOG_LEVEL when the package
# is first imported, so the variable has to be set before `import tensorflow`
# for it to have any effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import sys
import rrdtool
import pickle
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import HourLocator, DateFormatter
import IPython
import IPython.display
from IPython.display import Image
from auserverdata.preparation.rrd import (
    parse_rrds_for_all_collectd_servers,
    parse_rrds_for_all_snmp_servers,
    get_time_series_data_for_collectd_servers,
    get_time_series_data_for_snmp_servers,
    get_feature_names_for_timestamp_set,
    get_feature_names_with_substring_present,
    map_feature_names_to_timestamps,
    map_timestamp_sets_to_feature_names,
    print_timestamp_set_to_feature_names_map_info
)
from auserverdata.local import get_local_config
from auserverdata.analysis import (
    plot_time_series_data_on_single_plot,
    plot_time_series_data_in_vertical_stack
)
from joblib import Parallel,delayed
from sklearn.decomposition import PCA
import pyarrow.parquet as pq
import pyarrow.compute as pc
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error

In [None]:
local_config = get_local_config()

In [None]:
collectd_ts_df = get_time_series_data_for_collectd_servers(rrd_dir=local_config.rrd_dir, collectd_servers=['gpu18'])

In [None]:
timestamps_per_feature_name = map_feature_names_to_timestamps(collectd_ts_df)
feature_names_per_timestamp_set = map_timestamp_sets_to_feature_names(timestamps_per_feature_name)

In [None]:
print_timestamp_set_to_feature_names_map_info(feature_names_per_timestamp_set)

In [None]:
feature_names = get_feature_names_for_timestamp_set(feature_names_per_timestamp_set, 0)

In [None]:
collectd_ts_df[feature_names].dropna()

In [None]:
cpu_feature_names = get_feature_names_with_substring_present(feature_names,'cpu')
memory_feature_names = get_feature_names_with_substring_present(feature_names, 'memory')
feature_names = cpu_feature_names + memory_feature_names

In [None]:
memory_feature_names

### Data Preparation

The CPU features for GPU1 have timestamps 2 seconds after the CPU Temperature feature. The code below aligns all features to the timestamps of the CPU usage features by attaching the label values by position rather than by timestamp. There are 1200 data points in the data, each separated by 10 seconds.

In [None]:
# label_of_interest = 'temperature-CPU1 Temp processor (3.1)'
# label_df = collectd_ts_df['gpu1']['ipmi'][label_of_interest].dropna()

label_of_interest = 'memory_used'
label_df = collectd_ts_df[('gpu18', 'memory', 'memory-used', 'value')].dropna()

In [None]:
# Keep only the rows in which every CPU feature has a value.
df1 = collectd_ts_df[cpu_feature_names].dropna()
# Collapse each multi-part column label into a single '/'-joined string.
df1.columns = list(map('/'.join, df1.columns))
# Attach the label by position (raw values, not index-aligned), so `label_df`
# must have exactly as many rows as `df1`.
df1[label_of_interest] = label_df.values
# Columns holding a single distinct value carry no information; remove them.
columns_to_drop = [name for name in df1.columns if df1[name].nunique() == 1]
df1 = df1.drop(columns=columns_to_drop)

In [None]:
df1.index[-1], df1.index[0]

In [None]:
df1['memory_used']

In [None]:
plt.plot(df1['memory_used'])

In [None]:
df1

#### Splitting

In [None]:
# Split the rows by position, in their existing order: first 70% for training,
# next 20% for validation, final 10% for testing.
n = len(df1)
train_end, val_end = int(n*0.7), int(n*0.9)
train_df = df1.iloc[:train_end]
val_df = df1.iloc[train_end:val_end]
test_df = df1.iloc[val_end:]

# Find the columns that take a single value across the training rows.
columns_to_drop = [column for column in train_df.columns
                   if len(train_df[column].unique()) == 1]

# Remove those columns from every split so all three keep the same layout.
train_df, val_df, test_df = (
    split.drop(columns=columns_to_drop) for split in (train_df, val_df, test_df)
)

num_features = train_df.shape[1]

In [None]:
train_df.shape, val_df.shape, test_df.shape

#### Scaling

In [None]:
# Standardize every column (features and target) with the mean and standard
# deviation of the training split only, and apply those same statistics to the
# validation and test splits.
train_mean = train_df.mean()
train_std = train_df.std()

train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std

# Separate the target column from the features. The column is addressed through
# `label_of_interest` (the name it was stored under) instead of a repeated
# string literal, so changing the label only requires editing one place.
train_features = train_df.drop(columns=[label_of_interest])
val_features = val_df.drop(columns=[label_of_interest])
test_features = test_df.drop(columns=[label_of_interest])

train_target = train_df[label_of_interest]
val_target = val_df[label_of_interest]
test_target = test_df[label_of_interest]

# Column name -> positional index in the scaled training frame.
column_indices = {name: i for i, name in enumerate(train_df.columns)}

In [None]:
train_df.shape, val_df.shape, test_df.shape

### PCA

In [None]:
def run_pca(features_df,
            top_or_bottom,
            desired_variance,
            pca=None):
    """Project features onto principal components.

    Args:
        features_df: 2-D feature matrix accepted by ``PCA.fit``/``PCA.transform``.
        top_or_bottom: ``'top'`` to keep the leading components, ``'bottom'``
            to keep the trailing ones.
        desired_variance: For ``'top'`` it is passed to ``PCA`` as
            ``n_components``. For ``'bottom'`` the leading components whose
            cumulative explained-variance ratio first reaches
            ``1 - desired_variance`` are discarded and the rest are returned.
        pca: An already-fitted PCA object to reuse. When ``None`` (the
            default) a new one is fitted on ``features_df``.

    Returns:
        ``(transformed, pca)``: the transformed array and the PCA object that
        produced it, so it can be reused on other splits.

    Raises:
        ValueError: if ``top_or_bottom`` is neither ``'top'`` nor ``'bottom'``.
    """
    if top_or_bottom not in ('top', 'bottom'):
        raise ValueError('The given value for top_or_bottom is invalid. It should be top or bottom.')

    if top_or_bottom == 'top':
        if pca is None:
            pca = PCA(n_components=desired_variance, svd_solver='auto')
            pca.fit(features_df)
        return pca.transform(features_df), pca

    # 'bottom': fit with all components, then drop the leading ones.
    if pca is None:
        pca = PCA()
        pca.fit(features_df)
    cumulative_variance_ratio = np.cumsum(pca.explained_variance_ratio_)
    # Index of the first component at which the cumulative ratio reaches the
    # threshold, plus one, gives the number of leading components to skip.
    n_components_bottom_percent = np.argmax(cumulative_variance_ratio >= (1 - desired_variance)) + 1
    return pca.transform(features_df)[:, n_components_bottom_percent:], pca

def transform_split_via_pca(features_df, top_or_bottom, desired_variance, pca=None):
    """Run ``run_pca`` on one split and return the result as a DataFrame.

    Args:
        features_df: Feature DataFrame for one split; its index is preserved.
        top_or_bottom: Forwarded to ``run_pca`` (``'top'`` or ``'bottom'``).
        desired_variance: Forwarded to ``run_pca``.
        pca: Fitted PCA object to reuse, or ``None`` (default) to fit a new
            one on ``features_df``.

    Returns:
        ``(components_df, pca)``: the transformed features, indexed like the
        input with string column names, and the PCA object that was used.
    """
    row_index = features_df.index
    components, pca = run_pca(features_df=features_df, top_or_bottom=top_or_bottom, desired_variance=desired_variance, pca=pca)
    components_df = pd.DataFrame(components, index=row_index)
    # Ensure column names are strings, this prevents a possible TypeError
    components_df = components_df.rename(str, axis="columns")
    return components_df, pca

In [None]:
# Fit PCA on the training features only (pca=None), then reuse that fitted
# object for the validation and test features so that all three splits are
# projected onto the same components. With 'top', the 0.90 is forwarded to
# PCA as `n_components` by run_pca.
# NOTE: this cell overwrites its own inputs, so running it twice would apply
# PCA to already-transformed features.
train_features, train_pca = transform_split_via_pca(train_features, 'top', 0.90, None)
val_features, _ = transform_split_via_pca(val_features, 'top', 0.90, train_pca)
test_features, _ = transform_split_via_pca(test_features, 'top', 0.90, train_pca)

In [None]:
# Re-attach the scaled target column to the PCA-transformed features of each
# split, joining on the shared row index.
train_df = pd.merge(train_features, train_target, left_index=True, right_index=True)
val_df = pd.merge(val_features, val_target, left_index=True, right_index=True)
test_df = pd.merge(test_features, test_target, left_index=True, right_index=True)

# Column name -> positional index in the final training frame.
column_indices = {name: i for i, name in enumerate(train_df.columns)}

# PCA replaced the raw feature columns, so the column count has changed since
# `num_features` was first computed. The multi-step models size their output
# layers with `num_features`, and those outputs are compared against windows
# built from these frames, so the value must reflect the current width.
num_features = train_df.shape[1]

In [None]:
train_df.shape, val_df.shape, test_df.shape

In [None]:
plt.plot(train_df['memory_used'])

In [None]:
plt.plot(val_df['memory_used'])

In [None]:
plt.plot(test_df['memory_used'])

#### Window Creation

In [None]:
class WindowGenerator():
    """Describes how a time series is cut into (input, label) windows.

    A window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows are the inputs and the last ``label_width`` rows are
    the labels. The three data splits are stored so that datasets can be built
    from them later.

    Note that the default values of ``train_df``, ``val_df`` and ``test_df``
    are the module-level frames as they exist when this class is defined.
    """

    def __init__(self, input_width, label_width, shift,
                train_df=train_df, val_df=val_df, test_df=test_df,
                label_columns=None):
        # Keep references to the raw splits.
        self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

        # Name -> position lookups: one for the requested label columns (only
        # when some were requested) and one for all columns of the training frame.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = dict(
                (name, position) for position, name in enumerate(label_columns))
        self.column_indices = dict(
            (name, position) for position, name in enumerate(train_df.columns))

        # Window geometry.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift

        # Inputs occupy the first `input_width` positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels occupy the last `label_width` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        summary_lines = (
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}',
        )
        return '\n'.join(summary_lines)

In [None]:
def split_window(self, features):
    """Split a batch of full windows into an ``(inputs, labels)`` pair.

    ``features`` is indexed as ``[batch, time, column]``. Inputs are the time
    steps selected by ``self.input_slice`` and labels those selected by
    ``self.labels_slice``. When ``self.label_columns`` is set, only those
    columns are kept in the labels, stacked along the last axis.
    """
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]

    wanted_columns = self.label_columns
    if wanted_columns is not None:
        per_column = [labels[:, :, self.column_indices[name]]
                      for name in wanted_columns]
        labels = tf.stack(per_column, axis=-1)

    # Slicing loses the static shape information, so restore the known time
    # dimension by hand; this keeps the resulting datasets easy to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])

    return inputs, labels

WindowGenerator.split_window = split_window

In [None]:
def plot(self, model=None, plot_col=label_of_interest, max_subplots=3):
    """Plot inputs, labels and (optionally) predictions for example windows.

    Args:
        model: Optional callable applied to the example inputs; its output is
            drawn as "Predictions". When ``None`` only inputs and labels are
            shown.
        plot_col: Name of the column to plot. The default is the value that
            ``label_of_interest`` had when this function was defined.
        max_subplots: Maximum number of example windows to draw, one subplot
            per window.
    """
    inputs, labels = self.example
    plt.figure(figsize=(12, 8))
    plot_col_index = self.column_indices[plot_col]
    max_n = min(max_subplots, len(inputs))

    # Position of `plot_col` inside the label tensor: looked up among the
    # selected label columns when a subset was requested (None if it is not
    # one of them), otherwise identical to its position among all columns.
    if self.label_columns:
        label_col_index = self.label_columns_indices.get(plot_col, None)
    else:
        label_col_index = plot_col_index

    # The predictions do not depend on the subplot, so run the model once for
    # the whole batch, and only when they will actually be drawn.
    predictions = None
    if model is not None and label_col_index is not None and max_n > 0:
        predictions = model(inputs)

    for n in range(max_n):
        plt.subplot(max_n, 1, n+1)
        # Label the axis with the column actually being plotted.
        plt.ylabel(f'{plot_col}_normed')
        plt.plot(self.input_indices, inputs[n, :, plot_col_index],
                label='Inputs', marker='.', zorder=-10)

        if label_col_index is None:
            continue

        plt.scatter(self.label_indices, labels[n, :, label_col_index],
                    edgecolors='k', label='Labels', c='#2ca02c', s=64)
        if predictions is not None:
            plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                    marker='X', edgecolors='k', label='Predictions',
                    c='#ff7f0e', s=64)

        if n == 0:
            plt.legend()

    plt.xlabel('Time [10 Second Intervals]')

WindowGenerator.plot = plot

In [None]:
def make_dataset(self, data, shuffle=True, batch_size=32):
    """Build a batched dataset of ``(inputs, labels)`` windows from ``data``.

    Args:
        data: Array-like time series (rows are time steps); converted to a
            ``float32`` NumPy array.
        shuffle: Passed to ``timeseries_dataset_from_array``. Defaults to
            ``True``; pass ``False`` to keep windows in their original order.
        batch_size: Number of windows per batch. Defaults to 32.

    Returns:
        The dataset produced by ``tf.keras.utils.timeseries_dataset_from_array``
        (windows of ``self.total_window_size`` rows, stride 1) with
        ``self.split_window`` mapped over every batch.
    """
    data = np.array(data, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=shuffle,
        batch_size=batch_size,)

    # Turn each batch of full windows into an (inputs, labels) pair.
    return ds.map(self.split_window)

WindowGenerator.make_dataset = make_dataset

In [None]:
@property
def train(self):
  """Dataset of windows built from the training frame."""
  return self.make_dataset(self.train_df)

@property
def val(self):
  """Dataset of windows built from the validation frame."""
  return self.make_dataset(self.val_df)

@property
def test(self):
  """Dataset of windows built from the test frame."""
  return self.make_dataset(self.test_df)

@property
def example(self):
  """Return one `inputs, labels` batch for plotting, cached after first use."""
  cached = getattr(self, '_example', None)
  if cached is not None:
    return cached
  # Nothing cached yet: pull the first batch of the `.train` dataset and
  # remember it so later calls reuse the same batch.
  first_batch = next(iter(self.train))
  self._example = first_batch
  return first_batch

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

### Helper Functions

In [None]:
def compile_and_fit(model, window, learning_rate=1e-6, epochs=50, patience=2):
  """Compile `model` and train it on `window`, returning the fit history.

  The model is compiled with mean-squared-error loss, an Adam optimizer using
  `learning_rate`, and mean absolute error as a metric. It is fitted on
  `window.train` for up to `epochs` epochs, validated on `window.val`, with an
  EarlyStopping callback that monitors `val_loss` with the given `patience`.
  """
  model.compile(
      loss=tf.keras.losses.MeanSquaredError(),
      optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
      metrics=[tf.keras.metrics.MeanAbsoluteError()],
  )

  stop_on_stalled_val_loss = tf.keras.callbacks.EarlyStopping(
      monitor='val_loss', patience=patience, mode='min')

  return model.fit(
      window.train,
      epochs=epochs,
      validation_data=window.val,
      callbacks=[stop_on_stalled_val_loss],
  )

In [None]:
def plot_train_val_loss(history):
    """Plot the training and validation loss curves stored in `history.history`."""
    curves = history.history
    for key, legend_name in (('loss', 'train_loss'), ('val_loss', 'val_loss')):
        plt.plot(curves[key], label=legend_name)
    plt.legend()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')

## Single Step Modeling

### Window Creation

In [None]:
Image('img/window_gen_example.png', width = 300, height = 200)

In [None]:
single_step_window = WindowGenerator(
    input_width=1, label_width=1, shift=1,
    label_columns=[label_of_interest])

single_step_window

In [None]:
Image(filename='img/singlewindow.png', width = 200, height = 300)

In [None]:
for example_inputs, example_labels in single_step_window.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')

A wider WindowGenerator generates windows of 30 consecutive time steps (5 minutes at 10-second intervals) of inputs and labels at a time. The new `wide_window` variable doesn't change the way the model operates. The model still makes predictions one time step (10 seconds) into the future based on a single input time step. Here, the time axis acts like the batch axis: each prediction is made independently with no interaction between time steps:

In [None]:
wide_window = WindowGenerator(
    input_width=30, label_width=30, shift=1,
    label_columns=[label_of_interest])

wide_window

In [None]:
Image(filename='img/widewindow.png', width = 300, height = 200)

In [None]:
for example_inputs, example_labels in wide_window.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')

### Baseline Model

Start with a model that just returns the current label as the prediction, predicting "No change"

In [None]:
class Baseline(tf.keras.Model):
    """"No change" model: returns its input as the prediction.

    With ``label_index`` set, only that column of the input is returned (with
    a trailing axis of size 1); otherwise the whole input is returned.
    """

    def __init__(self, label_index=None):
        super().__init__()
        self.label_index = label_index

    def call(self, inputs):
        if self.label_index is not None:
            # Select the single column, then restore the last axis.
            selected = inputs[:, :, self.label_index]
            return selected[:, :, tf.newaxis]
        return inputs

In [None]:
baseline = Baseline(label_index=column_indices[label_of_interest])

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])

val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(single_step_window.val)
performance['Baseline'] = baseline.evaluate(single_step_window.test, verbose=0)

In [None]:
single_step_window.plot(baseline)

In the above plots of three examples, the single-step model predicts one time step (10 seconds) ahead from one input time step. This deserves some explanation:

- The blue Inputs line shows the input memory usage (`memory_used`, normalized) at each time step. The model receives all features; this plot only shows `memory_used`.
- The green Labels dots show the target prediction value. These dots are shown at the prediction time, not the input time. That is why the range of labels is shifted 1 step relative to the inputs.
- The orange Predictions crosses are the model's predictions for each output time step. If the model were predicting perfectly, the predictions would land directly on the Labels.


In [None]:
# Re-create the "no change" baseline and evaluate it on the wide window.
baseline = Baseline(label_index=column_indices[label_of_interest])

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError()])

# NOTE: both result dictionaries are re-created here, so any scores stored in
# them earlier (the single-step-window baseline) are discarded. Every later
# model in this section adds its scores to these fresh dictionaries, and
# re-running this cell after training those models wipes their entries.
val_performance = {}
performance = {}
val_performance['Baseline'] = baseline.evaluate(wide_window.val)
performance['Baseline'] = baseline.evaluate(wide_window.test, verbose=0)

In [None]:
wide_window.plot(baseline)

### Linear Model

In [None]:
Image(filename='img/linear.png', width = 100, height = 200)

In [None]:
linear = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1)
])

In [None]:
print('Input shape:', wide_window.example[0].shape)
print('Output shape:', linear(wide_window.example[0]).shape)

In [None]:
# Train the linear model on the wide window, then clear the training log.
history = compile_and_fit(linear, wide_window, epochs = 300, learning_rate=1e-4)
IPython.display.clear_output()
val_performance['Linear'] = linear.evaluate(wide_window.val)
# verbose=0 runs the test evaluation silently, as the other model cells do
# (the previous value, -1, is not one of Keras' documented verbosity modes).
performance['Linear'] = linear.evaluate(wide_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
wide_window.plot(linear)

### Dense Model

In [None]:
dense = tf.keras.Sequential([
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=1)
])

history = compile_and_fit(dense, wide_window, epochs=1000, learning_rate=1e-5)
IPython.display.clear_output()
val_performance['Dense'] = dense.evaluate(wide_window.val)
performance['Dense'] = dense.evaluate(wide_window.test, verbose=1)
plot_train_val_loss(history)

In [None]:
wide_window.plot(dense)

### Multi-Step Dense

In [None]:
Image(filename='img/multistepdense.png', width = 300, height = 200)

In [None]:
CONV_WIDTH = 6
conv_window = WindowGenerator(
    input_width=CONV_WIDTH,
    label_width=1,
    shift=1,
    label_columns=[label_of_interest])

conv_window

In [None]:
conv_window.plot()
plt.title("Given 6 timestep of inputs, predict 1 timestep into the future.")

In [None]:
multi_step_dense = tf.keras.Sequential([
    # Shape: (time, features) => (time*features)
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
    # Add back the time dimension.
    # Shape: (outputs) => (1, outputs)
    tf.keras.layers.Reshape([1, -1]),
])


In [None]:
print('Input shape:', conv_window.example[0].shape)
print('Output shape:', multi_step_dense(conv_window.example[0]).shape)

In [None]:
history = compile_and_fit(multi_step_dense, conv_window, epochs=2000, learning_rate=1e-4, patience=5)
IPython.display.clear_output()
val_performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.val)
performance['Multi step dense'] = multi_step_dense.evaluate(conv_window.test, verbose=1)
plot_train_val_loss(history)

In [None]:
conv_window.plot(multi_step_dense)

### CNN

In [None]:
Image(filename='img/cnn.png', width = 300, height = 200)

In [None]:
conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=16,
                           kernel_size=(CONV_WIDTH,),
                           activation='relu'),
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=1),
])

In [None]:
print("Conv model on `conv_window`")
print('Input shape:', conv_window.example[0].shape)
print('Output shape:', conv_model(conv_window.example[0]).shape)

In [None]:
history = compile_and_fit(conv_model, conv_window, epochs = 1000, learning_rate=1e-3, patience=30)
IPython.display.clear_output()
val_performance['Conv'] = conv_model.evaluate(conv_window.val)
performance['Conv'] = conv_model.evaluate(conv_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
print("Wide window")
print('Input shape:', wide_window.example[0].shape)
print('Labels shape:', wide_window.example[1].shape)
print('Output shape:', conv_model(wide_window.example[0]).shape)

In [None]:
LABEL_WIDTH = 30
INPUT_WIDTH = LABEL_WIDTH + (CONV_WIDTH - 1)
wide_conv_window = WindowGenerator(
    input_width=INPUT_WIDTH,
    label_width=LABEL_WIDTH,
    shift=1,
    label_columns=[label_of_interest])

wide_conv_window

In [None]:
print("Wide conv window")
print('Input shape:', wide_conv_window.example[0].shape)
print('Labels shape:', wide_conv_window.example[1].shape)
print('Output shape:', conv_model(wide_conv_window.example[0]).shape)

In [None]:
wide_conv_window.plot(conv_model)

### LSTM

In [None]:
# Single-step LSTM: an LSTM that emits an output at every time step
# (return_sequences=True), followed by two small ReLU layers with dropout and
# a one-unit output layer.
lstm_model = tf.keras.models.Sequential([
    # Shape [batch, time, features] => [batch, time, lstm_units]
    tf.keras.layers.LSTM(16, return_sequences=True),
    tf.keras.layers.Dense(units=16, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=8, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # Disabled larger variant, kept for reference:
    # tf.keras.layers.Dense(units=64, activation='relu'),
    # tf.keras.layers.Dropout(0.4),
    # tf.keras.layers.Dense(units=32, activation='relu'),
    # tf.keras.layers.Dropout(0.4),
    # Shape => [batch, time, 1]
    tf.keras.layers.Dense(units=1)
])

In [None]:
print('Input shape:', wide_window.example[0].shape)
print('Output shape:', lstm_model(wide_window.example[0]).shape)

In [None]:
history = compile_and_fit(lstm_model, wide_window, epochs=1000, learning_rate=1e-4, patience=10)
IPython.display.clear_output()
val_performance['LSTM'] = lstm_model.evaluate(wide_window.val)
performance['LSTM'] = lstm_model.evaluate(wide_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
wide_window.plot(lstm_model)

### Model Comparison

In [None]:
# Bar chart comparing the validation and test MAE of every single-step model
# recorded in `val_performance` / `performance`.
metric_name = 'mean_absolute_error'
# Position of the MAE inside the list returned by `evaluate`.
metric_index = lstm_model.metrics_names.index('mean_absolute_error')
val_mae = [scores[metric_index] for scores in val_performance.values()]
test_mae = [scores[metric_index] for scores in performance.values()]

x = np.arange(len(performance))
width = 0.3

plt.ylabel(f'mean_absolute_error [{label_of_interest}, normalized]')
# One pair of bars per model: validation to the left of the tick, test to the right.
for offset, values, series_label in ((-0.17, val_mae, 'Validation'),
                                     (0.17, test_mae, 'Test')):
    plt.bar(x + offset, values, width, label=series_label)
plt.xticks(ticks=x, labels=performance.keys(),
           rotation=45)
_ = plt.legend()

## Multi-Step Modeling

In [None]:
OUT_STEPS = 2
multi_window = WindowGenerator(input_width=18,
                               label_width=OUT_STEPS,
                               shift=OUT_STEPS)

multi_window.plot()
multi_window

### Baselines

In [None]:
class MultiStepLastBaseline(tf.keras.Model):
  """Baseline that repeats the last input time step for every output step."""
  def call(self, inputs):
    # Take the final time step (keeping the time axis) and tile it OUT_STEPS
    # times along that axis; all columns are passed through unchanged.
    return tf.tile(inputs[:, -1:, :], [1, OUT_STEPS, 1])

last_baseline = MultiStepLastBaseline()
last_baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                      metrics=[tf.keras.metrics.MeanAbsoluteError()])

# Result dictionaries for the multi-step models: model name -> evaluate() output.
multi_val_performance = {}
multi_performance = {}

multi_val_performance['Last'] = last_baseline.evaluate(multi_window.val)
multi_performance['Last'] = last_baseline.evaluate(multi_window.test, verbose=0)
multi_window.plot(last_baseline)

### Single-Shot Models

#### Linear

In [None]:
# Single-shot model that predicts all OUT_STEPS time steps from the last input
# time step only.
# NOTE(review): despite its name (and the 'Linear' key it is stored under),
# this model stacks four ReLU Dense layers with dropout, so it is not a linear
# model — confirm whether that is intended.
multi_linear_model = tf.keras.Sequential([
    # Take the last time-step.
    # Shape [batch, time, features] => [batch, 1, features]
    tf.keras.layers.Lambda(lambda x: x[:, -1:, :]),

    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # Shape => [batch, 1, out_steps*features]
    # The kernel starts at zero, so the initial output is just the bias.
    tf.keras.layers.Dense(OUT_STEPS*num_features,
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
])

history = compile_and_fit(multi_linear_model, multi_window, epochs=1000, learning_rate=1e-6, patience=30)

# Drop the training log before showing the evaluation output and loss curves.
IPython.display.clear_output()
multi_val_performance['Linear'] = multi_linear_model.evaluate(multi_window.val)
multi_performance['Linear'] = multi_linear_model.evaluate(multi_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
multi_window.plot(multi_linear_model)

#### Dense

In [None]:
multi_dense_model = tf.keras.Sequential([
    # Take the last time step.
    # Shape [batch, time, features] => [batch, 1, features]
    tf.keras.layers.Lambda(lambda x: x[:, -1:, :]),
    # Shape => [batch, 1, dense_units]
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # Shape => [batch, out_steps*features]
    tf.keras.layers.Dense(OUT_STEPS*num_features,
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
])

history = compile_and_fit(multi_dense_model, multi_window, epochs=1000, learning_rate=1e-3, patience=10)

IPython.display.clear_output()
multi_val_performance['Dense'] = multi_dense_model.evaluate(multi_window.val)
multi_performance['Dense'] = multi_dense_model.evaluate(multi_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
multi_window.plot(multi_dense_model)

#### CNN

In [None]:
CONV_WIDTH = 6
multi_conv_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, CONV_WIDTH, features]
    tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]),
    # Shape => [batch, 1, conv_units]
    tf.keras.layers.Conv1D(1024, activation='relu', kernel_size=(CONV_WIDTH)),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=256, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # tf.keras.layers.Dense(128,activation='relu'),
    # tf.keras.layers.Dense(64,activation='relu'),
    # tf.keras.layers.Dense(32,activation='relu'),
    # Shape => [batch, 1,  out_steps*features]
    tf.keras.layers.Dense(OUT_STEPS*num_features,
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features]
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
])

history = compile_and_fit(multi_conv_model, multi_window, epochs=1000, learning_rate=1e-4)

IPython.display.clear_output()
multi_val_performance['Conv'] = multi_conv_model.evaluate(multi_window.val)
multi_performance['Conv'] = multi_conv_model.evaluate(multi_window.test, verbose=0)
plot_train_val_loss(history)


In [None]:
multi_window.plot(multi_conv_model)

#### LSTM/RNN

In [None]:
multi_lstm_model = tf.keras.Sequential([
    # Shape [batch, time, features] => [batch, lstm_units].
    # Adding more `lstm_units` just overfits more quickly.
    tf.keras.layers.LSTM(32, return_sequences=False),
    tf.keras.layers.Dense(256,activation='relu'),
    # tf.keras.layers.Dense(128,activation='relu'),
    # tf.keras.layers.Dense(64,activation='relu'),
    # tf.keras.layers.Dense(32,activation='relu'),
    # Shape => [batch, out_steps*features].
    tf.keras.layers.Dense(OUT_STEPS*num_features,
                          kernel_initializer=tf.initializers.zeros()),
    # Shape => [batch, out_steps, features].
    tf.keras.layers.Reshape([OUT_STEPS, num_features])
])

history = compile_and_fit(multi_lstm_model, multi_window, epochs=1000, learning_rate=1e-4, patience=5)

IPython.display.clear_output()

multi_val_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val)
multi_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
multi_window.plot(multi_lstm_model)

In [None]:
# Bar chart comparing the validation and test MAE of every multi-step model
# recorded in `multi_val_performance` / `multi_performance`.
x = np.arange(len(multi_performance))
width = 0.3

metric_name = 'mean_absolute_error'
# Look the metric position up on a multi-step model, so this section does not
# depend on the single-step `lstm_model` having been trained in this kernel.
# All models here are compiled with the same loss and metric.
metric_index = multi_lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in multi_val_performance.values()]
test_mae = [v[metric_index] for v in multi_performance.values()]

plt.bar(x - 0.17, val_mae, width, label='Validation')
plt.bar(x + 0.17, test_mae, width, label='Test')
plt.xticks(ticks=x, labels=multi_performance.keys(),
           rotation=45)
plt.ylabel('MAE (average over all times and outputs)')
_ = plt.legend()

### Autoregressive Models

In [None]:
class FeedBack(tf.keras.Model):
  """Model holding an LSTM cell, an RNN wrapper around it, and a Dense head.

  Only the layers are created here; the `warmup` and `call` methods are
  attached to the class in later cells.
  """
  def __init__(self, units, out_steps):
    super().__init__()
    # Number of time steps to produce and size of the LSTM state.
    self.out_steps = out_steps
    self.units = units
    self.lstm_cell = tf.keras.layers.LSTMCell(units)
    # Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
    # return_state=True makes the RNN return its final state with its output.
    self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
    # Maps an LSTM output to one value per column (`num_features` is a
    # module-level variable read when the model is constructed).
    self.dense = tf.keras.layers.Dense(num_features)

In [None]:
feedback_model = FeedBack(units=32, out_steps=OUT_STEPS)

In [None]:
def warmup(self, inputs):
  """Run the RNN over `inputs` and return the first prediction and the state.

  The first element returned by `self.lstm_rnn` is passed through
  `self.dense` to form the prediction; the remaining elements are returned
  as the state list.
  """
  # inputs.shape => (batch, time, features)
  # rnn_output.shape => (batch, lstm_units)
  rnn_output, *cell_state = self.lstm_rnn(inputs)

  # prediction.shape => (batch, features)
  return self.dense(rnn_output), cell_state

FeedBack.warmup = warmup

In [None]:
prediction, state = feedback_model.warmup(multi_window.example[0])
prediction.shape

In [None]:
def call(self, inputs, training=None):
  """Produce `self.out_steps` predictions, feeding each one back as input.

  The first prediction and the initial LSTM state come from `self.warmup`.
  Every further step runs `self.lstm_cell` on the previous prediction and maps
  its output through `self.dense`. The result has shape
  (batch, time, features).
  """
  # Warm up on the input window: first prediction plus the LSTM state.
  prediction, state = self.warmup(inputs)

  # Collect the per-step predictions in a plain Python list.
  predictions = [prediction]

  # The first step is already done, so run the remaining out_steps - 1 steps.
  for _ in range(self.out_steps - 1):
    # Feed the latest prediction back in for one LSTM step.
    lstm_output, state = self.lstm_cell(prediction, states=state,
                                        training=training)
    # Turn the LSTM output into the next prediction and record it.
    prediction = self.dense(lstm_output)
    predictions.append(prediction)

  # stacked.shape => (time, batch, features)
  stacked = tf.stack(predictions)
  # returned shape => (batch, time, features)
  return tf.transpose(stacked, [1, 0, 2])

FeedBack.call = call

In [None]:
print('Output shape (batch, time, features): ', feedback_model(multi_window.example[0]).shape)

In [None]:
history = compile_and_fit(feedback_model, multi_window, epochs=1000, learning_rate=1e-4)

IPython.display.clear_output()

multi_val_performance['AR LSTM'] = feedback_model.evaluate(multi_window.val)
multi_performance['AR LSTM'] = feedback_model.evaluate(multi_window.test, verbose=0)
plot_train_val_loss(history)

In [None]:
multi_window.plot(feedback_model)