<a href="https://colab.research.google.com/github/dominiksakic/NETworkingMay/blob/main/19_adv_RNNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip
!unzip jena_climate_2009_2016.csv.zip

In [6]:
import os
import numpy as np
from matplotlib import pyplot as plt

In [7]:
fname = os.path.join("jena_climate_2009_2016.csv")

# Text file
with open(fname) as f:
  data = f.read()

# Transform data
lines = data.split("\n")
header = lines[0].split(",")
lines = lines[1:] # exclude the headers

# Store data in numpy, for working with it later
temperature = np.zeros((len(lines),))
raw_data = np.zeros((len(lines), len(header) - 1)) # Minus the Datetime!

for i, line in enumerate(lines):
  # remove Datetime from lines
  values = [float(x) for x in line.split(",")[1:]]
  raw_data[i, :] = values[:]

In [8]:
from tensorflow import keras

# prepare the data
num_train_samples = int(0.5 * len(raw_data))
num_val_samples = int(0.25 * len(raw_data))
num_test_samples = len(raw_data) - num_train_samples - num_val_samples

# Normalize data, only on the train_samples NO DATA LEAKAGE
mean = raw_data[:num_train_samples].mean(axis=0)
raw_data -= mean
std = raw_data[:num_train_samples].std(axis=0)
raw_data /= std

sampling_rate = 6
sequence_length = 120
delay = sampling_rate * (sequence_length + 24 - 1)
batch_size = 256


train_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-delay],
    targets=temperature[delay:],
    sampling_rate=sampling_rate,
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
    start_index=0,
    end_index=num_train_samples)

val_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-delay],
    targets=temperature[delay:],
    sampling_rate=sampling_rate,
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
    start_index=num_train_samples,
    end_index=num_train_samples + num_val_samples)

test_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-delay],
    targets=temperature[delay:],
    sampling_rate=sampling_rate,
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
    start_index=num_train_samples + num_val_samples)

In [None]:
inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))
x = layers.LSTM(32, recurrent_dropout=0.25)(inputs)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

callbacks = [
    keras.callbacks.ModelCheckpoint("jena_lstm_dropout.keras",
                                    save_best_only=True)
]

model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])

history = model.fit(
    train_dataset,
    epochs=50,
    validation_data=val_dataset,
    callbacks=callbacks)

Epoch 1/50
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 211ms/step - loss: 0.0258 - mae: 0.0831 - val_loss: 4.2513e-07 - val_mae: 5.1530e-04
Epoch 2/50
[1m819/819[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m159s[0m 193ms/step - loss: 6.4645e-06 - mae: 0.0019 - val_loss: 6.8744e-07 - val_mae: 7.6528e-04
Epoch 3/50
[1m429/819[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m1:03[0m 164ms/step - loss: 1.8209e-06 - mae: 0.0010

In [None]:
history = history.history

loss = history["loss"]
val_loss = history["val_loss"]
epochs = range(len(loss))
plt.figure()
plt.plot(epochs, loss, "bo", label="Training loss")
plt.plot(epochs, val_loss, "b", label="Validation loss")
plt.title("Training and validation loss")
plt.legend()
plt.show()

In [None]:
mae = history["mae"]
val_mae = history["val_mae"]
plt.figure()
plt.plot(epochs, loss, "bo", label="Training MAE")
plt.plot(epochs, val_loss, "b", label="Validation MAE")
plt.title("Training and validation MAE")
plt.legend()
plt.show()