# Time series and RNNs

<h3>Different problems</h3>
There are different problems related to time series:

1. Prediction or forecasting
2. Imputation (Projecting back into the past or filling holes - the missing data)
3. Detect anomalies
4. Detecting patterns

<h3>patterns</h3>

1. Trends: upward or downward trends
2. seasonalities: repeated patterns
3. Combination of trends and seasonalities
4. Random values (noise)
5. Auto correlation

In reality, a time series is a combination of all of these patterns: trend + seasonality + autocorrelation + noise.

## A temperature-forecasting example

The data is downloaded from the following link:

https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip

In [None]:
import pandas as pd
# Read the Jena climate CSV from the local "files" directory into a DataFrame.
df = pd.read_csv("files/jena_climate_2009_2016.csv")
# Bare expression: the notebook renders the DataFrame as this cell's output.
df

The following picture shows the plot of temperature (in degrees Celsius) over time. On this plot, you can clearly see the yearly periodicity of temperature—the data spans 8 years.


In [None]:
# Line plot of the "T (degC)" column against the DataFrame index.
df["T (degC)"].plot()

In [None]:
import numpy as np

# Feature matrix: every column except the first one, converted to a fresh
# float64 array (a copy, so later in-place edits never touch `df`).
raw_data = df.values[:, 1:].astype(np.float64)


In [None]:
# Prediction target: an independent float64 copy of the "T (degC)" column.
temperature = np.array(df["T (degC)"], dtype=np.float64)

In [None]:
# Chronological split: first half for training, the next quarter for
# validation, and whatever remains for testing.
num_train_samples = len(raw_data) // 2
num_val_samples = len(raw_data) // 4
num_test_samples = len(raw_data) - (num_train_samples + num_val_samples)

In [None]:
# Per-feature mean, computed on the training rows only.
mean = np.mean(raw_data[:num_train_samples], axis=0)

In [None]:
# Per-feature standard deviation, computed on the training rows only.
std = np.std(raw_data[:num_train_samples], axis=0)

In [None]:
# Standardize every feature with the training-split statistics, writing the
# result back into the existing array so `raw_data` keeps its identity.
raw_data[:] = (raw_data - mean) / std

In [None]:
# Bare expression: display the standardized feature matrix as the cell output.
raw_data

In [None]:
import tensorflow as tf
from tensorflow import keras

# Windowing parameters.
sampling_rate = 6        # keep one row out of every 6 inside a window
sequence_length = 120    # number of (sub-sampled) timesteps per window
# Offset, in rows, between the start of a window and its target. The last
# timestep of a window sits sampling_rate * (sequence_length - 1) rows after
# the start, so the target lies sampling_rate * 24 rows past that last step.
delay = sampling_rate * (sequence_length + 24 - 1)
batch_size = 256

# Inputs are trimmed at the end and targets at the start, so that window i
# is paired with the temperature `delay` rows after the window begins.
_window_inputs = raw_data[:-delay]
_window_kwargs = dict(
    targets=temperature[delay:],
    sampling_rate=sampling_rate,
    sequence_length=sequence_length,
    shuffle=True,
    batch_size=batch_size,
)

# Chronological boundaries of the validation split.
_val_start = num_train_samples
_val_end = num_train_samples + num_val_samples

train_dataset = keras.utils.timeseries_dataset_from_array(
    _window_inputs, start_index=0, end_index=_val_start, **_window_kwargs)
val_dataset = keras.utils.timeseries_dataset_from_array(
    _window_inputs, start_index=_val_start, end_index=_val_end,
    **_window_kwargs)
# No end_index: the test split runs to the end of the available data.
test_dataset = keras.utils.timeseries_dataset_from_array(
    _window_inputs, start_index=_val_end, **_window_kwargs)

In [None]:
def evaluate_naive_method(dataset, *, feature_index=1, feature_mean=None,
                          feature_std=None):
    """Return the mean absolute error of the "last value" baseline.

    For every window the prediction is the value of one input feature at the
    window's final timestep, mapped back from standardized units to original
    units via ``value * std + mean``.

    Args:
        dataset: Iterable of ``(samples, targets)`` batches. ``samples`` is
            indexed as ``[sample, timestep, feature]``; ``targets`` holds one
            value per sample, in original (un-standardized) units.
        feature_index: Feature column used as the prediction. Defaults to 1
            (presumably the temperature column of ``raw_data`` -- confirm
            against the CSV column order).
        feature_mean: Per-feature means used to standardize the inputs.
            Defaults to the module-level ``mean``.
        feature_std: Per-feature standard deviations used to standardize the
            inputs. Defaults to the module-level ``std``.

    Returns:
        Sum of absolute errors divided by the number of samples seen.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    # Fall back to the notebook-level statistics so existing calls of the
    # form evaluate_naive_method(dataset) keep working unchanged.
    if feature_mean is None:
        feature_mean = mean
    if feature_std is None:
        feature_std = std
    offset = feature_mean[feature_index]
    scale = feature_std[feature_index]

    total_abs_err = 0.
    samples_seen = 0
    for samples, targets in dataset:
        # Last timestep of every window, de-standardized.
        preds = samples[:, -1, feature_index] * scale + offset
        total_abs_err += np.sum(np.abs(preds - targets))
        samples_seen += samples.shape[0]

    if samples_seen == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError("dataset yielded no samples")
    return total_abs_err / samples_seen

In [None]:
# Mean absolute error of the last-value baseline on the test split
# (shown as the cell output).
evaluate_naive_method(test_dataset)

In [None]:
# Mean absolute error of the last-value baseline on the validation split
# (shown as the cell output).
evaluate_naive_method(val_dataset)

Now the game is to use your knowledge of deep learning to do better: you need to beat the baseline, i.e. get below 2.44 on the validation set.

In [None]:
# Dense baseline: flatten each (timesteps, features) window into one vector,
# pass it through a small hidden layer, and regress a single value.
model = keras.Sequential()
model.add(keras.layers.Input(shape=(sequence_length, raw_data.shape[-1])))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(16, activation="relu"))
model.add(keras.layers.Dense(1))

# Optimize MSE, and also report MAE so results are comparable with the
# naive baseline.
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation MAE per epoch. The x-axis is derived from the
# recorded history instead of a hard-coded epoch count, so the plot stays
# valid if the number of epochs passed to fit() changes.
val_mae = history.history["val_mae"]
train_mae = history.history["mae"]
epochs = np.arange(len(val_mae))

fig, axis = plt.subplots(1, 1)
axis.plot(epochs, val_mae, "b", label="Validation MAE")
axis.plot(epochs, train_mae, "bo", label="Training MAE")
axis.set_xlabel("Epoch")
axis.set_ylabel("MAE")
axis.legend()

In the following code, we create the same model using the functional API:

In [None]:
# Dense baseline again, this time wired with the functional API.
inputs = keras.Input(
    name="features",
    shape=(sequence_length, raw_data.shape[-1]),
)

# Create the layers first, then connect them in order.
flatten_layer = keras.layers.Flatten()
hidden_layer = keras.layers.Dense(16, activation="relu")
output_layer = keras.layers.Dense(1, name="temperature")

x = flatten_layer(inputs)
x = hidden_layer(x)
outputs = output_layer(x)

model = keras.Model(inputs=inputs, outputs=outputs)
# The loss is keyed by the name of the output layer.
model.compile(
    optimizer="rmsprop",
    loss={"temperature": "mse"},
    metrics=["mae"],
)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
)

In [None]:
# Training vs. validation MAE per epoch. The x-axis is derived from the
# recorded history instead of a hard-coded epoch count, so the plot stays
# valid if the number of epochs passed to fit() changes.
val_mae = history.history["val_mae"]
train_mae = history.history["mae"]
epochs = np.arange(len(val_mae))

fig, axis = plt.subplots(1, 1)
axis.plot(epochs, val_mae, "b", label="Validation MAE")
axis.plot(epochs, train_mae, "bo", label="Training MAE")
axis.set_xlabel("Epoch")
axis.set_ylabel("MAE")
axis.legend()

In [None]:
# Recurrent model: a single 16-unit LSTM reads the window step by step and a
# dense layer turns its final output into one regression value.
model = keras.Sequential()
model.add(keras.layers.Input(shape=(sequence_length, raw_data.shape[-1])))
model.add(keras.layers.LSTM(16))
model.add(keras.layers.Dense(1))

model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
)

In [None]:
# Training vs. validation MAE per epoch, skipping the first recorded epoch.
# The x-axis is derived from the recorded history instead of a hard-coded
# epoch count, so the plot stays valid if the number of epochs changes.
val_mae = history.history["val_mae"][1:]
train_mae = history.history["mae"][1:]
epochs = np.arange(1, len(val_mae) + 1)

fig, axis = plt.subplots(1, 1)
axis.plot(epochs, val_mae, "b", label="Validation MAE")
axis.plot(epochs, train_mae, "bo", label="Training MAE")
axis.set_xlabel("Epoch")
axis.set_ylabel("MAE")
axis.legend()

In [None]:
# The LSTM model, wired with the functional API.
inputs = keras.Input(
    name="features",
    shape=(sequence_length, raw_data.shape[-1]),
)

# Create the layers first, then connect them in order.
recurrent_layer = keras.layers.LSTM(16)
output_layer = keras.layers.Dense(1, name="temperature")

x = recurrent_layer(inputs)
outputs = output_layer(x)

model = keras.Model(inputs=inputs, outputs=outputs)
# The loss is keyed by the name of the output layer.
model.compile(
    optimizer="rmsprop",
    loss={"temperature": "mse"},
    metrics=["mae"],
)
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,
)