###  Import libraries


In [49]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
%matplotlib inline

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

### Hiperparámetros

In [50]:
csv_path          = "jena_climate_2009_2016.csv"
predicted_feature = 0 #T (degC)

train_perc = .7
val_perc   = .2

sequence_length = 24
offset          = 1
sampling_rate   = 6
batch_size      = 256

learning_rate   = 0.001
epochs          = 50
loss            = "mse"

### Read CSV

In [51]:
data = pd.read_csv(csv_path, parse_dates=True, index_col=0)

In [52]:
df = data.drop(['Tpot (K)', 'p (mbar)','wv (m/s)', 'max. wv (m/s)', 'wd (deg)'],axis = 1)
#df = data.drop(["Tpot (K)",
#                "Tdew (degC)",
#                "rh (%)",
#                "VPact (mbar)",
#                "H2OC (mmol/mol)",
#                "max. wv (m/s)",
#                "wd (deg)",],axis = 1)
df.head()

Unnamed: 0_level_0,T (degC),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2009-01-01 00:10:00,-8.02,-8.9,93.3,3.33,3.11,0.22,1.94,3.12,1307.75
2009-01-01 00:20:00,-8.41,-9.28,93.4,3.23,3.02,0.21,1.89,3.03,1309.8
2009-01-01 00:30:00,-8.51,-9.31,93.9,3.21,3.01,0.2,1.88,3.02,1310.24
2009-01-01 00:40:00,-8.31,-9.07,94.2,3.26,3.07,0.19,1.92,3.08,1309.19
2009-01-01 00:50:00,-8.27,-9.04,94.1,3.27,3.08,0.19,1.92,3.09,1309.0


### Step 7: Add periodic time intervals

In [53]:
timestamp_s = df.index
timestamp_s = timestamp_s.map(pd.Timestamp.timestamp)

In [54]:
day = 24*60*60
year = 365.2425 * day

In [55]:
df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

### Step 8: Splitting data

In [56]:
df_length = len(df)

train_length = int(df_length*train_perc)
val_length   = int(df_length*(train_perc+val_perc))

train_mean = df.values[:train_length].mean()
train_std  = df.values[:train_length].std()

df_norm = (df.values - train_mean) / train_std
df_norm = pd.DataFrame(df_norm)

### Step 9: Normalize data
- Only normalize data based on training data
    - Notice you should only normalize the training data - because validation and test data could affect the normalization
- Get the mean and standard deviation of the data
    - HINT: Use **.mean()** and **.std()** on the dataframe.
- Noramlize the data as follows
    - **train_df = (train_df - train_mean) / train_std** (assuming naming fits)
    - HINT: The transformation of validation and test data is done similarly with **train_mean** and **train_std**.

In [57]:
train_df = df_norm[:train_length-1]
val_df   = df_norm[train_length:val_length-1]
test_df_norm  = df_norm[val_length:]
test_df       = df[val_length:]

### Step 10: Create datasets


In [58]:
start = sequence_length*sampling_rate + offset*sampling_rate
end   = train_length + start

x_train = train_df
y_train = df_norm[[predicted_feature]][start:end]

dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    x_train.values,
    y_train,
    sequence_length = sequence_length,
    sampling_rate = sampling_rate,
    shuffle=True,
    batch_size = batch_size
)

In [59]:
start = train_length + sequence_length*sampling_rate + offset*sampling_rate
end   = val_length + start

x_val = val_df
y_val = df_norm[[predicted_feature]][start:end]

dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    x_val.values,
    y_val,
    sequence_length = sequence_length,
    sampling_rate = sampling_rate,
    shuffle=False,
    batch_size = batch_size
)

In [60]:
start = val_length + sequence_length*sampling_rate + offset*sampling_rate
x_end = len(test_df_norm) - sequence_length*sampling_rate - offset*sampling_rate

x_test = test_df_norm[:x_end]
y_test = df_norm[[predicted_feature]][start:]

dataset_test = keras.preprocessing.timeseries_dataset_from_array(
    x_test.values,
    y_test,
    sequence_length = sequence_length,
    sampling_rate = sampling_rate,
    shuffle=False,
    batch_size = batch_size
)

In [61]:
def create_dataset(df, input_width=24, offset=0, predict_column=0):
    x = []
    y = []
    data_x = df.to_numpy()
    data_y = df[predict_column].to_numpy()
    
    for i in range(input_width, len(data_x) - offset):
        x.append(data_x[i - input_width:i,:])
        y.append(data_y[i + offset])
        
    x = np.array(x)
    y = np.array(y)
    
    return x, y.reshape(-1,1)

In [62]:
test_df_norm = test_df_norm[5::6]
x_norm, y_norm = create_dataset(test_df_norm, input_width = sequence_length, predict_column = predicted_feature)

test_df = test_df[5::6]
x_real, y_real = create_dataset(pd.DataFrame(test_df.values), input_width = sequence_length, predict_column = predicted_feature)

In [69]:
print(x_norm[1])
print(y_norm)

[[-0.31509975 -0.32009653 -0.04545988 -0.29973698 -0.30215778 -0.31956892
  -0.30966847 -0.30221986  3.60470299 -0.32317741 -0.31912238 -0.3245818
  -0.32028285]
 [-0.31640326 -0.31925856 -0.03149373 -0.30038874 -0.30178535 -0.32059311
  -0.30942018 -0.30181639  3.61004116 -0.32239482 -0.31891268 -0.32458057
  -0.32028099]
 [-0.31612394 -0.31975514 -0.03676983 -0.30026459 -0.3020026  -0.32022068
  -0.30957536 -0.30206468  3.60876869 -0.32158462 -0.31891268 -0.3234453
  -0.31924864]
 [-0.31618601 -0.32034482 -0.04018378 -0.30029563 -0.30228193 -0.32000342
  -0.30973054 -0.302344    3.60904801 -0.32080203 -0.31912238 -0.32344334
  -0.31924759]
 [-0.31646533 -0.32037585 -0.03863198 -0.30041977 -0.30231296 -0.32012757
  -0.30973054 -0.302344    3.60932734 -0.32010038 -0.31952748 -0.32344137
  -0.31924655]
 [-0.31662051 -0.32040689 -0.03770091 -0.30051288 -0.30231296 -0.32018964
  -0.30973054 -0.302344    3.60920319 -0.31952748 -0.32010038 -0.32343941
  -0.31924551]
 [-0.31680673 -0.3205310

### Step 11: Create model
- Create the following model
    - **model = models.Sequential()**
    - **model.add(layers.LSTM(32, return_sequences=True, input_shape=train_ds[0].shape[1:]))**
    - **model.add(layers.Dense(units=1))**

In [15]:
for batch in dataset_train.take(1):
    x, y = batch
    
input_shape = x.shape[1], x.shape[2]

In [16]:
inputs = keras.layers.Input(input_shape)
#lstm_layer = keras.layers.LSTM(32, return_sequences=True)(inputs)
lstm_layer2 = keras.layers.LSTM(32)(inputs)
output = keras.layers.Dense(1)(lstm_layer2)

model = keras.Model(inputs, output)
model.compile(keras.optimizers.Adam(learning_rate), loss)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 24, 13)]          0         
_________________________________________________________________
lstm (LSTM)                  (None, 32)                5888      
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 5,921
Trainable params: 5,921
Non-trainable params: 0
_________________________________________________________________


### Step 12: Train model
- Compile and fit the model
- Complie the model as follows
    - **model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])**
- Fit the model as follows
    - **model.fit(x=train_ds[0], y=train_ds[1], validation_data=(val_ds[0], val_ds[1]), epochs=5)**

In [17]:
history = model.fit(dataset_train, epochs = epochs, validation_data=dataset_val)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50

KeyboardInterrupt: 

In [None]:
def visualize_loss(history, title):
    loss = history.history["loss"]
    val_loss = history.history["val_loss"]
    epochs = range(len(loss))
    plt.figure()
    plt.plot(epochs, loss, "b", label="Training loss")
    plt.plot(epochs, val_loss, "r", label="Validation loss")
    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

visualize_loss(history, "Training and Validation Loss")

### Step 13: Predict data
- Apply the model on the test data
    - HINT: Use **model.predict(x)**, where **x** is assigned to the test data.

In [None]:
y_pred = model.predict(x_norm)
y_pred = y_pred * train_std + train_mean

### Step 14: Plot the result
- Plot a window of the data predicted together with the actual data.
- One way:
    - **fig, ax = plt.subplots()**
    - **ax.plot(y[i:i+96*2,0], c='g')**
    - **ax.plot(pred[i:i+96*2,-1,0], c='r')**
- It will plot a window of 96 hours, where you can index with **i** (**i=150** as an example) and **y** is the real values and **pred** are the predicted values

In [None]:
fig, ax = plt.subplots()

ax.plot(y_real[0:100], c='g', label="Test Data")
ax.plot(y_pred[0:100], c='r', label="Prediction")
plt.legend()

In [None]:
fig, ax = plt.subplots()

ax.plot(y_real[500:600], c='g', label="Test Data")
ax.plot(y_pred[500:600], c='r', label="Prediction")
plt.legend()

In [None]:
fig, ax = plt.subplots()

ax.plot(y_real[200:300], c='g', label="Test Data")
ax.plot(y_pred[200:300], c='r', label="Prediction")
plt.legend()

In [None]:
fig, ax = plt.subplots()

ax.plot(y_real[950:1050], c='g', label="Test Data")
ax.plot(y_pred[950:1050], c='r', label="Prediction")
plt.legend()

In [None]:
fig, ax = plt.subplots()

ax.plot(y_real[1000:1100], c='g', label="Test Data")
ax.plot(y_pred[1000:1100], c='r', label="Prediction")
plt.legend()

In [None]:
def show_plot(plot_data, delta, title):
    labels = ["History", "True Future", "Model Prediction"]
    marker = [".-", "rx", "go"]
    time_steps = list(range(-(plot_data[0].shape[0]), 0))
    if delta:
        future = delta
    else:
        future = 0

    plt.title(title)
    for i, val in enumerate(plot_data):
        if i:
            plt.plot(future, plot_data[i], marker[i], markersize=10, label=labels[i])
        else:
            plt.plot(time_steps, plot_data[i].flatten(), marker[i], label=labels[i])
    plt.legend()
    plt.xlim([time_steps[0], (future + 5) * 2])
    plt.xlabel("Time-Step")
    plt.show()
    return


for x, y in dataset_test.take(5):
    show_plot(
        [x[0][:, 1].numpy(), y[0].numpy(), model.predict(x)[0]],
        12,
        "Single Step Prediction",
    )