- **Topics**
    - Recurrent dropout
    - Stacking recurrent layers
    - Bidirectional recurrent layers

### Example: A temperature-forecasting problem

In [None]:
import os
import numpy as np
import pandas as pd

# NOTE(review): hard-coded absolute path to a single user's machine. All relative
# paths used afterwards (e.g. the 'datasets' directory) resolve against this
# directory, and this call raises if the folder does not exist on the machine
# running the notebook.
os.chdir('C:/Users/HK/Desktop/Github/Deep-Learning/Deep-Learning-with-Python')

In [None]:
# Directory holding the dataset, relative to the current working directory.
data_dir = 'datasets'
# Path of the CSV file that is read into a DataFrame in the next cell.
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

In [None]:
# Read the CSV into a DataFrame and preview its first rows.
data = pd.read_csv(fname)
data.head()

In [None]:
# Keep every column except the one at position 0, as a NumPy array.
# NOTE(review): presumably column 0 is a non-numeric timestamp column - confirm
# against the data.head() preview.
float_data = np.asarray(data.iloc[:, 1:])

- Plotting the temperature timeseries

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Column 1 of float_data is the series this notebook treats as the temperature
# (it is also the column the generator uses as its prediction target).
temp = float_data[:, 1]

# Whole series.
plt.plot(range(len(temp)), temp)
plt.title('Temperature over the full time range')
plt.xlabel('Timestep')
plt.ylabel('Temperature')
plt.show()

# first 10 days (one observation every 10 minutes: 6 per hour * 24 hours * 10 days)
times = 6*24*10
plt.plot(range(times), temp[:times])
plt.title('Temperature over the first 10 days')
plt.xlabel('Timestep')
plt.ylabel('Temperature')
plt.show()

- daily periodicity

    Predicting average temperature for the next month given a few months of past data would be easy.
    
    Is this timeseries predictable at a daily scale?

#### Preparing the data

In [None]:
# (number of timesteps, number of features per timestep)
float_data.shape

In [None]:
# first 200,000 timesteps as training data
n_training = 200000

# normarlizing the data
mean = float_data[:n_training].mean(axis = 0)
std = float_data[:n_training].std(axis = 0)
float_data -= mean
float_data /= std

- Generator yielding timeseries samples and their targets

    **[arguments]**
    
    - `data`: The original array of float_data
    - `lookback`: How many timesteps back the input data should go
    - `delay`: How many timesteps in the future the target should be
    - `min_index`, `max_index`: Indices in the `data` array that delimit which timesteps to draw from
    - `shuffle`: Whether to shuffle the samples
    - `batch_size`: The number of samples per batch
    - `step`: The period, in timesteps, at which you sample data (e.g. `step = 6` draws one data point every hour)

In [None]:
def generator(data, lookback, delay, min_index, max_index, 
              shuffle = False, batch_size = 128, step = 6):
    """Endlessly yield ``(samples, targets)`` batches cut from a 2D timeseries.

    Parameters
    ----------
    data : 2D array indexed as ``data[timestep][feature]``.
    lookback : number of timesteps before the anchor row covered by each input window.
    delay : offset (in timesteps) from the anchor row to the target row.
    min_index, max_index : bounds on the rows to draw from; anchor rows are
        taken from ``[min_index + lookback, max_index)``. When ``max_index`` is
        ``None`` it becomes ``len(data) - delay - 1``.
    shuffle : if true, anchor rows are drawn at random (with replacement);
        otherwise they are walked in chronological order, restarting from the
        first valid row once a full batch would reach ``max_index``.
    batch_size : number of anchor rows per batch.
    step : stride, in timesteps, between the points kept inside a window.

    Yields
    ------
    samples : array of shape ``(len(rows), lookback // step, data.shape[-1])``.
    targets : array of shape ``(len(rows),)`` holding column 1 of ``data`` at
        ``row + delay`` for each anchor row.
    """
    if max_index is None:
        max_index = len(data) - delay - 1

    first_row = min_index + lookback   # earliest anchor with a full window behind it
    cursor = first_row                 # next anchor row in sequential mode
    window_len = lookback // step
    n_features = data.shape[-1]

    while True:
        if shuffle:
            rows = np.random.randint(first_row, max_index, size = batch_size)
        else:
            # Restart from the beginning when a full batch would hit max_index.
            if cursor + batch_size >= max_index:
                cursor = first_row
            rows = np.arange(cursor, min(cursor + batch_size, max_index))
            cursor += len(rows)

        samples = np.zeros((len(rows), window_len, n_features))
        targets = np.zeros((len(rows),))
        for slot, row in enumerate(rows):
            # Every `step`-th observation of the `lookback` rows preceding `row`.
            samples[slot] = data[range(row - lookback, row, step)]
            targets[slot] = data[row + delay][1]
        yield samples, targets

In [None]:
lookback = 1440    # 10 days of 10-minute observations
step = 6           # keep one observation per hour inside each window
delay = 144        # target lies 144 timesteps (24 hours) after the window
batch_size = 128

# Training batches: random windows drawn from the first 200,000 timesteps.
# `step` and `batch_size` are passed explicitly so the generators and the step
# counts computed below cannot silently drift apart from the values set above.
train_gen = generator(float_data,
                      lookback = lookback,
                      delay = delay,
                      min_index = 0,
                      max_index = 200000,
                      shuffle = True,
                      step = step,
                      batch_size = batch_size)

# Validation batches: chronological windows from the next 100,000 timesteps.
val_gen = generator(float_data,
                    lookback = lookback,
                    delay = delay,
                    min_index = 200001,
                    max_index = 300000,
                    step = step,
                    batch_size = batch_size)

# Test batches: chronological windows from everything that remains.
test_gen = generator(float_data,
                     lookback = lookback,
                     delay = delay,
                     min_index = 300001,
                     max_index = None,
                     step = step,
                     batch_size = batch_size)

# Each draw from a generator yields `batch_size` samples, so the number of
# draws needed to cover a split is its sample count divided by the batch size.
# Without this division, validation would run roughly `batch_size` times longer
# than one pass over the split.

# How many steps to draw from val_gen to see the entire validation set
val_steps = (300000 - 200001 - lookback) // batch_size
# How many steps to draw from test_gen to see the entire test set
test_steps = (len(float_data) - 300001 - lookback) // batch_size

print('validation steps:', val_steps)
print('test steps:', test_steps)

### A first recurrent baseline : GRU (Gated Recurrent Unit)

In [None]:
from keras.models import Sequential
from keras.layers import GRU, Dense
from keras.optimizers import RMSprop

In [None]:
# Per-sample input shape for the recurrent layers: an unspecified number of
# timesteps (None), each with one value per column of float_data.
input_shape = (None, float_data.shape[-1])
print(input_shape)

In [None]:
# Baseline: one 32-unit GRU layer followed by a single linear output unit,
# trained to minimise mean absolute error.
model = Sequential([
    GRU(32, input_shape = input_shape),
    Dense(1),
])
model.compile(loss = 'mae', optimizer = RMSprop())

# NOTE(review): fit_generator is deprecated in newer Keras releases in favour
# of fit - confirm against the installed version.
history = model.fit_generator(
    train_gen,
    epochs = 20,
    steps_per_epoch = 500,
    validation_data = val_gen,
    validation_steps = val_steps,
)

### Using recurrent dropout to prevent overfitting
dropout-regularized, GRU based

In [None]:
# Same architecture as the baseline, with dropout applied to both the layer
# inputs (dropout) and the recurrent connections (recurrent_dropout).
regularized_gru = GRU(32,
                      input_shape = input_shape,
                      dropout = 0.2,
                      recurrent_dropout = 0.2)
model2 = Sequential([regularized_gru, Dense(1)])
del regularized_gru  # keep the notebook namespace identical to the original cell

model2.compile(loss = 'mae', optimizer = RMSprop())

history2 = model2.fit_generator(
    train_gen,
    epochs = 20,
    steps_per_epoch = 500,
    validation_data = val_gen,
    validation_steps = val_steps,
)

### Stacking recurrent layers
dropout-regularized, stacked GRU

In [None]:
# Two stacked GRU layers. The first returns its full output sequence
# (return_sequences = True) so the second recurrent layer receives one vector
# per timestep rather than only the final state.
model3 = Sequential([
    GRU(32,
        input_shape = input_shape,
        return_sequences = True,
        dropout = 0.1,
        recurrent_dropout = 0.5),
    GRU(64,
        activation = 'relu',
        dropout = 0.1,
        recurrent_dropout = 0.5),
    Dense(1),
])

model3.compile(loss = 'mae', optimizer = RMSprop())
history3 = model3.fit_generator(
    train_gen,
    epochs = 20,
    steps_per_epoch = 500,
    validation_data = val_gen,
    validation_steps = val_steps,
)

### Using bidirectional RNNs
look at a sequence both ways

In [None]:
from keras.datasets import imdb
from keras.preprocessing import sequence

In [None]:
# Vocabulary size; passed as num_words when the IMDB data is loaded below.
max_features = 10000
max_length = 500      # cut off texts after this many words
# NOTE(review): this rebinds the batch_size = 128 used for the temperature
# generators earlier in the notebook, while the fit call further down passes
# batch_size = 128 as a literal - this value looks unused; confirm.
batch_size = 32

In [None]:
# Load the IMDB train/test splits with the vocabulary capped at max_features
# (num_words), then report how many sequences each split contains.
print('Loading data...')
(input_train, y_train), (input_test, y_test) = imdb.load_data(num_words = max_features)
print(len(input_train), 'train sequences')
print(len(input_test), 'test sequences')

In [None]:
# Bring every review to length max_length so each split becomes a single
# 2D (samples x time) array; the original variable names are rebound to the
# padded versions.
print('Pad sequences (samples x time)')
input_train = sequence.pad_sequences(input_train, maxlen = max_length)
input_test = sequence.pad_sequences(input_test, maxlen = max_length)
print('input_train shape:', input_train.shape)
print('input_test shape:', input_test.shape)

In [None]:
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, GRU, Dense

In [None]:
# Bidirectional LSTM sentiment classifier: word embeddings -> an LSTM wrapped
# in Bidirectional -> one sigmoid unit giving the probability of the positive
# class.
model_biLSTM = Sequential()
# The embedding's input dimension must match the vocabulary cap used when the
# data was loaded (num_words = max_features); using the shared constant instead
# of a repeated literal keeps the two in sync if the cap is ever changed.
model_biLSTM.add(Embedding(max_features, 32))
model_biLSTM.add(Bidirectional(LSTM(32)))
model_biLSTM.add(Dense(1, activation = 'sigmoid'))

model_biLSTM.compile(optimizer = 'rmsprop',
                     loss = 'binary_crossentropy',
                     metrics = ['acc'])

model_biLSTM.summary()

In [None]:
# Train for 10 epochs with 20% of the training data set aside for validation
# (validation_split = 0.2). Note the batch size is given here as a literal.
history_biLSTM = model_biLSTM.fit(input_train, y_train, validation_split = 0.2,
                                  epochs = 10, batch_size = 128)

In [None]:
# Save the trained weights to a file in the current working directory.
model_biLSTM.save_weights('IMDB-bidirectional-LSTM.h5')

In [None]:
import matplotlib.pyplot as plt

In [None]:
def history_plot(history):
    """Plot training vs. validation loss and accuracy, one figure each.

    Parameters
    ----------
    history : object with a ``history`` mapping that contains the per-epoch
        lists ``'acc'``, ``'val_acc'``, ``'loss'`` and ``'val_loss'`` (for
        example the value returned by a Keras ``fit`` call compiled with
        ``metrics = ['acc']`` and run with validation data).

    Raises
    ------
    KeyError
        If any of the four keys is missing from ``history.history``.
    """
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Epochs are numbered from 1 on the x-axis.
    epochs = range(1, len(acc)+1)

    plt.plot(epochs, loss, 'k', label = 'Training Loss')
    # Legend label spelling fixed (was 'Valdiation Loss').
    plt.plot(epochs, val_loss, ':r', label = 'Validation Loss')
    plt.legend()
    plt.title('Loss')
    plt.show()

    plt.plot(epochs, acc, '-k', label = 'Training Acc')
    plt.plot(epochs, val_acc, ':r', label = 'Validation Acc')
    plt.legend()
    plt.title('Accuracy')
    plt.show()

In [None]:
# Show the loss and accuracy curves recorded while training the bidirectional LSTM.
history_plot(history_biLSTM)

In [None]:
# Evaluate the trained model on the padded test split.
model_biLSTM.evaluate(input_test, y_test)