___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___

<h1 style="text-align: center;">Deep Learning<br><br>Session - 9<br><br>RNN Syntax Basics<br><br>Sine Wave<br></h1>

# RNN Syntax Basics

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

import warnings
# Silence library warnings so they do not clutter the cell outputs.
warnings.filterwarnings("ignore")
warnings.warn("this will not show")  # sanity check: suppressed by the filter above

# Default size for every matplotlib figure in the notebook.
plt.rcParams.update({"figure.figsize": (10, 6)})

sns.set_style("whitegrid")

# Render floats with three decimals when DataFrames are displayed.
pd.set_option("display.float_format", lambda value: '%.3f' % value)

# Uncomment to display all rows of a DataFrame.
# pd.set_option('display.max_rows', None)

# Display all columns of a DataFrame.
pd.set_option("display.max_columns", None)

## Creating Data

In [None]:
# 501 evenly spaced sample points on [0, 50] and the sine of each point.
x = np.linspace(start=0, stop=50, num=501)
y = np.sin(x)

In [None]:
# Inspect the sample points.
x

In [None]:
# Inspect the sine values computed for those points.
y

In [None]:
# Plot the sine values against the sample points.
plt.plot(x,y)

Let's turn this into a DataFrame

In [None]:
# One-column frame of sine values, indexed by the sample points.
df = pd.DataFrame({'Sine': y}, index=x)

In [None]:
# Display the frame: index = sample point, 'Sine' = sine of that point.
df

## Preprocessing of Data

### Train Test Split

In [None]:
# Total number of observations.
len(df)

In [None]:
# Fraction of the observations held out (from the end of the series) for testing.
test_percent = 0.15

In [None]:
# Test-set size before rounding.
len(df)*test_percent

In [None]:
# Number of rows reserved for the test set, rounded to the nearest whole number.
test_point = np.round(len(df)*test_percent)
test_point

In [None]:
# Position of the first test row; every row before it belongs to the training set.
test_ind = int(len(df) - test_point)
test_ind

In [None]:
# Chronological split: the first `test_ind` rows train the model, the rest are held out.
# Explicit copies make `train` and `test` independent of `df`, so the prediction columns
# assigned to `test` later on do not trigger pandas' SettingWithCopyWarning.
train = df.iloc[:test_ind].copy()
test = df.iloc[test_ind:].copy()

In [None]:
# Display the training rows.
train

In [None]:
# Display the held-out test rows.
test

### Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scaler with default settings.
scaler = MinMaxScaler()

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to the test rows (the test data never influences the fit).
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

## Time Series Generator

In [None]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
#help(TimeseriesGenerator)

In [None]:
# Generator settings for a small demonstration window.
length = 2       # number of timesteps in each input sequence
batch_size = 1   # number of sequences delivered per batch
stride = 1       # step between the starts of consecutive sequences

# The series is both the input and the target: each window predicts the value after it.
generator = TimeseriesGenerator(train_scaled, train_scaled,
                                length=length,
                                stride=stride,
                                batch_size=batch_size)

In [None]:
# Number of scaled training observations fed to the generator.
len(train_scaled)

In [None]:
# Number of batches the generator yields.
len(generator) # = (len(train)-length)/(batch_size*stride)

In [None]:
# Display the generator object itself (not its contents).
generator

In [None]:
# First batch, as an (inputs, targets) pair.
generator[0]

In [None]:
# What does the first batch look like?
# NOTE: this rebinds `y`, which until now held the sine values from the data-creation cell.
X, y = generator[0]

In [None]:
# Show the input window next to the target value it should predict.
print(f'Given the Array: \n{X}')
print(f'Predict this y: \n{y}')

In [None]:
# First ten scaled training values, to compare with the batch printed above.
train_scaled[:10]

In [None]:
# Plot the full series.
df.plot()

The longer the sequence length, the longer the training time.

In [None]:
# Rebuild the generator with a much longer input window.
length = 64      # number of timesteps in each input sequence
batch_size = 1   # number of sequences delivered per batch
generator = TimeseriesGenerator(train_scaled, train_scaled,
                                length=length,
                                batch_size=batch_size)

In [None]:
# What does the first batch look like?
# Unpack it into the input window `X` and its target `y`.
X, y = generator[0]

In [None]:
# Number of batches available with the longer window.
len(generator)

In [None]:
# Show the input window (flattened to one dimension for readability) and its target.
print(f'Given the Array: \n{X.flatten()}')
print(f'Predict this y: \n{y}')

## Modelling

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM

In [None]:
# We're only using one feature in our time series
# (used below as the last dimension of the model's input shape).
n_features = 1

In [None]:
# Fix the random seeds so weight initialisation, and therefore training, is repeatable
# across runs. Previously `seed` was defined but never applied.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Single-layer recurrent network for one-step-ahead prediction.
model = Sequential([
    # 100 recurrent units reading windows of shape (length, n_features)
    SimpleRNN(units=100, activation="tanh", input_shape=(length, n_features)),
    # one output value: the prediction for the next timestep
    Dense(units=1),
])

model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the layers and their parameter counts.
model.summary()

In [None]:
# Parameter count of the SimpleRNN layer, to compare with model.summary():
# params = 1 * ((size_of_input + 1) * size_of_output + size_of_output^2)
# i.e. input weights and biases, plus the recurrent weights.
1 * ((1+1)*100+(100**2))

In [None]:
# Train on the batches produced by the generator. `fit` accepts generators directly,
# so the deprecated `fit_generator` is not needed.
model.fit(generator, epochs = 5)

In [None]:
# Collect the per-epoch metrics recorded during training and plot them.
loss_df = pd.DataFrame(model.history.history)
loss_df.plot()

## Evaluate on Test Data

In [None]:
# The last `length` scaled training values: the window that precedes the first test point.
first_eval_batch = train_scaled[-length:]

In [None]:
# Shape of the window before reshaping.
first_eval_batch.shape

In [None]:
# Shape of one input batch from the generator, i.e. the shape the window must be given.
generator[0][0].shape

In [None]:
# Add a leading batch dimension so the window has shape (1, length, n_features).
first_eval_batch = first_eval_batch.reshape((1, length, n_features))
# Equivalent alternative:
# first_eval_batch = np.expand_dims(first_eval_batch, axis=0)

In [None]:
# Shape of the window after reshaping.
first_eval_batch.shape

In [None]:
# Predict the value that follows the last training window.
model.predict(first_eval_batch)

In [None]:
# First scaled test value, for comparison with the prediction above.
test_scaled[0]

In [None]:
# Set-up for the step-by-step walkthrough of the rolling forecast.
test_predictions = []

# Start from the last `length` training values, shaped (1, length, n_features).
first_eval_batch = train_scaled[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

In [None]:
# Shape of the current input window.
current_batch.shape

In [None]:
# Values in the current input window.
current_batch

In [None]:
# Demonstrate the window shift: drop the oldest timestep and append a new value
# (0.0085 is only an example value here) along the time axis.
np.append(current_batch[:,1:,:], [[[0.0085]]], axis = 1)

### Final For Loop to predict step by step 

In [None]:
# Rolling forecast over the test horizon: each prediction is fed back into the
# input window and used to predict the following step.
predictions_scaled = []

# Starting window: the last `length` scaled training values, shaped (1, length, n_features).
first_eval_batch = train_scaled[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

for step_idx in range(len(test)):

    # one-step-ahead prediction for the current window
    current_pred = model.predict(current_batch)

    # keep the prediction for this step
    predictions_scaled.append(current_pred[0])

    # slide the window: drop the oldest timestep, append the new prediction
    next_step = current_pred[np.newaxis, ...]
    current_batch = np.concatenate((current_batch[:, 1:, :], next_step), axis=1)

In [None]:
# Predictions for the test horizon, still on the scaled range.
predictions_scaled

In [None]:
# True scaled test values, for comparison with the predictions.
test_scaled

## Inverse Transformations and Comparing

In [None]:
# Map the scaled predictions back to the original value range.
predictions = scaler.inverse_transform(predictions_scaled)

In [None]:
# Predictions on the original scale.
predictions

In [None]:
# Test rows before the prediction column is added.
test

In [None]:
# Store the RNN predictions next to the true values.
test['RNN_Predictions'] = predictions

In [None]:
# Test rows with true values and RNN predictions side by side.
test

In [None]:
# Plot true values against the RNN predictions.
test.plot()

## Early Stopping and Validation Generator

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Callback configured to watch the validation loss with a patience of 2.
early_stop = EarlyStopping(monitor='val_loss', patience=2)

In [None]:
# The window length has to be shorter than the test data, otherwise the validation
# generator has nothing to yield. Here: length = 64, test data = 75 rows.
length = 64
batch_size = 1

# Training windows come from the training split.
generator = TimeseriesGenerator(train_scaled, train_scaled,
                                length=length, batch_size=batch_size)

# Validation windows come from the held-out test split.
validation_generator = TimeseriesGenerator(test_scaled, test_scaled,
                                           length=length, batch_size=batch_size)

In [None]:
# Number of validation batches available.
len(validation_generator)

## LSTM

In [None]:
# Same architecture as the SimpleRNN model, with an LSTM as the recurrent layer.
model = Sequential([
    # 100 LSTM units reading windows of shape (length, n_features)
    LSTM(units=100, activation="tanh", input_shape=(length, n_features)),
    # one output value: the prediction for the next timestep
    Dense(units=1),
])

model.compile(loss='mse', optimizer='adam')

In [None]:
# Print the layers and their parameter counts.
model.summary()

In [None]:
# Parameter count of the LSTM layer, to compare with model.summary():
# params = 4 * ((size_of_input + 1) * size_of_output + size_of_output^2)
# Same term as in the SimpleRNN formula, multiplied by 4.
4 * ((1+1)*100+(100**2))

In [None]:
# Train with the validation generator supplying validation data; `early_stop` monitors
# the validation loss. `fit` accepts generators directly, so the deprecated
# `fit_generator` is not needed.
model.fit(generator,
          validation_data = validation_generator,
          epochs = 20,
          callbacks = [early_stop])

In [None]:
# Collect the per-epoch metrics recorded during training and plot them.
loss_df = pd.DataFrame(model.history.history)
loss_df.plot()

In [None]:
# Rolling forecast over the test horizon with the LSTM: each prediction is fed
# back into the input window and used to predict the following step.
predictions_scaled = []

# Starting window: the last `length` scaled training values, shaped (1, length, n_features).
first_eval_batch = train_scaled[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

for step_idx in range(len(test)):

    # one-step-ahead prediction for the current window
    current_pred = model.predict(current_batch)

    # keep the prediction for this step
    predictions_scaled.append(current_pred[0])

    # slide the window: drop the oldest timestep, append the new prediction
    next_step = current_pred[np.newaxis, ...]
    current_batch = np.concatenate((current_batch[:, 1:, :], next_step), axis=1)

In [None]:
# Map the LSTM predictions back to the original value range and store them
# next to the true values.
predictions = scaler.inverse_transform(predictions_scaled)
test['LSTM Predictions'] = predictions
test

In [None]:
# Plot every column of `test`: the true values and the stored predictions.
test.plot()

## Retrain and Forecasting

In [None]:
# For the final model, fit a separate scaler on the complete series.
full_scaler = MinMaxScaler()
scaled_full_data = full_scaler.fit_transform(df)

In [None]:
# Same 64-step window, now sliding over the whole scaled series.
length = 64
generator = TimeseriesGenerator(data=scaled_full_data,
                                targets=scaled_full_data,
                                length=length,
                                batch_size=1)

In [None]:
# Final model: an LSTM with 100 units followed by a single-output dense layer,
# trained on windows from the full scaled series. No validation data is passed.
model = Sequential()
model.add(LSTM(100, input_shape = (length, n_features)))
model.add(Dense(1))
model.compile(optimizer = 'adam', loss = 'mse')
# `fit` accepts generators directly; the deprecated `fit_generator` is not needed.
model.fit(generator, epochs = 6)

In [None]:
# Forecast `length` steps beyond the end of the data: each prediction is fed back
# into the input window and used to predict the following step.
forecast = []

# Starting window: the last `length` values of the full scaled series,
# shaped (1, length, n_features).
first_eval_batch = scaled_full_data[-length:]
current_batch = first_eval_batch.reshape((1, length, n_features))

for step_idx in range(length):

    # one-step-ahead prediction for the current window
    current_pred = model.predict(current_batch)

    # keep the prediction for this step
    forecast.append(current_pred[0])

    # slide the window: drop the oldest timestep, append the new prediction
    next_step = current_pred[np.newaxis, ...]
    current_batch = np.concatenate((current_batch[:, 1:, :], next_step), axis=1)

In [None]:
# Undo the scaling with `full_scaler`, the scaler fitted on the full series this model
# was trained on. `scaler` was fitted on the training split only.
forecast = full_scaler.inverse_transform(forecast)

In [None]:
# Forecast values after the inverse transformation.
forecast

In [None]:
# Original data, to check where its index ends.
df

In [None]:
# Plot the original series.
df.plot()

In [None]:
# Number of forecast steps.
forecast_len = len(forecast)
forecast_len

In [None]:
# Spacing used for the forecast index values.
step = 0.1

In [None]:
# Width of the index range covered by the forecast.
forecast_len * step

In [None]:
# x-axis for the forecast: one point every `step` after the last observed index value.
# Deriving it from the data, instead of hard-coding 50.1 .. 56.5, keeps its length
# equal to len(forecast) and avoids the end-point ambiguity of np.arange with a
# fractional step.
forecast_index = df.index[-1] + step * np.arange(1, len(forecast) + 1)

In [None]:
# Number of index values; should match the number of forecast steps.
len(forecast_index)

In [None]:
# Plot the observed series and the forecast on the same axes.
plt.plot(df.index, df['Sine'])
plt.plot(forecast_index, forecast);

___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___