# In [None]:
### 1 - Implement Basic RNN network with Keras to predict time series ###

### 1.1 Prepare the data ###

### Importing Libraries ###

import tensorflow as tf 
import tensorflow.keras as keras
import tensorflow.keras.layers
import random
import numpy as np

### Fix every random seed so repeated runs are reproducible ###
import os

seed_value = 1
# NOTE(review): PYTHONHASHSEED is documented as being read at interpreter
# start-up, so setting it here presumably only affects child processes -- confirm.
os.environ['PYTHONHASHSEED'] = str(seed_value)


def reset_random_seeds():
    """Seed TensorFlow, NumPy and Python's `random` module with `seed_value`."""
    for seed_rng in (tf.random.set_seed, np.random.seed, random.seed):
        seed_rng(seed_value)


reset_random_seeds()

### Prepare the Preprocess Data Here ### 
from pandas import read_csv

### Design a function to Prepare Observation Sequence and Corresponding Labels ###

def create_dataset(dataset, look_back=12):
  """Slice a series into sliding observation windows and next-step labels.

  Args:
    dataset: 2-D array; only column 0 is read (indexed as ``dataset[rows, 0]``).
    look_back: number of consecutive values that form one observation window.

  Returns:
    A tuple ``(dataX, dataY)`` of NumPy arrays. ``dataX[i]`` holds
    ``dataset[i:i + look_back, 0]`` and ``dataY[i]`` holds the value that
    follows it, ``dataset[i + look_back, 0]``. When ``dataset`` has no more
    than ``look_back`` rows, ``dataX`` has shape ``(0, look_back)`` and
    ``dataY`` has shape ``(0,)``.
  """
  n_samples = max(len(dataset) - look_back, 0)
  dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
  dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
  if not dataX:
    # Keep the windows 2-D even when empty so callers that read
    # ``dataX.shape[1]`` (e.g. to reshape for an RNN) do not fail.
    dtype = np.asarray(dataset).dtype
    return np.empty((0, look_back), dtype=dtype), np.empty((0,), dtype=dtype)
  return np.array(dataX), np.array(dataY)

### Train and Test Data Loading with float32 type ###
dataframe_train = read_csv('train.txt', usecols=[1], engine='python')  # Read column 1 of train.txt
dataset_train = dataframe_train.values
dataset_train = dataset_train.astype('float32')  # Cast the training values to float32

dataframe_test = read_csv('test.txt', usecols=[1], engine='python')  # Read column 1 of test.txt
dataset_test = dataframe_test.values
# FIX: cast the *test* values; the previous code re-cast dataset_train here,
# so dataset_test silently held a copy of the training data.
dataset_test = dataset_test.astype('float32')

### Scale Training and Test Data ###
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
# Fit the scaler on the training data only; this maps the training range to [0, 1].
train = scaler.fit_transform(dataset_train)
# FIX: reuse the training min/max for the test data instead of re-fitting on
# it. Re-fitting put train and test on different scales and made the later
# scaler.inverse_transform() calls on training values use test statistics.
# Test values outside the training range may fall slightly outside [0, 1].
test = scaler.transform(dataset_test)

### Turn each scaled series into (12-step window, next value) samples ###
trainX, trainY = create_dataset(train, look_back=12)
testX, testY = create_dataset(test, look_back=12)

### Insert a length-1 middle axis: (samples, look_back) -> (samples, 1, look_back) ###
trainX, testX = (
    windows.reshape(windows.shape[0], 1, windows.shape[1])
    for windows in (trainX, testX)
)

### Show the TEST windows followed by their labels ###
print(testX, testY, sep="\n")

### 1.2 Build the RNN model ###

### Build the RNN Model ###
import keras
from keras.models import Sequential

keras.backend.clear_session()  # Reset Keras global state before building a new model

model = Sequential()  # Layers are stacked in the order they are added
model.add(keras.layers.SimpleRNN(units=4))  # Simple RNN layer with 4 units
model.add(keras.layers.Dense(units=1))  # Dense layer with a single output unit

### Compile the RNN Model ###
opt = keras.optimizers.Adam(learning_rate=0.005)
# FIX: pass the configured optimizer object. The previous code passed the
# string 'adam', which left `opt` (and its learning rate of 0.005) unused.
model.compile(loss='mean_squared_error', optimizer=opt)

### Train the RNN Model ###
model.fit(trainX, trainY, epochs=1000, batch_size=10, verbose=2)  # verbose=2 prints one line per epoch
model.summary()  # Print the model structure

### 1.3 Evaluate Predictive Model Performance ###

### Predict on both splits (outputs are still in the scaled space) ###
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

### Undo the scaling on predictions and labels ###
trainPredict, testPredict = (
    scaler.inverse_transform(scaled_pred)
    for scaled_pred in (trainPredict, testPredict)
)
# The labels are 1-D, so each is wrapped in a list to form a single-row 2-D input.
trainY, testY = (
    scaler.inverse_transform([scaled_labels])
    for scaled_labels in (trainY, testY)
)

### Root mean squared error (RMSE) per split ###
import math
from sklearn.metrics import mean_squared_error


def _rmse(observed, predicted):
    """Return the square root of the mean squared error between the inputs."""
    return math.sqrt(mean_squared_error(observed, predicted))


trainScore = _rmse(trainY[0], trainPredict[:, 0])
testScore = _rmse(testY[0], testPredict[:, 0])
print('Train Score: %.2f RMSE' % (trainScore))
print('Test Score: %.2f RMSE' % (testScore))

### Plot TEST observations against TEST predictions ###
import matplotlib.pyplot as plt

plt.plot(testY[0])  # observations (row 0 of the single-row label array)
plt.plot(testPredict)  # predictions
plt.show()

### 2 - Build an LSTM model to conduct sentiment analysis ###

### 2.1 Prepare the Data ###
import tensorflow as tf
import tensorflow.keras as keras
from keras import layers
import random
import numpy as np


### Fix every random seed so repeated runs are reproducible ###
import os

seed_value = 1
os.environ['PYTHONHASHSEED'] = str(seed_value)


def reset_random_seeds():
    """Seed TensorFlow, NumPy and Python's `random` module with `seed_value`."""
    for seed_rng in (tf.random.set_seed, np.random.seed, random.seed):
        seed_rng(seed_value)


reset_random_seeds()  # apply the fixed seed before any data or model work

### Load and pad the IMDB reviews ###
words, review = 1000, 100

max_features = words  # vocabulary cap passed to load_data as num_words
maxlen = review  # sequence length every review is padded/truncated to

# The dataset's second (test) split is used as the validation set.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")

# NOTE(review): pad_sequences runs with its default padding/truncating modes;
# confirm which end of a longer review is kept.
x_train, x_val = (
    keras.preprocessing.sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_val)
)

### Embedding -> LSTM -> sigmoid classifier ###
inputs = keras.Input(shape=(None,), dtype="int32")  # variable-length sequences of integer word ids
x = layers.Embedding(input_dim=max_features, output_dim=8)(inputs)  # 8-dimensional word vectors
x = layers.LSTM(units=8)(x)  # single LSTM layer with 8 units
outputs = layers.Dense(units=1, activation="sigmoid")(x)  # one sigmoid output unit

### Reset Keras global state, then assemble the model ###
keras.backend.clear_session()
model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.fit(x=x_train, y=y_train, batch_size=64, epochs=10, verbose=2)  # verbose=2: one line per epoch

### 2.3 - LSTM hyperparameter tuning ###

########################### Scenario 1 ###########################
##################################################################

### Re-seed every random generator so this scenario is reproducible ###
import os

seed_value = 1
os.environ['PYTHONHASHSEED'] = str(seed_value)


def reset_random_seeds():
    """Seed TensorFlow, NumPy and Python's `random` module with `seed_value`."""
    for seed_rng in (tf.random.set_seed, np.random.seed, random.seed):
        seed_rng(seed_value)


reset_random_seeds()  # apply the fixed seed before the layers are created

words, review = 1000, 100
max_features = words  # vocabulary size handed to the Embedding layer
# maxlen is recorded here but not read again in this scenario; x_train / x_val
# keep whatever padding they were given earlier in the script.
maxlen = review

### Model design with Embedding and two stacked LSTM layers ####
inputs = keras.Input(shape=(None,), dtype="int32")  # Variable-length sequences of integer word ids
x = layers.Embedding(max_features, 16)(inputs)  # Embed each word id in a 16-dimensional vector
# FIX: return_sequences takes a boolean. The previous string "true" only worked
# because any non-empty string is truthy (the string "false" would be too).
# The first LSTM must emit its full output sequence so the second LSTM
# receives a sequence as input.
x = layers.LSTM(16, return_sequences=True)(x)  # 1st LSTM layer, 16 units
x = layers.LSTM(16)(x)  # 2nd LSTM layer, 16 units, returns only the final output
outputs = layers.Dense(1, activation="sigmoid")(x)  # Classifier: one sigmoid unit

### Clear cached model to refresh memory and build new model for training ###
keras.backend.clear_session()  # Reset Keras global state
model = keras.Model(inputs, outputs)  # Build the new model
model.summary()  # Print the model summary

model.compile("adam", "binary_crossentropy", metrics=["accuracy"])  # optimizer, loss, metrics
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_val, y_val), verbose=2)  # Validate on (x_val, y_val) after each epoch

########################### Scenario 2 ###########################
##################################################################

### Set random seed to ensure deterministic results ###
import os
seed_value = 1
os.environ['PYTHONHASHSEED'] = str(seed_value)


def reset_random_seeds():
    """Seed TensorFlow, NumPy and Python's `random` module with `seed_value`."""
    tf.random.set_seed(seed_value)
    np.random.seed(seed_value)
    random.seed(seed_value)


reset_random_seeds()  # Re-seed all generators before the layers are created

# FIX: use the same `words` / `review` names as the other scenarios. The
# previous code defined `word`, assigned maxlen twice, and took the final
# maxlen from a `review` value that this scenario never set itself.
words = 1000
review = 100
max_features = words  # Vocabulary size handed to the Embedding layer
# maxlen is not read again in this scenario; x_train / x_val keep the padding
# they were given earlier in the script.
maxlen = review

### Model design with Embedding and two stacked LSTM layers ####
inputs = keras.Input(shape=(None,), dtype="int32")  # Variable-length sequences of integer word ids
x = layers.Embedding(max_features, 128)(inputs)  # Embed each word id in a 128-dimensional vector
# FIX: the keyword is `return_sequences` (the previous `return_sequence` is not
# an LSTM argument) and it takes a boolean, not the string "true". The first
# LSTM must emit its full output sequence so the second LSTM receives a
# sequence as input.
x = layers.LSTM(128, return_sequences=True)(x)  # 1st LSTM layer, 128 units
x = layers.LSTM(128)(x)  # 2nd LSTM layer, 128 units, returns only the final output
outputs = layers.Dense(1, activation="sigmoid")(x)  # Classifier: one sigmoid unit

### Clear cached model to refresh memory and build new model for training ###
keras.backend.clear_session()  # Reset Keras global state
model = keras.Model(inputs, outputs)  # Build the new model
model.summary()  # Print the model summary

model.compile("adam", "binary_crossentropy", metrics=["accuracy"])  # optimizer, loss, metrics
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_val, y_val), verbose=2)  # Validate on (x_val, y_val) after each epoch

########################### Scenario 3 ###########################
##################################################################

### Set random seed to ensure deterministic results ###
import os
seed_value = 1
os.environ['PYTHONHASHSEED'] = str(seed_value)


def reset_random_seeds():
    """Seed TensorFlow, NumPy and Python's `random` module with `seed_value`."""
    tf.random.set_seed(seed_value)
    np.random.seed(seed_value)
    random.seed(seed_value)


reset_random_seeds()  # Re-seed all generators before the data and layers are created

words = 1000
review = 200
max_features = words  # Vocabulary cap: passed to load_data as num_words
maxlen = review  # Sequence length for this scenario (200 instead of 100)

# FIX: x_train / x_val were already padded to length 100 earlier in the
# script, so changing maxlen alone had no effect on the training data.
# Reload the raw reviews and pad them to this scenario's maxlen.
# NOTE(review): pad_sequences runs with its default padding/truncating modes;
# confirm which end of a longer review is kept.
(x_train, y_train), (x_val, y_val) = keras.datasets.imdb.load_data(num_words=max_features)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

### Model design with Embedding and two stacked LSTM layers ####
inputs = keras.Input(shape=(None,), dtype="int32")  # Variable-length sequences of integer word ids
# FIX: Embedding was called with no arguments. Pass the vocabulary size and
# the 128-dimensional output size that the original comment describes.
x = layers.Embedding(max_features, 128)(inputs)  # Embed each word id in a 128-dimensional vector
# FIX: return_sequences takes a boolean, not the string "true". The first LSTM
# must emit its full output sequence so the second LSTM receives a sequence.
x = layers.LSTM(128, return_sequences=True)(x)  # 1st LSTM layer, 128 units
x = layers.LSTM(128)(x)  # 2nd LSTM layer, 128 units, returns only the final output
outputs = layers.Dense(1, activation="sigmoid")(x)  # Classifier: one sigmoid unit

### Clear cached model to refresh memory and build new model for training ###
keras.backend.clear_session()  # Reset Keras global state
model = keras.Model(inputs, outputs)  # Build the new model
model.summary()  # Print the model summary

model.compile("adam", "binary_crossentropy", metrics=["accuracy"])  # optimizer, loss, metrics
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_val, y_val), verbose=2)  # Validate on (x_val, y_val) after each epoch