<a href="https://colab.research.google.com/github/Whitchurch/Tensorflow/blob/main/Team17_CNN_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install tensorflow-GPU

Install Tensorflow on Google Co-lab

In [9]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import BatchNormalization
from keras.layers import AveragePooling2D
from keras.layers import Flatten
from keras.layers import CuDNNLSTM # A superior LSTM that uses GPU more optimized for training.
from keras.layers import LSTM
from keras.models import Sequential

Start defining the layers of the CNN - From the De-noising paper

In [None]:
model = Sequential(name="DNN using CONV for Denoising")

# Convolution layers are used to extract the most prominent features of the input data.
#Layer 1:
#Conv-layer -> BatchNorm -> RELU-> AvdPooling
model.add(Conv2D(36, (19,1), input_shape=(30000,1,1),activation='relu',padding='SAME',strides=(1,1),name='conv_1')) 
model.add(BatchNormalization(name='batchnorm_1'))
model.add(Dense(units=36,activation='relu',name='relu_1'))
model.add(AveragePooling2D(pool_size=(2,1),strides=(4,1),name='avgpool_1'))

#Layer 2:
#Conv-layer -> BatchNorm -> RELU-> AvdPooling
model.add(Conv2D(36,(19,1),strides=(1,1),padding='SAME',name='conv_2'))
model.add(BatchNormalization(name='batchnorm_2'))
model.add(Dense(units=36,activation='relu',name='relu_2'))
model.add(AveragePooling2D(pool_size=(2,1),strides=(4,1),name='avgpool_2'))

#Layer 3:
#Conv-layer -> BatchNorm -> RELU-> AvdPooling
model.add(Conv2D(36,(19,1),strides=(1,1),padding='SAME',name='conv_3'))
model.add(BatchNormalization(name='batchnorm_3'))
model.add(Dense(units=36,activation='relu',name='relu_3'))
model.add(AveragePooling2D(pool_size=(2,1),strides=(4,1),name='avgpool_3'))

#Layer 4:
#Conv-layer -> BatchNorm -> RELU-> AvdPooling
model.add(Conv2D(36,(19,1),strides=(1,1),padding='SAME',name='conv_4'))
model.add(BatchNormalization(name='batchnorm_4'))
model.add(Dense(units=36,activation='relu',name='relu_4'))
model.add(AveragePooling2D(pool_size=(2,1),strides=(4,1),name='avgpool_4'))


#Layer 5:
#Conv-layer -> BatchNorm -> RELU-> AvdPooling
model.add(Conv2D(36,(19,1),strides=(1,1),padding='SAME',name='conv_5'))
model.add(BatchNormalization(name='batchnorm_5'))
model.add(Dense(units=36,activation='relu',name='relu_5'))
model.add(AveragePooling2D(pool_size=(2,1),strides=(4,1),name='avgpool_5'))


#Layer 6:
#Conv-layer -> BatchNorm -> RELU-> AvdPooling
model.add(Conv2D(36,(19,1),strides=(1,1),padding='SAME',name='conv_6'))
model.add(BatchNormalization(name='batchnorm_6'))
model.add(Dense(units=36,activation='relu',name='relu_6'))
model.add(AveragePooling2D(pool_size=(2,1),strides=(4,1),name='avgpool_6'))
model.add(Flatten())

#Now adding in the fully-connected dense layer: This will act on the data extracted from the CNN in the prior stages
model.add(Dense(units= 30000,activation=None))

model.summary()
                                    

Start defining the LSTM implementation:

In [22]:
model1 = Sequential(name="DNN using LSTM for Denoising");
#The LSTM->RELU-> return sequential ouput.
model1.add(LSTM(140,input_shape=(30000,1,),name="lstm_1",activation='relu',return_sequences=True)) 

# sequential output/input -> LSTM2->Relu -> flattened output (return_sequence = false, by default)
#The output of LSTM2 is a flattened output -> Fully connected layer.
model1.add(LSTM(140,name="lstm_2",activation='relu'))

#Add the fully connected layer of 30000 
model.add(Dense(30000,activation=None))
model1.summary()


Model: "DNN using LSTM for Denoising"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 30000, 140)        79520     
_________________________________________________________________
lstm_2 (LSTM)                (None, 140)               157360    
Total params: 236,880
Trainable params: 236,880
Non-trainable params: 0
_________________________________________________________________


Helpful references:

1)This explains why last dense layer has no activation function:
  - we primarily do that if we are trying to accuractely do regression.

https://stats.stackexchange.com/questions/361066/what-is-the-point-of-having-a-dense-layer-in-a-neural-network-with-no-activation

2)This video shows how to implement a stacked LSTM model:
https://www.youtube.com/watch?v=BSpXCRTOLJA

3) This video shows the inner anatomy of an LSTM, primarily the sigmoid and Tanh, used for gating inside the LSTM:
https://www.youtube.com/watch?v=8HyCNIVRbSU&t=632s

4) The blogpost that references the video in 3. Is linked here:
https://towardsdatascience.com/illustrated-guide-to-lstms-and-gru-s-a-step-by-step-explanation-44e9eb85bf21

