In [110]:
from keras.models import Model
from keras.layers import Dense
from keras.layers import Input, LSTM, GRU, Bidirectional, GlobalMaxPool1D, MaxPooling1D, Embedding
import numpy as np
from keras.layers import SimpleRNN # new! 
import matplotlib.pyplot as plt
from keras.layers import SpatialDropout1D, Conv1D, GlobalMaxPooling1D # new! 
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import roc_auc_score 
import keras.backend as K
# Prefer the cuDNN-backed recurrent layers when a GPU is visible.
# `_get_available_gpus` is a private helper reached through the TensorFlow
# backend module; it may be absent on other backends or Keras releases, so
# probe for it defensively and keep the portable LSTM/GRU imported above when
# it cannot be found instead of failing the whole import cell.
try:
  _gpu_available = len(K.tensorflow_backend._get_available_gpus()) > 0
except AttributeError:
  _gpu_available = False
if _gpu_available:
  from keras.layers import CuDNNLSTM as LSTM
  from keras.layers import CuDNNGRU as GRU

In [111]:
# Toy dimensions shared by the demo cells below.
T = 8  # sequence length (number of time steps)
D = 2  # dimensionality of the input vector at each time step
M = 3  # hidden layer size (number of units)

In [112]:
# A single random sequence drawn from a standard normal: shape (1, T, D).
X = np.random.standard_normal(size=(1, T, D))
X

array([[[-0.44618286,  0.88019333],
        [-0.89017036,  0.26627792],
        [-1.49589394,  0.77446926],
        [-1.47523688, -0.10624239],
        [ 0.57303466, -0.81412297],
        [ 0.38048613, -1.62875916],
        [ 0.38587117, -0.82175859],
        [ 0.49133437,  0.9745502 ]]])

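### In the following feedforward model the Dense layer has 3 neurons (M = 3). The same layer is applied independently to each of the 8 time steps (T = sequence length), so the output has shape (1, 8, 3), i.e. 8 × 3 = 24 output values. The layer itself has only D × M + M = 2 × 3 + 3 = 9 trainable parameters, because the same weights are reused at every time step.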

In [113]:
def Feedforward():
    """Push the toy batch through a single sigmoid Dense layer.

    Builds Input(shape=(T, D)) -> Dense(M), prints the model summary, then
    prints the prediction for the global ``X`` followed by its shape.
    """
    inputs = Input(shape=(T, D))
    dense_out = Dense(M, activation='sigmoid')(inputs)

    net = Model(inputs=inputs, outputs=dense_out)
    net.summary()

    prediction = net.predict(X)
    print("Feedforward output:", prediction)
    print("Feedforward output.shape:", prediction.shape)


Feedforward()

Model: "model_128"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_90 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
dense_20 (Dense)             (None, 8, 3)              9         
Total params: 9
Trainable params: 9
Non-trainable params: 0
_________________________________________________________________
Feedforward output: [[[0.5088696  0.3436704  0.7363014 ]
  [0.5245103  0.40946135 0.7488073 ]
  [0.5396385  0.3061217  0.8854835 ]
  [0.5430566  0.43362778 0.8113289 ]
  [0.48715964 0.6535339  0.2468077 ]
  [0.49646315 0.7496458  0.19164243]
  [0.49260673 0.6449926  0.28355432]
  [0.48132563 0.37897137 0.52923524]]]
Feedforward output.shape: (1, 8, 3)


### In an RNN with 3 hidden units (M = 3), a sequence of length 8 (T = 8) is fed into the network one time step at a time. With only `return_state=True`, the layer outputs the value at the final time step T(7), one value from each hidden neuron, so the output has shape (1, 3); because `return_state` is True it also returns h, the final hidden state of each hidden neuron, with shape (1, 3) (for a SimpleRNN the output and h hold the same values). If `return_sequences=True` is set as well, we get an output vector at every time step from T(0) to T(7) (8 is the sequence length): each time step contributes a (1, 3) vector from the hidden units, so the output shape becomes (1, 8, 3), while h is still (1, 3).

In [114]:
def RNN1():
    """Run a SimpleRNN with ``return_state=True`` on the toy batch.

    The layer yields two tensors, so ``predict`` returns a pair: the output at
    the last time step and the final hidden state. Prints the model summary,
    then each tensor together with its shape.
    """
    inputs = Input(shape=(T, D))
    rnn_outputs = SimpleRNN(M, return_state=True)(inputs)

    net = Model(inputs=inputs, outputs=rnn_outputs)
    net.summary()

    o, h = net.predict(X)
    print("RNN o:", o)
    print("RNN o.shape:", o.shape)
    print("RNN h:", h)
    # This line prints the shape, so label it as such (it used to reuse "RNN h:").
    print("RNN h.shape:", h.shape)


RNN1()

Model: "model_129"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_91 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
simple_rnn_10 (SimpleRNN)    [(None, 3), (None, 3)]    18        
Total params: 18
Trainable params: 18
Non-trainable params: 0
_________________________________________________________________
RNN o: [[ 0.62241155  0.4309334  -0.56761813]]
RNN o.shape: (1, 3)
RNN h: [[ 0.62241155  0.4309334  -0.56761813]]
RNN h: (1, 3)


In [115]:
def RNN2():
    """Run a plain SimpleRNN (no returned state, no sequences) on the toy batch.

    With the default flags the layer produces a single tensor, so only the
    output and its shape are printed after the model summary.
    """
    inputs = Input(shape=(T, D))
    last_step = SimpleRNN(M)(inputs)

    net = Model(inputs=inputs, outputs=last_step)
    net.summary()

    result = net.predict(X)
    print("RNN o:", result)
    print("RNN o.shape:", result.shape)


RNN2()

Model: "model_130"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_92 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
simple_rnn_11 (SimpleRNN)    (None, 3)                 18        
Total params: 18
Trainable params: 18
Non-trainable params: 0
_________________________________________________________________
RNN o: [[ 0.1809945 -0.8680041 -0.2581942]]
RNN o.shape: (1, 3)


In [116]:
def RNN3():
    """Run a SimpleRNN with both ``return_state`` and ``return_sequences`` set.

    ``predict`` returns a pair: the per-time-step output sequence and the
    final hidden state. Prints the model summary, then each tensor together
    with its shape.
    """
    inputs = Input(shape=(T, D))
    rnn_outputs = SimpleRNN(M, return_state=True, return_sequences=True)(inputs)

    net = Model(inputs=inputs, outputs=rnn_outputs)
    net.summary()

    o, h = net.predict(X)
    print("RNN3 o:", o)
    print("RNN3 o.shape:", o.shape)
    print("RNN3 h:", h)
    # This line prints the shape, so label it as such (it used to reuse "RNN3 h:").
    print("RNN3 h.shape:", h.shape)


RNN3()

Model: "model_131"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_93 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
simple_rnn_12 (SimpleRNN)    [(None, 8, 3), (None, 3)] 18        
Total params: 18
Trainable params: 18
Non-trainable params: 0
_________________________________________________________________
RNN3 o: [[[-0.313571    0.41735008  0.28294384]
  [ 0.7114741   0.56325203  0.66680974]
  [ 0.90764344  0.21954091  0.5381828 ]
  [ 0.9522016  -0.28731886  0.33200467]
  [ 0.30792224 -0.88614285 -0.66227067]
  [-0.05819295 -0.7262442  -0.6822886 ]
  [-0.5579893  -0.32315716 -0.48899856]
  [-0.94081473  0.5529743  -0.00507389]]]
RNN3 o.shape: (1, 8, 3)
RNN3 h: [[-0.94081473  0.5529743  -0.00507389]]
RNN3 h: (1, 3)


In [117]:
def lstm1():
    """Run an LSTM with ``return_state=True`` on the toy batch.

    ``predict`` returns three tensors, unpacked as ``o`` (layer output),
    ``h1`` (final hidden state) and ``c1`` (final cell state). Prints the
    model summary, then each tensor together with its shape.
    """
    inputs = Input(shape=(T, D))
    lstm_outputs = LSTM(M, return_state=True)(inputs)

    net = Model(inputs=inputs, outputs=lstm_outputs)
    net.summary()

    o, h1, c1 = net.predict(X)
    print("o:", o)
    print("o.shape:", o.shape)
    print("h1:", h1)
    # The shape lines used to reuse the value labels ("h1:" / "c1:").
    print("h1.shape:", h1.shape)
    print("c1:", c1)
    print("c1.shape:", c1.shape)


lstm1()

Model: "model_132"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_94 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
lstm_29 (LSTM)               [(None, 3), (None, 3), (N 72        
Total params: 72
Trainable params: 72
Non-trainable params: 0
_________________________________________________________________
o: [[ 0.00699272  0.02722789 -0.06296945]]
o.shape: (1, 3)
h1: [[ 0.00699272  0.02722789 -0.06296945]]
h1: (1, 3)
c1: [[ 0.01578255  0.04722785 -0.13898395]]
c1: (1, 3)


In [118]:
def lstm2():
    """Run an LSTM with ``return_state`` and ``return_sequences`` both set.

    ``predict`` returns three tensors, unpacked as ``o`` (output at every time
    step), ``h1`` (final hidden state) and ``c1`` (final cell state). Prints
    the model summary, then each tensor together with its shape.
    """
    inputs = Input(shape=(T, D))
    lstm_outputs = LSTM(M, return_state=True, return_sequences=True)(inputs)

    net = Model(inputs=inputs, outputs=lstm_outputs)
    net.summary()

    o, h1, c1 = net.predict(X)
    print("o:", o)
    print("o.shape:", o.shape)
    print("h1:", h1)
    # The shape lines used to reuse the value labels ("h1:" / "c1:").
    print("h1.shape:", h1.shape)
    print("c1:", c1)
    print("c1.shape:", c1.shape)


lstm2()

Model: "model_133"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_95 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
lstm_30 (LSTM)               [(None, 8, 3), (None, 3), 72        
Total params: 72
Trainable params: 72
Non-trainable params: 0
_________________________________________________________________
o: [[[-0.02079856 -0.04695788 -0.02597704]
  [-0.11912648 -0.08934309  0.01937718]
  [-0.21947117 -0.16449665  0.03939017]
  [-0.3540731  -0.22440933  0.14139205]
  [-0.10254806 -0.12842722  0.22566453]
  [-0.07818401 -0.06015252  0.33402833]
  [-0.01837086 -0.04505     0.24178053]
  [ 0.04082727 -0.06423474  0.05443963]]]
o.shape: (1, 8, 3)
h1: [[ 0.04082727 -0.06423474  0.05443963]]
h1: (1, 3)
c1: [[ 0.10900966 -0.17327632  0.12877311]]
c1: (1, 3)


In [119]:
def gru1():
    """Run a GRU with ``return_state=True`` and print the two returned tensors.

    Unlike the neighbouring demos, no model summary or shapes are printed.
    """
    inputs = Input(shape=(T, D))
    gru_outputs = GRU(M, return_state=True)(inputs)

    net = Model(inputs=inputs, outputs=gru_outputs)
    last_output, final_state = net.predict(X)
    print("o:", last_output)
    print("h:", final_state)


gru1()

o: [[ 0.22681835  0.13234696 -0.06720442]]
h: [[ 0.22681835  0.13234696 -0.06720442]]


In [120]:
def gru2():
    """Run a GRU with ``return_state`` and ``return_sequences`` both set.

    Prints the model summary, then the per-time-step output sequence and the
    final state returned by ``predict``.
    """
    inputs = Input(shape=(T, D))
    gru_outputs = GRU(M, return_state=True, return_sequences=True)(inputs)

    net = Model(inputs=inputs, outputs=gru_outputs)
    net.summary()

    sequence_output, final_state = net.predict(X)
    print("o:", sequence_output)
    print("h:", final_state)


gru2()

Model: "model_135"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_97 (InputLayer)        (None, 8, 2)              0         
_________________________________________________________________
gru_8 (GRU)                  [(None, 8, 3), (None, 3)] 54        
Total params: 54
Trainable params: 54
Non-trainable params: 0
_________________________________________________________________
o: [[[-0.032772   -0.222101   -0.27413872]
  [ 0.03588651 -0.3932826  -0.36444336]
  [ 0.06800756 -0.5964422  -0.5277087 ]
  [ 0.17930047 -0.6431465  -0.5011515 ]
  [-0.02837418 -0.12780999  0.09267104]
  [ 0.17021193  0.30628246  0.5586179 ]
  [ 0.1879003   0.42520466  0.5834806 ]
  [ 0.0690013   0.27580395  0.09497865]]]
h: [[0.0690013  0.27580395 0.09497865]]


In [121]:
def bidirectional():
    """Run a Bidirectional LSTM that returns sequences and states.

    ``predict`` returns five tensors, unpacked as ``o, h1, c1, h2, c2``: the
    output sequence followed by a hidden/cell state pair for each of the two
    wrapped directions. Prints the output and first state pair with their
    shapes, then the second state pair.
    """
    inputs = Input(shape=(T, D))
    wrapped = Bidirectional(LSTM(M, return_state=True, return_sequences=True))
    bi_outputs = wrapped(inputs)

    net = Model(inputs=inputs, outputs=bi_outputs)
    o, h1, c1, h2, c2 = net.predict(X)
    print("o:", o)
    print("o.shape:", o.shape)
    print("h1:", h1)
    # The shape lines used to reuse the value labels ("h1:" / "c1:").
    print("h1.shape:", h1.shape)
    print("c1:", c1)
    print("c1.shape:", c1.shape)
    print("h2:", h2)
    print("c2:", c2)


bidirectional()

o: [[[ 0.08144294 -0.0144922   0.02785569  0.01346204  0.15914294
    0.12310418]
  [ 0.11100964  0.021185   -0.02884955  0.10869668  0.17691101
    0.04310717]
  [ 0.12836488  0.03137673 -0.07398399  0.12803043  0.17053956
   -0.00232227]
  [ 0.14327629  0.0727216  -0.13148333  0.12695935  0.11495031
   -0.08179277]
  [ 0.01370428  0.09521818 -0.11657449  0.00400825  0.01867541
   -0.21141785]
  [-0.22860189  0.13200381 -0.12908387  0.00555963  0.03784036
   -0.17415155]
  [-0.30896026  0.07103384 -0.11853712 -0.05564697 -0.02674422
   -0.06634852]
  [-0.20136945 -0.08543979  0.01772762 -0.08496039 -0.08742456
    0.07500429]]]
o.shape: (1, 8, 6)
h1: [[-0.20136945 -0.08543979  0.01772762]]
h1: (1, 3)
c1: [[-0.3464731  -0.19255051  0.03358482]]
c1: (1, 3)
h2: [[0.01346204 0.15914294 0.12310418]]
c2: [[0.02811301 0.28506312 0.24347708]]


In [122]:
def stacked():
    """Stack two sequence-returning LSTMs, max-pool over time, then classify.

    Graph: Input(T, D) -> LSTM(M) -> LSTM(M) -> GlobalMaxPool1D -> Dense(2).
    Three models sharing these layers are built so the intermediate tensors
    can be inspected; printed in order are the pooled vector (``o``), the
    second LSTM's sequence (``o1``) and the Dense output (``o2``), each with
    its shape.
    """
    inputs = Input(shape=(T, D))
    first_seq = LSTM(M, return_sequences=True)(inputs)
    second_seq = LSTM(M, return_sequences=True)(first_seq)
    pooled = GlobalMaxPool1D()(second_seq)
    probs = Dense(2, activation="sigmoid")(pooled)

    dense_model = Model(inputs=inputs, outputs=probs)
    sequence_model = Model(inputs=inputs, outputs=second_seq)
    pooled_model = Model(inputs=inputs, outputs=pooled)

    o = pooled_model.predict(X)
    print("o:", o)
    print("o.shape:", o.shape)

    o1 = sequence_model.predict(X)
    print("o1:", o1)
    # This is the shape of o1; it used to be printed under the "o.shape:" label.
    print("o1.shape:", o1.shape)

    o2 = dense_model.predict(X)
    print("o2:", o2)
    print("o2.shape:", o2.shape)


stacked()

o: [[-0.00954142 -0.00746712  0.1146282 ]]
o.shape: (1, 3)
o1: [[[-0.00954142 -0.00746712  0.01006698]
  [-0.02308757 -0.02658959  0.03071595]
  [-0.03544412 -0.06057581  0.06082741]
  [-0.05449306 -0.0934532   0.0907956 ]
  [-0.07830082 -0.10630091  0.10217678]
  [-0.09839532 -0.12024578  0.1146282 ]
  [-0.10506757 -0.12314402  0.10794672]
  [-0.09300842 -0.09726901  0.07279814]]]
o.shape: (1, 8, 3)
o2: [[0.49007878 0.52763605]]
o2.shape: (1, 2)


In [123]:
# NOTE(review): rebinds the global M (set to 3 in the configuration cell) to 25
# for every cell executed afterwards; conv1d() below hard-codes its layer
# sizes and does not read M.
M=25

def conv1d():
    """Inspect every stage of an LSTM -> Conv1D -> global max pool -> Dense stack.

    Graph: Input(T, D) -> LSTM(3, sequences) -> Conv1D(3 filters, kernel 2,
    stride 2, relu) -> GlobalMaxPool1D -> Dense(1, sigmoid). A separate model
    is built for each intermediate tensor, and its prediction for the global
    ``X`` is printed with its shape.
    """
    inputs = Input(shape=(T, D))
    lstm_seq = LSTM(3, return_sequences=True)(inputs)
    conv_out = Conv1D(3, 2, strides=2, activation='relu')(lstm_seq)
    pooled = GlobalMaxPool1D()(conv_out)
    prob = Dense(1, activation="sigmoid")(pooled)

    # Stage 1: raw LSTM sequence.
    lstm_model = Model(inputs=inputs, outputs=lstm_seq)
    lstm_pred = lstm_model.predict(X)
    print("o_lstm:", lstm_pred)
    print("o_lstm.shape:", lstm_pred.shape)

    # Stage 2: strided convolution over the LSTM sequence.
    conv_model = Model(inputs=inputs, outputs=conv_out)
    conv_pred = conv_model.predict(X)
    print("o:", conv_pred)
    print("o.shape:", conv_pred.shape)

    # Stage 3: maximum over the remaining time axis.
    pool_model = Model(inputs=inputs, outputs=pooled)
    pool_pred = pool_model.predict(X)
    print("o1:", pool_pred)
    print("o1.shape:", pool_pred.shape)

    # Stage 4: single sigmoid unit on the pooled features.
    head_model = Model(inputs=inputs, outputs=prob)
    head_pred = head_model.predict(X)
    print("o2:", head_pred)
    print("o2.shape:", head_pred.shape)


conv1d()

o_lstm: [[[-0.15176934 -0.17192757 -0.08958323]
  [-0.28058323 -0.21000624 -0.15712407]
  [-0.4789133  -0.3390882  -0.21852125]
  [-0.5469998  -0.30317464 -0.26870087]
  [-0.2458614  -0.21090764 -0.0604994 ]
  [-0.04673616 -0.09291019  0.19289699]
  [ 0.06567791 -0.00607281  0.24865761]
  [-0.00932621 -0.06075051  0.1191488 ]]]
o_lstm.shape: (1, 8, 3)
o: [[[0.         0.         0.        ]
  [0.         0.         0.        ]
  [0.07827584 0.         0.        ]
  [0.13706987 0.22284247 0.        ]]]
o.shape: (1, 4, 3)
o1: [[0.13706987 0.22284247 0.        ]]
o1.shape: (1, 3)
o2: [[0.4921129]]
o2.shape: (1, 1)


In [124]:
def conv2d():
    """Apply a single Conv1D (8 filters, kernel size 4, relu) to the toy batch.

    Despite the name, the layer used is Conv1D. Prints the prediction for the
    global ``X`` and its shape.

    NOTE(review): a later cell defines another ``conv2d``; once that cell has
    run, this definition is replaced.
    """
    inputs = Input(shape=(T, D))
    feature_maps = Conv1D(8, 4, activation='relu')(inputs)

    net = Model(inputs=inputs, outputs=feature_maps)
    result = net.predict(X)
    print("o:", result)
    print("o.shape:", result.shape)


conv2d()

o: [[[0.10587019 0.5994359  0.01610544 0.         0.         0.46499228
   0.52529055 0.        ]
  [0.12000483 0.         0.08951229 0.         0.04046664 0.30610287
   0.13834925 0.08928944]
  [0.         0.1547158  1.3044906  0.7653545  0.         0.22515121
   0.47719976 0.83462465]
  [0.03537676 0.30171108 0.840675   0.9079163  0.         0.
   0.44028392 0.57366836]
  [0.         0.22010969 0.         0.8329048  0.         0.
   0.         0.6720611 ]]]
o.shape: (1, 5, 8)


In [125]:
def conv2d():
    """Inspect each stage of a Conv1D -> MaxPooling1D -> Conv1D stack.

    Graph: Input(T, D) -> Conv1D(6 filters, kernel 2, stride 1, relu)
    -> MaxPooling1D(2) -> Conv1D(2 filters, kernel 2, stride 1, relu).
    Despite the name, every convolution here is Conv1D. One model is built per
    stage and its prediction for the global ``X`` is printed with its shape.

    NOTE(review): this reuses the name of the ``conv2d`` defined in an earlier
    cell and replaces it once this cell runs.
    """
    inputs = Input(shape=(T, D))
    first_conv = Conv1D(6, 2, strides=1, activation='relu')(inputs)
    # Local max pooling with window 2 (not a global pool, although the printed
    # labels say "gmp").
    pooled = MaxPooling1D(2)(first_conv)
    second_conv = Conv1D(2, 2, strides=1, activation='relu')(pooled)

    first_conv_model = Model(inputs=inputs, outputs=first_conv)
    first_conv_pred = first_conv_model.predict(X)
    print("o:", first_conv_pred)
    print("o.shape:", first_conv_pred.shape)

    pool_model = Model(inputs=inputs, outputs=pooled)
    pool_pred = pool_model.predict(X)
    print("o_gmp:", pool_pred)
    print("o_gmp.shape:", pool_pred.shape)

    second_conv_model = Model(inputs=inputs, outputs=second_conv)
    second_conv_pred = second_conv_model.predict(X)
    print("o_conv1:", second_conv_pred)
    print("o_conv1.shape:", second_conv_pred.shape)


conv2d()

o: [[[0.5870526  0.14377053 0.805825   0.8526427  0.         0.        ]
  [1.0503271  0.13416599 0.6050166  0.8548841  0.10155728 0.44733346]
  [0.9637931  0.08674317 1.1254237  0.914521   0.23590168 0.        ]
  [0.24759188 0.         0.         0.         0.42054337 0.        ]
  [0.         0.         0.         0.         0.7089676  0.        ]
  [0.         0.         0.         0.         0.8636816  0.5877091 ]
  [0.15279739 0.         0.         0.         0.         0.66053605]]]
o.shape: (1, 7, 6)
o_gmp: [[[1.0503271  0.14377053 0.805825   0.8548841  0.10155728 0.44733346]
  [0.9637931  0.08674317 1.1254237  0.914521   0.42054337 0.        ]
  [0.         0.         0.         0.         0.8636816  0.5877091 ]]]
o_gmp.shape: (1, 3, 6)
o_conv1: [[[0.        1.3526652]
  [0.        0.5773834]]]
o_conv1.shape: (1, 2, 2)
