In [1]:
import tensorflow as tf, numpy as np

In [2]:
from tokenizer import Vocabulary
from embedding import EmbeddingLayer
from lstm import LSTM

### MY EMBEDDING VS KERAS

In [4]:
## read the corpus; the context manager closes the file handle as soon as the lines are loaded
## (forward slashes keep the relative path valid on Windows as well as Linux/macOS)
with open("data/alice_wonderland.txt", 'r', encoding='utf-8') as corpus_file:
    f = corpus_file.readlines()

## create vocabulary + tokenize
vocab = Vocabulary()
token_sequences = vocab.tokenize(f, 26)  ## presumably 26 tokens per row (25 inputs + 1 target) -- confirm in tokenizer.py

## create embedding layer
embedding = EmbeddingLayer(vocab_size=vocab.size, hidden_dim=50) ## hidden_dim is a hyper-param

## create X & Y datasets: every column but the last is the input, the last column is the target
X = token_sequences[:,:-1]
y = token_sequences[:,-1]

lstm_inputs = embedding.predict(X)
lstm_inputs.shape ## batch_size x seq_length x dimensionality

(2829, 25, 50)

In [5]:
## Reference Keras embedding with the same dimensions as the custom layer.
## The sizes are read from `embedding` and `X` instead of being repeated as
## literals, so changing hidden_dim or the sequence length keeps both in sync.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab.size, embedding.hidden_dim, input_length=X.shape[1]))
model.compile('rmsprop', 'mse')

output_array = model.predict(X)
output_array.shape



(2829, 25, 50)

In [6]:
## shape check only (values are not compared): custom embedding output vs Keras embedding output
lstm_inputs.shape==output_array.shape

True

### MY LSTM VS KERAS

In [7]:
## Keras LSTM with 100 units, fed with the Keras embedding output
klstm = tf.keras.layers.LSTM(units=100)
output = klstm(output_array)

output.shape

TensorShape([2829, 100])

In [8]:
## second Keras LSTM with 100 units, this time fed with the custom embedding output
klstm1 = tf.keras.layers.LSTM(units=100)
output1 = klstm1(lstm_inputs)

output1.shape

TensorShape([2829, 100])

In [9]:
## weight arrays of klstm1; get_LSTM_UWb later unpacks them as (input weights, hidden-state weights, bias)
lstm_weights = klstm1.get_weights()

In [10]:
## custom LSTM (from lstm.py) that is being compared against Keras
## NOTE(review): the meaning/order of the positional arguments is not visible in this notebook -- confirm against lstm.py
nlstm = LSTM(100, vocab.seq_length, lstm_inputs.shape[0], vocab.size, embedding.hidden_dim)
nlstm._init_params()

In [18]:
def get_LSTM_UWb(weight):
    '''
    Split the stacked weight arrays of an LSTM layer into per-gate pieces.

    weight: the three arrays returned by the LSTM layer's get_weights(),
            unpacked in the order (input weights, hidden-state weights, bias)

    Returns the tuple (W, U, b); each is a dict keyed by gate name
    ("i", "f", "c", "o"):
    W: weights for input
    U: weights for hidden states
    b: bias, reshaped to a column of shape (units, 1)
    '''
    input_weights, hidden_weights, bias_vec = weight
    gate_names = ["i", "f", "c", "o"]
    # number of units = row count of the hidden-state weight matrix
    units = hidden_weights.shape[0]

    W, U, b = {}, {}, {}
    # walk the stacked arrays in steps of `units`; each step is one gate's slice
    starts = range(0, len(bias_vec), units)
    for position, start in enumerate(starts):
        stop = start + units
        gate = gate_names[position]
        W[gate] = input_weights[:, start:stop]
        U[gate] = hidden_weights[:, start:stop]
        b[gate] = bias_vec[start:stop].reshape(units, 1)
    return (W, U, b)

## per-gate dicts (keys "i", "f", "c", "o") built from the Keras weights in lstm_weights
W, U, b = get_LSTM_UWb(lstm_weights)

In [11]:
## one line per gate: input-weight, hidden-state-weight and bias shapes of the Keras layer
for gate in ("i", "f", "c", "o"):
    print(W[gate].shape, U[gate].shape, b[gate].shape)

(50, 100) (100, 100) (100, 1)
(50, 100) (100, 100) (100, 1)
(50, 100) (100, 100) (100, 1)
(50, 100) (100, 100) (100, 1)


In [12]:
## one line per gate of the custom LSTM: weight shape, hidden-state shape, bias shape
for gate in ("i", "f", "g", "o"):
    print(getattr(nlstm, "W_" + gate).shape, nlstm.h.shape, getattr(nlstm, "b_" + gate).shape)

(100, 125) (100, 50) (100, 1)
(100, 125) (100, 50) (100, 1)
(100, 125) (100, 50) (100, 1)
(100, 125) (100, 50) (100, 1)


In [23]:
hidden = 100

## random input kernel passed through nlstm._init_orthogonal, then cut column-wise
## at hidden, 2*hidden and 3*hidden into the i, f, c and o blocks
kernel = nlstm._init_orthogonal(np.random.randn(lstm_inputs.shape[-1], hidden * 4))
kernel_i, kernel_f, kernel_c, kernel_o = np.split(
    kernel, [hidden, hidden * 2, hidden * 3], axis=1
)

In [24]:
## shapes of the four input-kernel blocks, in i, f, c, o order
print(*(block.shape for block in (kernel_i, kernel_f, kernel_c, kernel_o)))

(50, 100) (50, 100) (50, 100) (50, 100)


In [25]:
## random recurrent kernel passed through nlstm._init_orthogonal, then cut column-wise
## at hidden, 2*hidden and 3*hidden into the i, f, c and o blocks
recurrent_kernel = nlstm._init_orthogonal(np.random.randn(hidden, hidden * 4))
recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c, recurrent_kernel_o = np.split(
    recurrent_kernel, [hidden, hidden * 2, hidden * 3], axis=1
)

In [26]:
## shapes of the four recurrent-kernel blocks, in i, f, c, o order
print(*(block.shape for block in (recurrent_kernel_i, recurrent_kernel_f,
                                  recurrent_kernel_c, recurrent_kernel_o)))

(100, 100) (100, 100) (100, 100) (100, 100)


In [27]:
## random bias vector of length 4*hidden, cut at hidden, 2*hidden and 3*hidden
## into the i, f, c and o pieces
bias = np.random.randn(hidden * 4)
bias_i, bias_f, bias_c, bias_o = np.split(bias, [hidden, hidden * 2, hidden * 3])

In [28]:
## shapes of the four bias pieces, in i, f, c, o order
print(*(piece.shape for piece in (bias_i, bias_f, bias_c, bias_o)))

(100,) (100,) (100,) (100,)


In [29]:
## zero initial hidden state with one row per sample; the row count is read from
## lstm_inputs instead of being hard-coded, so it follows the dataset size
h_tm1 = np.zeros((lstm_inputs.shape[0], hidden))
h_tm1[0].shape

(100,)

In [42]:
def f_test(inputs, hidden, return_sequences=False):
    '''
    Forward pass of an LSTM layer over a batch of sequences.

    Uses the notebook-level parameters kernel_{i,f,c,o},
    recurrent_kernel_{i,f,c,o} and bias_{i,f,c,o}, plus the activation
    helpers nlstm.sigmoid / nlstm.tanh.

    inputs: array indexed as (sample, time step, feature)
    hidden: number of hidden units (must match the width of the gate kernels)
    return_sequences: if truthy, return the hidden state of every time step,
                      shape (samples, time steps, hidden); otherwise only the
                      hidden state after the last time step, shape (samples, hidden)

    The recurrence runs along the time axis, and every sample keeps its own
    hidden/cell state that starts at zero. Iterating over axis 0 instead would
    chain the state from one sample to the next and treat the time steps of a
    sample as if they were independent batch rows.
    '''
    inputs = np.asarray(inputs)
    n_samples, n_steps = inputs.shape[0], inputs.shape[1]

    # one zero-initialised state row per sample
    h_tm1 = np.zeros((n_samples, hidden))
    c_tm1 = np.zeros((n_samples, hidden))

    h_state_out = []

    for t in range(n_steps):
        x_t = inputs[:, t, :]  # all samples at time step t

        # input contribution of each gate
        x_i = np.dot(x_t, kernel_i) + bias_i
        x_f = np.dot(x_t, kernel_f) + bias_f
        x_c = np.dot(x_t, kernel_c) + bias_c
        x_o = np.dot(x_t, kernel_o) + bias_o

        # gates squash to (0, 1) with the sigmoid
        f = nlstm.sigmoid(x_f + np.dot(h_tm1, recurrent_kernel_f))
        i = nlstm.sigmoid(x_i + np.dot(h_tm1, recurrent_kernel_i))
        o = nlstm.sigmoid(x_o + np.dot(h_tm1, recurrent_kernel_o))
        # the candidate cell state uses tanh, not sigmoid, so it can be negative
        cbar = nlstm.tanh(x_c + np.dot(h_tm1, recurrent_kernel_c))
        c = (f * c_tm1) + (i * cbar)
        ht = o * nlstm.tanh(c)

        if return_sequences:
            h_state_out.append(ht)

        h_tm1 = ht
        c_tm1 = c

    if return_sequences:
        if not h_state_out:
            # zero-length sequences: nothing to stack
            return np.zeros((n_samples, 0, hidden))
        # list of (samples, hidden) per step -> (samples, time steps, hidden)
        return np.stack(h_state_out, axis=1)
    return h_tm1

# test_out = f_test(lstm_inputs, 100)
# test_out.shape

In [43]:
## custom forward pass, final hidden state only (return_sequences left at its default)
test_out1 = f_test(lstm_inputs, hidden=100)
test_out1.shape

(2829, 100)

In [35]:
## Keras LSTM that returns the hidden state of every time step
## NOTE(review): this rebinds klstm1 and output1; lstm_weights was taken from the
## earlier klstm1 layer, so it presumably no longer describes this layer -- confirm before comparing values
klstm1 = tf.keras.layers.LSTM(100, return_sequences=True)

output1 = klstm1(lstm_inputs)

output1.shape

TensorShape([2829, 25, 100])

In [38]:
## test_out is otherwise only assigned in commented-out code, so build it here;
## the per-time-step output is requested so it lines up with the Keras
## return_sequences=True result
test_out = f_test(lstm_inputs, 100, return_sequences=True)
test_out[0].shape

(25, 100)

In [39]:
## shape of the first sample in the Keras output, for comparison with the custom output
output1[0].shape

TensorShape([25, 100])

In [67]:
## round-trip the state size through tf.TensorShape to get a plain list of dims
state_size = [100,100]
flat_dims = tf.TensorShape(state_size).as_list()

In [70]:
## zero tensor whose leading dimension is 2829, followed by the state dims
init_state_size = [2829, *flat_dims]
t = tf.zeros(shape=init_state_size)

In [71]:
## full shape of the zero tensor built from init_state_size
t.shape

TensorShape([2829, 100, 100])

In [74]:
## shape of a single slice along the leading dimension
t[0].shape

TensorShape([100, 100])

In [77]:
## NOTE(review): the saved output below has shape=(100,), which does not match output1 from the
## return_sequences=True layer (its rows are (25, 100)); presumably saved from an earlier run -- re-run to refresh
output1[0]

<tf.Tensor: shape=(100,), dtype=float32, numpy=
array([-0.06452696, -0.05335967,  0.27230436,  0.08514202, -0.1366195 ,
       -0.03089291, -0.09995215, -0.10938603, -0.21541286, -0.16333014,
        0.11085184,  0.02595731,  0.07698277, -0.24887118,  0.01225445,
       -0.02336389,  0.03856047,  0.22259717,  0.24349211,  0.05290796,
       -0.10337024,  0.02625424,  0.10869118,  0.01917277,  0.06314739,
       -0.16500454,  0.04420947,  0.16458632, -0.01336981,  0.17161603,
        0.12315603, -0.19418809,  0.05854382,  0.10265537, -0.22877784,
        0.2929678 ,  0.02486943, -0.06912109,  0.13993773,  0.03032602,
        0.20207252,  0.01584396, -0.00444488,  0.24728256, -0.09831952,
       -0.02874447,  0.03790721,  0.0167274 ,  0.08847462, -0.10701214,
       -0.04409536,  0.19067554, -0.15128121,  0.13191572, -0.10877167,
        0.02176062,  0.08408661,  0.01290146,  0.07844293,  0.19560237,
        0.2011749 ,  0.03956545, -0.12428977,  0.10971019,  0.22808734,
        0.023791