In [1]:
# import pandas as pd
#  import matplotlib.pyplot as plt
from activation_functions import tanh_activation, sigmoid
import numpy as np
import pandas as pd
import json
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import re
from sklearn.metrics import mean_squared_error

# HyperParameters:
embed_dim = 14
lstm_out = 50
batch_size = 32
num_words = 2500


In [2]:
## Preparing dataset

def convert(x):
    """
    Coverting JSON to pandas dataframe

    """    
    ob = json.loads(x)
    for k, v in ob.items():
        if isinstance(v, list):
            ob[k] = ','.join(v)
        elif isinstance(v, dict):
            for kk, vv in v.items():
                ob['%s_%s' % (k, kk)] = vv
            del ob[k]
    return ob



def filter_data(data):
    """
    Converting into pandas dataframe and filtering only text and ratings given by the users
    """

    df = pd.DataFrame([convert(line) for line in data])
    df.drop(columns=df.columns.difference(['text','stars']),inplace=True)
    df.loc[:, ("sentiment")] = 0
    

#     #I have considered a rating above 3 as positive and less than or equal to 3 as negative.
    df.loc[:,'sentiment']=['pos' if (x>3) else 'neg' for x in df.loc[:, 'stars']]
    df.loc[:,'text'] = df.loc[:,'text'].apply(lambda x: x.lower())
    df.loc[:,'text'] = df.loc[:,'text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]','',x)))
    for idx,row in df.iterrows():
        df.loc[:,'text']= [x for x in df.loc[:,'text']]
    return df


In [3]:
json_filename = 'review_mockup.json'
with open(json_filename,'rb') as f:
    data = f.readlines()
data = filter_data(data)
tokenizer = Tokenizer(num_words = num_words, split=' ')
tokenizer.fit_on_texts(data.loc[:,'text'].values)
#print(tokenizer.word_index)  # To see the dicstionary
X = tokenizer.texts_to_sequences(data.loc[:,'text'].values)
X = pad_sequences(X)
# print((X[0]))

In [28]:
class MY_LSTM:
    def __init__(self, hidden_size, input_size, optimizer, loss_func):
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = 2
        self._optimizer = optimizer
        self._loss_func = loss_func
        self.USE_OPTIMIZER = True


        # input_size = 2
        # hidden_size = 1
        # z_size = 24
        z_size = self.hidden_size + self.input_size
        """
        Init
        
        """
        self.parameters = {
            "weights": {
                "W_Forget": self.__init_orthogonal(np.zeros((self.hidden_size, z_size))),
                "W_Input": self.__init_orthogonal(np.zeros((self.hidden_size, z_size))),
                "W_Candidate": self.__init_orthogonal(np.zeros((self.hidden_size, z_size))),
                "W_Output": self.__init_orthogonal(np.zeros((self.hidden_size, z_size))),
                "W_OutputSoftmax": self.__init_orthogonal(np.zeros((self.hidden_size, self.output_size))),

            },
            "bias": {
                "b_Forget": np.ones((self.hidden_size, 1)),
                "b_Input": np.ones((self.hidden_size, 1)),
                "b_Candidate": np.ones((self.hidden_size, 1)),
                "b_Output": np.ones((self.hidden_size, 1)),
                "b_OutputSoftmax": np.ones((self.input_size, 1)),

            }
        }

    @property
    def optimizer(self):
        return self._optimizer

    @optimizer.setter
    def optimizer(self, optimizer):
        self._optimizer = optimizer


    @property
    def loss_func(self):
        return self._loss_func

    @loss_func.setter
    def loss_func(self, loss_func):
        self.loss_func = loss_func


    def __init_orthogonal(self, param):
        """
        Initializes weight parameters orthogonally.
        This is a common initiailization for recurrent neural networks.

        Refer to this paper for an explanation of this initialization:
            https://arxiv.org/abs/1312.6120
        """
        if param.ndim < 2:
            raise ValueError("Only parameters with 2 or more dimensions are supported.")

        rows, cols = param.shape

        new_param = np.random.randn(rows, cols)

        if rows < cols:
            new_param = new_param.T

        # Compute QR factorization
        q, r = np.linalg.qr(new_param)

        d = np.diag(r, 0)
        ph = np.sign(d)
        q *= ph

        if rows < cols:
            q = q.T

        new_param = q

        return new_param

    @property
    def get_parameters(self):
        """
        Returns weights and biases as 2d array

        """
        return self.parameters

    def forward(self, input_data, prev_stm, prev_ltm):
        """
        Arguments:
            input_data -- your input data at timestep "t", numpy array of shape (n_x, m).
            prev_stm -- h at timestep "t-1", numpy array of shape (n_a, m)
            prev_ltm -- c at timestep "t-1", numpy array of shape (n_a, m)
        Returns:
            outputs -- prediction at timestep "t", numpy array of shape (n_v, m)
            
            Weight shape:
            All of the weights    shape (self.hidden_size, z_size)
            Except W_OutputSoftmax      (self.hidden_size, self.output_size)
            
            Data shapes:
            input_data            shape (1,1)
            prev_stm and prev_ltm shape (1,hidden_state)
            concat                shape is (1, z_size)
            concat.T              shape is (z_size, 1)
             - forget_gate        shape is (hidden_size, 1)
             - input_gate         shape is (hidden_size, 1)
             - candidate          shape is (hidden_size, 1)
             - next_ltm           shape is (1, hidden_size)
        """

        # Save a list of computations for each of the components in the LSTM
        
        concat = np.concatenate((prev_stm, input_data), axis=1)
#         print(f"Concat value is: \n", concat, "\nConcat.T value is: \n", concat.T)
        
        # Compute the forget gate
#         print("=======Computation of the forget gate:======")
#         print(f"W_forget:\n {self.parameters['weights']['W_Forget']}\nconcat.T:\n {concat.T}\nb_Forget: {self.parameters['bias']['b_Forget']}")
        forget_gate = sigmoid(np.dot(self.parameters["weights"]["W_Forget"], concat.T) + self.parameters["bias"]["b_Forget"])
#         print(f"Forget_gate value is: \n", forget_gate)
#         print("===========================================")


        # Compute the input gate
        input_gate = sigmoid(np.dot(self.parameters["weights"]["W_Input"], concat.T) + self.parameters["bias"]["b_Input"])
#         print(f"Input_gate value is: \n", input_gate)

        # Compute the candidate cell value
        candidate = np.tanh(np.dot(self.parameters["weights"]["W_Candidate"], concat.T) + self.parameters["bias"]["b_Candidate"])
#         print(f"Candidate_gate value is: \n", candidate)

        # Compute the memory cell
#         print(f"==============next_ltm===========\n forget_gate * prev_ltm:\n {forget_gate * prev_ltm}")
#         print(f"input_gate * candidate: \n {input_gate * candidate}")
        next_ltm = forget_gate * prev_ltm.T + input_gate * candidate
#         print(f"next_ltm value is: \n", next_ltm)
#         print("==================================")


        # Compute the output gate
        output_gate = sigmoid(np.dot(self.parameters["weights"]["W_Output"], concat.T) + self.parameters["bias"]["b_Output"])
#         print(f"Output_gate value is: \n", output_gate)

        # Compute the next hidden state
        next_stm = output_gate * np.tanh(next_ltm)
#         print(f"next_stm value is: \n", next_stm)

        forward_pass = {
            "Forget": forget_gate,
            "Input": input_gate,
            "Candidate": candidate,
            "Output": output_gate,
            "next_ltm": next_ltm.T,
            "next_stm": next_stm.T,
            "Concat_Input": concat
        }
        
        return forward_pass

    def __cross_entropy(self, predictions, targets, epsilon=1e-12):
        """
        Computes cross entropy between targets (encoded as one-hot vectors)
        and predictions.
        Input: predictions (N, k) ndarray
               targets (N, k) ndarray
        Returns: scalar
        """
        predictions = np.clip(predictions, epsilon, 1. - epsilon)
        N = predictions.shape[0]
        ce = -np.sum(targets * np.log(predictions + 1e-9)) / N
        return ce


    def calculate_loss(self, prediction, targets):
        print(f"============IN LOSS=============")
        print(f"prediction: {prediction}")
        print(f"targets: {targets}")

        return self.loss_func(prediction, targets)

    def backward(self, forward_pass, prediction, targets):
        """
        Arguments:
        forward_pass -- dictionary:
                        "Forget_gate": forget_gate,
                        "Input_gate": input_gate,
                        "Candidate_gate": candidate,
                        "Output_gate": output_gate,
                        "next_ltm": next_ltm.T,
                        "next_stm": next_stm.T
                        
        targets -- your targets as a list of size m.
        Returns:
        loss -- crossentropy loss for all elements in output
        grads -- lists of gradients of every element in p
        
        Weight shape:
            All of the weights    shape (self.hidden_size, z_size)
            Except W_OutputSoftmax      (self.hidden_size, self.output_size)

        """

        gradients = {
            "weights": {
                "W_Forget": np.zeros_like(self.parameters["weights"]["W_Forget"]),
                "W_Input": np.zeros_like(self.parameters["weights"]["W_Input"]),
                "W_Candidate": np.zeros_like(self.parameters["weights"]["W_Candidate"]),
                "W_Output": np.zeros_like(self.parameters["weights"]["W_Output"]),
                "W_OutputSoftmax": np.zeros_like(self.parameters["weights"]["W_OutputSoftmax"]),
            }
#             "bias": {
#                 "b_Forget": np.zeros_like(self.parameters["bias"]["b_Forget"]),
#                 "b_Input": np.zeros_like(self.parameters["bias"]["b_Input"]),
#                 "b_Candidate": np.zeros_like(self.parameters["bias"]["b_Candidate"]),
#                 "b_Output": np.zeros_like(self.parameters["bias"]["b_Output"]),
#                 "b_OutputSoftmax": np.zeros_like(self.parameters["bias"]["b_OutputSoftmax"]),
#             }
        }

        # Set the next cell and hidden state equal to zero
        print(f"==============PERFORMING BACKWARD===========\n")
        print(f"INPUTS:\nforward_pass: \n{forward_pass}\ntargets: \n{targets}\nprediction: \n{prediction}")
        print(f"\n==============END OF INPUTS=================\n")
        
        print("\n===============PROCESSING------------------\n")
#         next_stm = np.zeros_like(forward_pass["next_stm"])  # h
#         next_ltm = np.zeros_like(forward_pass["next_ltm"])  # C
        next_ltm = forward_pass["next_ltm"]
        next_stm = forward_pass['next_stm']

        loss = 0
        # Compute the cross entropy
        
        for t in reversed(range(self.hidden_size)):

            loss += self.loss_func(prediction[0].tolist(), targets)
            print(f"[{t}] LOSS:  {loss}")
            # Get the previous hidden cell state



            # Compute the derivative of the relation of the hidden-state to the output gate
            dv = np.copy(prediction)
            dv[np.argmax(targets)] -= 1
            print(f"[{t}] dv (OUTPUT_GATE):  \n{dv}\n")
            # Update the gradient of the relation of the hidden-state to the output gate
            print(f"[{t}] dv (OUTPUT_GATE):  \n{dv}\n")
            print(f"[{t}] gradients[W_OutputSoftmax]:  \n{gradients['weights']['W_OutputSoftmax']}\n")
            gradients["weights"]["W_OutputSoftmax"] += np.dot(next_stm.T, dv)
            #gradients["bias"]["b_OutputSoftmax"] += dv.T 


            # Compute the derivative of the hidden state and output gate
            dh = np.dot(dv, self.parameters["weights"]["W_OutputSoftmax"].T)
            print(f"[{t}] dh:  \n{gradients['weights']['W_OutputSoftmax']}\n")
            dh += next_stm

            do = dh * tanh_activation(next_ltm)
            print(f"[{t}] do: BEFORE SIGMOID \n{do}\n")
            do = sigmoid(forward_pass["Output"], derivative=True) * do
            print(f"[{t}] do: AFTER SIGMOID \n{do}\n")

            # Update the gradients with respect to the output gate
            print(f"[{t}] gradients[W_Output] BEFORE UPDATE:  \n{gradients['weights']['W_Output']}\n")
            # =========================== ?????????DO WE NEED Concat_Input?????????????====================
            gradients["weights"]["W_Output"] += np.dot(do, forward_pass["Concat_Input"].T)
            #gradients["bias"]["b_Output"] += do
            print(f"[{t}] gradients[W_Output] AFTER UPDATE:  \n{gradients['weights']['W_Output']}\n")


            # Compute the derivative of the cell state and candidate g
            dC = np.copy(ltm_next)
            dC += dh * forward_pass["Output"] * tanh_activation(next_ltm, derivative=True)
            print(f"[{t}] dC:  \n{dC}\n")

            dg = dC * forward_pass["Input"]
            dg = tanh_activation(forward_pass["Candidate"], derivative=True) * dg
            print(f"[{t}] dg:  \n{dg}\n")


            # Update the gradients with respect to the candidate
            print(f"[{t}] gradients[W_Candidate] BEFORE UPDATE:  \n{gradients['weights']['W_Candidate']}\n")
            # =========================== ?????????DO WE NEED Concat_Input?????????????====================
            gradients["weights"]["W_Candidate"] += np.dot(dg, forward_pass["Concat_Input"].T)
            #gradients["bias"]["b_Output"] += dg
            print(f"[{t}] gradients[W_Candidate] AFTER UPDATE:  \n{gradients['weights']['W_Candidate']}\n")


            # Compute the derivative of the input gate and update its gradients
            di = dC * forward_pass["Candidate"]
            di = sigmoid(forward_pass["Input"], True) * di
            print(f"[{t}] di:  \n{di}\n")

            print(f"[{t}] gradients[W_Input] BEFORE UPDATE:  \n{gradients['weights']['W_Input']}\n")
            # =========================== ?????????DO WE NEED Concat_Input?????????????====================
            gradients["weights"]["W_Input"] += np.dot(di, forward_pass["Concat_Input"].T)
            #gradients["bias"]["b_Input"] += di
            print(f"[{t}] gradients[W_Input] AFTER UPDATE:  \n{gradients['weights']['W_Input']}\n")


            # Compute the derivative of the forget gate and update its gradients
            df = dC * ltm_prev
            df = sigmoid(forward_pass["Forget"][t]) * df
            print(f"[{t}] df:  \n{df}\n")


            print(f"[{t}] gradients[W_Forget] BEFORE UPDATE:  \n{gradients['weights']['W_Forget']}\n")
            # =========================== ?????????DO WE NEED Concat_Input?????????????====================
            gradients["weights"]["W_Forget"] += np.dot(df, forward_pass["Concat_Input"].T)
            #gradients["bias"]["b_Forget"] += df
            print(f"[{t}] gradients[W_Forget] AFTER UPDATE:  \n{gradients['weights']['W_Forget']}\n")


            # Compute the derivative of the input and update the gradients of the previous hidden and cell state
            dz = (np.dot(self.parameters["weights"]["W_Forget"].T, df) + np.dot(self.parameters["weights"]["W_Input"].T, di) + np.dot(
                self.parameters["weights"]["W_Candidate"].T, dg) + np.dot(self.parameters["weights"]["W_Output"].T, do))
            print(f"[{t}] dz:  \n{dz}\n")

            dh_prev = dz[:self.hidden_size, :]
            print(f"[{t}] dh_prev:  \n{dh_prev}\n")

            dC_prev = forward_pass["Forget"] * dC
            print(f"[{t}] dC_prev:  \n{dC_prev}\n")


            # Clip gradients
            print(f"=========\nGRADS BEFORE CLIP:  \n{grads}\n")

            grads = self.__clip_gradient_norm(gradients)

            print(f"=========\nGRADS AFTER CLIP:  \n{grads}\n")


        return loss, grads

    def __clip_gradient_norm(self, grads, max_norm=0.25):
        """
        Clips gradients to have a maximum norm of `max_norm`.
        This is to prevent the exploding gradients problem.
        """
        # Set the maximum of the norm to be of type float
        max_norm = float(max_norm)
        total_norm = 0
        # Calculate the L2 norm squared for each gradient and add them to the total norm
        for gate, grad in grads["weights"].items():
            grad_norm = np.sum(np.power(grad, 2))
            total_norm += grad_norm
        total_norm = np.sqrt(total_norm)
        # Calculate clipping coeficient
        clip_coef = max_norm / (total_norm + 1e-6)
        # If the total norm is larger than the maximum allowable norm, then clip the gradient
        if clip_coef < 1:
            for gate, grad in grads["weights"].items():
                grad *= clip_coef
        return grads

    def update_parameters(self, grads, t, lr=0.01):
        # Take a step
        parameters = self.get_parameters["weights"]
        if (self.USE_OPTIMIZER):
            updated_parameters = self.optimizer(parameters=parameters, gradients=grads, learning_rate=lr, t=t)
        else:
            for (_, parameter), (_, grad) in zip(parameters.items(), grads.items()):
                parameter -= lr * grad

In [29]:
"""
TESTING OF FORWARD PASS WITH DUMMY VALUES
"""

np.random.seed(1337)
# Example data dimensions
batch_size = 1  # Number of training examples
input_size = 1  # Number of features in the input
hidden_state = 6  # Number of units in the hidden state/memory cell
model = my_build_model(hidden_size=hidden_state, input_size = input_size)
n_y = 1  # Number of units in the output

z_size = hidden_state + input_size
# Generate example input data, previous hidden state, and previous memory cell
x = np.array([[9]])
prev_stm = np.zeros((batch_size, hidden_state))
prev_ltm = np.zeros((batch_size, hidden_state))
print("\n=========Printing for MY_LSTM===============\n")
print("INPUTS\n")
print("(x) Input:\n", x)
print("(prev_stm) {h} Previous hidden state:\n", prev_stm)
print("(prev_ltm) {c} Previous memory cell:\n", prev_ltm)
print("\n=========PERFORM FORWARD PASS==============\n")

# Perform forward pass
print("\n=========APPLY SOFTMAX (LAST LAYER OF LSTM)\n")
forward_pass = model.forward(input_data=x, prev_stm=prev_stm, prev_ltm=prev_ltm)
reshaped_output = forward_pass['Output'].reshape(hidden_state)

# Print the output
print("===========OUTPUT============================")
print("(next_ltm) {h} Next hidden state:\n", forward_pass["next_stm"])
print("(next_stm) {c} Next memory cell:\n", forward_pass["next_ltm"])
print("(Output_gate) RESHAPED:\n", forward_pass['Output'].reshape(hidden_state))
output_softmax = np.dot(forward_pass["next_stm"], model.parameters["weights"]["W_OutputSoftmax"]) + model.parameters["bias"]["b_OutputSoftmax"]
print(f"softmax(Output)\n {tf.nn.softmax(output_softmax)}")
print("\n========END OF PRINTING FOR MY_LSTM========\n")

print("\n========START OF TENSORFLOW LSTM===========\n")
model_tf = build_model(x)
print("Prediction is: ", model_tf.predict(x))
print("\n========END OF TENSORFLOW LSTM=============\n")


print("\n\n======START OF BACKWARDPROPAGATION=======\n")
model.backward(forward_pass=forward_pass, prediction=output_softmax, targets=[[1.0],[0.0]])



INPUTS

(x) Input:
 [[9]]
(prev_stm) {h} Previous hidden state:
 [[0. 0. 0. 0. 0. 0.]]
(prev_ltm) {c} Previous memory cell:
 [[0. 0. 0. 0. 0. 0.]]




(next_ltm) {h} Next hidden state:
 [[-0.17110701  0.30377355  0.07750367 -0.20858678 -0.24969392  0.01808954]]
(next_stm) {c} Next memory cell:
 [[-0.6403307   0.9078139   0.08001236 -0.21188491 -0.25719292  0.12117781]]
(Output_gate) RESHAPED:
 [0.30277745 0.42186005 0.97071239 0.99912253 0.99215558 0.1500109 ]
softmax(Output)
 [[0.51996451 0.48003549]]




1
Prediction is:  [[0.5001586  0.49984145]]






INPUTS:
forward_pass: 
{'Forget': array([[0.32321709],
       [0.98716645],
       [0.11279658],
       [0.00507193],
       [0.67489138],
       [0.04492773]]), 'Input': array([[0.8320444 ],
       [0.9916356 ],
       [0.08001527],
       [0.55398839],
       [0.25725837],
       [0.12332961]]), 'Candidate': array([[-0.76958718],
       [ 0.91547126],
       [ 0.99996369],
       [-0.38247175],
       [-0.99974559],
       [ 0.982

ValueError: shapes (6,6) and (7,1) not aligned: 6 (dim 1) != 7 (dim 0)

In [6]:
def build_model(X):
    model = Sequential()
    print(X.shape[1])
    model.add(Embedding(num_words, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])

    return model


In [5]:

def my_build_model(hidden_size, input_size):
    lstm = MY_LSTM(hidden_size = hidden_size, input_size = input_size, optimizer=0, loss_func=mean_squared_error)
    return lstm



In [10]:

def train_split(X, Y, test_size):
    length = int(len(X) * test_size)
    X_train = X[1:length]
    X_valid = X[length:]
    Y_train = Y[1:length]
    Y_valid = Y[length:]
    return X_train, X_valid, Y_train, Y_valid



In [72]:

def my_train(lstm, X, Y, hidden_size, input_size):
    """
    X are all sentences: [[9, 123, 5423, 121], [1,2,3,4]]
    Y are all features:  [[1.0], [0.0]]
    
    
    inputs is a sentence: [9, 123, 5423, 121]
    targets is a feature : [1.0]
    
    """

    # Hyper-parameters
    num_epochs = 5

    # Initialize hidden state as zeros
    # hidden_state = np.zeros((hidden_size, 1))

    X_train, X_valid, Y_train, Y_valid = train_split(X = X, Y = Y, test_size = 0.20)

    # Track loss
    training_loss, validation_loss = [], []

    # For each epoch
    for i in range(20):

        # Track loss
        epoch_training_loss = 0
        epoch_validation_loss = 0

        # For each sentence in validation set
        
        # inputs are sentences 
        for inputs, targets in zip(X_valid, Y_valid):
            print(f"targets inside training loop: {Y_valid}" )
            # Forward pass
#             print(f"input : {inputs} ")
#             print(f"input type: {type(inputs)} ")
#             print(f"targets: {targets} ")
#             print(f"targets shape: {type(targets)} ")
          # Initialize hidden state and cell state as zeros
            stm_prev = np.zeros((hidden_size, 1))
            ltm_prev = np.zeros((hidden_size, 1))
            
            forward_pass = lstm.forward(inputs)
            
            
            
            
#             print(f"return of forward: {forward_pass}")
#             print(f"forward_pass['result'] {forward_pass['result']}")
            print(f"forward_pass['result'][-1] : {forward_pass['result'][-1]}")
            
            
            
            # Backward pass
            loss = lstm.calculate_loss(forward_pass["result"][-1], [[targets]])

            # Update loss
            epoch_validation_loss += loss
       
        # For each sentence in training set
        t = 1
        for inputs, targets in zip(X_train, Y_train):

            # One-hot encode input and target sequence
            #  inputs_one_hot = dataset.one_hot_encode_sequence(inputs)
            #  targets_one_hot = dataset.one_hot_encode_sequence(targets)

       

            # Forward pass
            forward_pass = lstm.forward(inputs)

            # Backward pass
            loss, grads = lstm.backward(forward_pass, [[targets]])

            # Update parameters

            params = lstm.update_parameters(grads=grads["weights"], t=t)
            t += 1
            # Update loss
            #output_sentence = [dataset.idx_to_word[np.argmax(output)] for output in forward_pass["output_s"]]

            epoch_training_loss += loss

        # Save loss for plot
        training_loss.append(epoch_training_loss / len(X_train))
        validation_loss.append(epoch_validation_loss / len(X_valid))

        # Print loss every 10 epochs
        if i % 10 == 0:
            print(f'Epoch {i}, training loss: {training_loss[-1]}, validation loss: {validation_loss[-1]}')
            print(f'Input sentence {i}:')
            print(inputs)

            print(f'\nTarget sequence {i}:')
            print(targets)

            print('\nPredicted sequence:')
            print([np.argmax(output)] for output in forward_pass["result"])
    return training_loss, validation_loss


if __name__ == "__main__":
    embed_dim = 12
    lstm_out = 300
    batch_size = 32
    input_size = 200
    #X = [[0,0,9,6,324,2,4,131,289,109,293,9],[2,84,67,60,74,97, 4,667,388,554,67,46,15]]
    # Y = [[1], [0]]
#     print(type(data))
#     print(data['sentiment'])
    Y = pd.get_dummies(data['sentiment']).values[:,0]   
    G = pd.DataFrame(Y.reshape(-1, 1))
#     print(X.shape)
#     print(type(Y[0]))
#     print(Y.shape)
#     print(G.shape)
    print(f"===============")
#     print(f"Y : {Y}" )
#     print(G.shape)
    
#     Y = data[1]
    model = my_build_model(X, hidden_size=lstm_out, input_size = len(X[0]))
    my_train(model, X, G, lstm_out, input_size)

targets inside training loop:           0
1000  False
1001   True
1002  False
1003  False
1004   True
...     ...
4995  False
4996  False
4997   True
4998  False
4999   True

[4000 rows x 1 columns]
forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)
Epoch 0, training loss: 0.001001001001001001, validation loss: 0.00025
Input sentence 0:
[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0

forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)
targets inside training loop:           0
1000  False
1001   True
1002  False
1003  False
1004   True
...     ...
4995  False
4996  False
4997   True
4998  False
4999   True

[4000 rows x 1 columns]
forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)
targets inside training loop:           0
1000  False
1001   True
1002  False
1003  False
1004   True
...     ...
4995  False
4996  False
4997   True
4998  False
4999   True

[4000 rows x 1 columns]
forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)
Epoch 10, training loss: 0.001001001001001001, validation loss

forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)
targets inside training loop:           0
1000  False
1001   True
1002  False
1003  False
1004   True
...     ...
4995  False
4996  False
4997   True
4998  False
4999   True

[4000 rows x 1 columns]
forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)
targets inside training loop:           0
1000  False
1001   True
1002  False
1003  False
1004   True
...     ...
4995  False
4996  False
4997   True
4998  False
4999   True

[4000 rows x 1 columns]
forward_pass['result'][-1] : [[1.]]
targets inside loss func: [[0]]
targets inside backward func: [[0]]
Output.shape (300, 1)
tanh.shape [1.]
dc shape: (300, 1)
dh shape: (300, 1)


In [73]:
print(model.parameters)

{'weights': {'W_Forget': array([[ 0.04230445, -0.00828666,  0.02851241, ...,  0.04726605,
         0.01827041, -0.02315239],
       [ 0.05941409, -0.04375356, -0.06355351, ...,  0.00407919,
        -0.14500059,  0.04052528],
       [-0.00212553,  0.00600893, -0.01418321, ...,  0.10642387,
         0.02766756, -0.05203458],
       ...,
       [-0.07516916, -0.12781673,  0.02946873, ..., -0.01015268,
         0.05676914,  0.06283404],
       [-0.0529668 ,  0.16381545,  0.09436224, ...,  0.08223945,
         0.01360032, -0.00200551],
       [-0.06626347,  0.03461789, -0.09381877, ...,  0.08917173,
        -0.01112865, -0.05160694]]), 'W_Input': array([[ 0.03324818,  0.01967499,  0.05703908, ...,  0.04314924,
        -0.11016764, -0.0436574 ],
       [ 0.08157928,  0.05701869, -0.0750379 , ...,  0.02907625,
         0.00175879,  0.01633718],
       [ 0.04517571,  0.11817931,  0.04185157, ..., -0.00292897,
        -0.03184878,  0.01579642],
       ...,
       [-0.13770719,  0.01294298,  0.0

In [74]:
X_train, X_valid, Y_train, Y_valid = train_split(X = X, Y = Y, test_size = 0.20)
embed_dim = 12
hidden_size = 300
batch_size = 32
input_size = 2500
for inputs, targets in zip(X_train, Y_train):

    # One-hot encode input and target sequence
    #  inputs_one_hot = dataset.one_hot_encode_sequence(inputs)
    #  targets_one_hot = dataset.one_hot_encode_sequence(targets)

    # Forward pass
    forward_pass = model.forward(inputs)
    
    
    print(f" Prediction: {forward_pass['result'][-1].squeeze()} and truth is {targets}")

 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is Fals

KeyboardInterrupt: 