In [1]:
# import pandas as pd
#  import matplotlib.pyplot as plt
from activation_functions import tanh_activation, sigmoid
import numpy as np
import pandas as pd
import json
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
import re
from sklearn.metrics import mean_squared_error

# HyperParameters:
embed_dim = 14
lstm_out = 50
batch_size = 32
num_words = 2500


In [2]:
## Preparing dataset

def convert(x):
    """
    Coverting JSON to pandas dataframe

    """    
    ob = json.loads(x)
    for k, v in ob.items():
        if isinstance(v, list):
            ob[k] = ','.join(v)
        elif isinstance(v, dict):
            for kk, vv in v.items():
                ob['%s_%s' % (k, kk)] = vv
            del ob[k]
    return ob



def filter_data(data):
    """
    Converting into pandas dataframe and filtering only text and ratings given by the users
    """

    df = pd.DataFrame([convert(line) for line in data])
    df.drop(columns=df.columns.difference(['text','stars']),inplace=True)
    df.loc[:, ("sentiment")] = 0
    

#     #I have considered a rating above 3 as positive and less than or equal to 3 as negative.
    df.loc[:,'sentiment']=['pos' if (x>3) else 'neg' for x in df.loc[:, 'stars']]
    df.loc[:,'text'] = df.loc[:,'text'].apply(lambda x: x.lower())
    df.loc[:,'text'] = df.loc[:,'text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]','',x)))
    for idx,row in df.iterrows():
        df.loc[:,'text']= [x for x in df.loc[:,'text']]
    return df


In [181]:
json_filename = 'review_mockup.json'
with open(json_filename,'rb') as f:
    data = f.readlines()
data = filter_data(data)
tokenizer = Tokenizer(num_words = num_words, split=' ')
tokenizer.fit_on_texts(data.loc[:,'text'].values)
#print(tokenizer.word_index)  # To see the dicstionary
X = tokenizer.texts_to_sequences(data.loc[:,'text'].values)
# X = pad_sequences(X)
# print((X[0]))

In [168]:
class MY_LSTM:
    def __init__(self, hidden_size, input_size, optimizer, loss_func):
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.output_size = 2
        self._optimizer = optimizer
        self._loss_func = loss_func
        self.USE_OPTIMIZER = True


        # input_size = 2
        # hidden_size = 1
        # z_size = 24
        z_size = self.hidden_size + self.input_size

        self.parameters = {
            "weights": {
                "Forget": self.__init_orthogonal(np.random.randn(z_size, self.hidden_size)),
                "Input": self.__init_orthogonal(np.random.randn(z_size, self.hidden_size)),
                "Candidate": self.__init_orthogonal(np.random.randn(z_size, self.hidden_size)),
                "Output": self.__init_orthogonal(np.random.randn(z_size, self.hidden_size)),
                "OutputSoftmax": self.__init_orthogonal(np.random.randn(self.hidden_size, self.output_size)),
            },
            "recurrent": {
                "Forget": self.__init_orthogonal(np.random.randn(self.hidden_size, self.hidden_size)),
                "Input": self.__init_orthogonal(np.random.randn(self.hidden_size, self.hidden_size)),
                "Candidate": self.__init_orthogonal(np.random.randn(self.hidden_size, self.hidden_size)),
                "Output": self.__init_orthogonal(np.random.randn(self.hidden_size, self.hidden_size))
            },
            "bias": {
                "Forget": np.ones((1, self.hidden_size)),
                "Input": np.ones((1, self.hidden_size)),
                "Candidate": np.ones((1, self.hidden_size)),
                "Output": np.ones((1, self.hidden_size)),
                "OutputSoftmax": np.ones((1, self.output_size)),

            }
        }

    @property
    def optimizer(self):
        return self._optimizer

    @optimizer.setter
    def optimizer(self, optimizer):
        self._optimizer = optimizer


    @property
    def loss_func(self):
        return self._loss_func

    @loss_func.setter
    def loss_func(self, loss_func):
        self.loss_func = loss_func


    def __init_orthogonal(self, param):
        """
        Initializes weight parameters orthogonally.
        This is a common initiailization for recurrent neural networks.

        Refer to this paper for an explanation of this initialization:
            https://arxiv.org/abs/1312.6120
        """
        if param.ndim < 2:
            raise ValueError("Only parameters with 2 or more dimensions are supported.")

        rows, cols = param.shape

        new_param = np.random.randn(rows, cols)

        if rows < cols:
            new_param = new_param.T

        # Compute QR factorization
        q, r = np.linalg.qr(new_param)

        d = np.diag(r, 0)
        ph = np.sign(d)
        q *= ph

        if rows < cols:
            q = q.T

        new_param = q

        return new_param

    @property
    def get_parameters(self):
        """
        Returns weights and biases as 2d array

        """
        return self.parameters

    def forward(self, input_data, prev_stm, prev_ltm):
        """
        Arguments:
            input_data -- your input data at timestep "t", numpy array of shape (n_x, m).
            prev_stm -- h at timestep "t-1", numpy array of shape (n_a, m)
            prev_ltm -- c at timestep "t-1", numpy array of shape (n_a, m)
        Returns:
            outputs -- prediction at timestep "t", numpy array of shape (n_v, m)
            
            Weight shape:
            All of the weights    shape (self.hidden_size, z_size)
            Except W_OutputSoftmax      (self.hidden_size, self.output_size)
            
            Data shapes:
            input_data            shape (1,1)
            prev_stm and prev_ltm shape (1,hidden_state)
            concat                shape is (1, z_size)
            concat.T              shape is (z_size, 1)
             - forget_gate        shape is (hidden_size, 1)
             - input_gate         shape is (hidden_size, 1)
             - candidate          shape is (hidden_size, 1)
             - next_ltm           shape is (1, hidden_size)
        """

        # Save a list of computations for each of the components in the LSTM
        
        concat = np.concatenate((prev_stm, input_data), axis=1)
#         print(f"Concat value is: \n", concat, "\nConcat.T value is: \n", concat.T)
        
#         Compute the forget gate
        print("=======Computation of the forget gate:======")
#         print(f"W_forget:\n {self.parameters['weights']['Forget']}\nconcat:\n {concat.T}\nb_Forget: {self.parameters['bias']['Forget']}")
        z = np.dot(prev_stm, self.parameters['recurrent']['Forget'])
        y = np.dot(concat, self.parameters['weights']['Forget'])
        print(f"np.dot(stm, forget): {z}")
        print(f"np.dot(concat, forget): {y}")
        print(f"bias: {self.parameters['bias']['Forget']}")
        
        forget_gate = sigmoid(np.dot(concat, self.parameters["weights"]["Forget"]) + np.dot(prev_stm, self.parameters["recurrent"]["Forget"]) + self.parameters["bias"]["Forget"])
        print(f"Forget_gate value is: \n", forget_gate)
        print("===========================================")


        # Compute the input gate
        print(f"input_gate = concate.shape is {concat.shape} dot W_Input shape is {self.parameters['weights']['Input'].shape}")
        input_gate = sigmoid(np.dot(concat, self.parameters["weights"]["Input"]) + np.dot(prev_stm, self.parameters["recurrent"]["Input"]) + self.parameters["bias"]["Input"])
        print(f"Input_gate value is: \n", input_gate)

        # Compute the candidate cell value
        candidate = np.tanh(np.dot(concat, self.parameters["weights"]["Candidate"]) + np.dot(prev_stm, self.parameters["recurrent"]["Candidate"]) + self.parameters["bias"]["Candidate"])
        print(f"Candidate_gate value is: \n", candidate)

        # Compute the memory cell
        print(f"==============next_ltm===========\n")
        print(f"forget_gate * prev_ltm:\n {forget_gate * prev_ltm}")
        print(f"input_gate * candidate: \n {input_gate * candidate}")
        print(f"next_ltm:{forget_gate * prev_ltm + input_gate * candidate} ")
        next_ltm = forget_gate * prev_ltm + input_gate * candidate
        print(f"next_ltm value is: \n", next_ltm)
        print("==================================")


        # Compute the output gate
        output_gate = sigmoid(np.dot(concat, self.parameters["weights"]["Output"]) + np.dot(prev_stm, self.parameters["recurrent"]["Output"]) + self.parameters["bias"]["Output"])
        print(f"Output_gate value is: \n", output_gate)

        # Compute the next hidden state
        next_stm = output_gate * np.tanh(next_ltm)
        print(f"next_stm value is: \n", next_stm)

        forward_pass = {
            "Forget": forget_gate,
            "Input": input_gate,
            "Candidate": candidate,
            "Output": output_gate,
            "next_ltm": next_ltm,
            "next_stm": next_stm,
            "Concat_Input": concat
        }
        
        return forward_pass

    def __cross_entropy(self, predictions, targets, epsilon=1e-12):
        """
        Computes cross entropy between targets (encoded as one-hot vectors)
        and predictions.
        Input: predictions (N, k) ndarray
               targets (N, k) ndarray
        Returns: scalar
        """
        predictions = np.clip(predictions, epsilon, 1. - epsilon)
        N = predictions.shape[0]
        ce = -np.sum(targets * np.log(predictions + 1e-9)) / N
        return ce


    def calculate_loss(self, prediction, targets):
        print(f"============IN LOSS=============")
        print(f"prediction: {prediction}")
        print(f"targets: {targets}")

        return self.loss_func(prediction, targets)

    def backward(self, forward_pass, prediction, targets):
        """
        Arguments:
        forward_pass -- dictionary:
                        "Forget_gate": forget_gate,
                        "Input_gate": input_gate,
                        "Candidate_gate": candidate,
                        "Output_gate": output_gate,
                        "next_ltm": next_ltm.T,
                        "next_stm": next_stm.T
                        
        targets -- your targets as a list of size m.
        Returns:
        loss -- crossentropy loss for all elements in output
        grads -- lists of gradients of every element in p
        
        Weight shape:
            All of the weights    shape (self.hidden_size, z_size)
            Except W_OutputSoftmax      (self.hidden_size, self.output_size)

        """

        gradients = {
            "weights": {
                "Forget": np.zeros_like(self.parameters["weights"]["Forget"]),
                "Input": np.zeros_like(self.parameters["weights"]["Input"]),
                "Candidate": np.zeros_like(self.parameters["weights"]["Candidate"]),
                "Output": np.zeros_like(self.parameters["weights"]["Output"]),
                "OutputSoftmax": np.zeros_like(self.parameters["weights"]["OutputSoftmax"]),
            },
            "recurrent": {
                "Forget": np.zeros_like(self.parameters["recurrent"]["Forget"]),
                "Input": np.zeros_like(self.parameters["recurrent"]["Input"]),
                "Candidate": np.zeros_like(self.parameters["recurrent"]["Candidate"]),
                "Output": np.zeros_like(self.parameters["recurrent"]["Output"]),
            }
#             "bias": {
#                 "b_Forget": np.zeros_like(self.parameters["bias"]["Forget"]),
#                 "b_Input": np.zeros_like(self.parameters["bias"]["Input"]),
#                 "b_Candidate": np.zeros_like(self.parameters["bias"]["Candidate"]),
#                 "b_Output": np.zeros_like(self.parameters["bias"]["Output"]),
#                 "b_OutputSoftmax": np.zeros_like(self.parameters["bias"]["OutputSoftmax"]),
#             }
        }

        # Set the next cell and hidden state equal to zero
        print(f"==============PERFORMING BACKWARD===========\n")
        print(f"INPUTS:\nforward_pass: \n{forward_pass}\ntargets: \n{targets}\nprediction: \n{prediction}")
        print(f"\n==============END OF INPUTS=================\n")
        
        print("\n===============PROCESSING------------------\n")
        next_stm = np.zeros_like(forward_pass["next_stm"])  # h
        next_ltm = np.zeros_like(forward_pass["next_ltm"])  # C
#         next_ltm = forward_pass["next_ltm"]
#         next_stm = forward_pass['next_stm']

        loss = 0
        # Compute the cross entropy
        t =1
#         for t in reversed(range(self.hidden_size)):
        loss += self.loss_func(prediction[0].tolist(), targets)
        print(f"[{t}] LOSS:  {loss}")
        # Get the previous hidden cell state



        # Compute the derivative of the relation of the hidden-state to the output gate
        dv = np.copy(prediction)
        dv[np.argmax(targets)] -= 1
        # Update the gradient of the relation of the hidden-state to the output gate
        print(f"[{t}] dv (OUTPUT_GATE):  \n{dv}\n")
#         print(f"[{t}] gradients[W_OutputSoftmax]:  \n{gradients['weights']['W_OutputSoftmax']}\n")
        gradients["weights"]["OutputSoftmax"] += np.dot(next_stm.T, dv)
        #gradients["bias"]["b_OutputSoftmax"] += dv.T 


        # Compute the derivative of the hidden state and output gate
        dh = np.dot(dv, self.parameters["weights"]["OutputSoftmax"].T)
        print("next_stm: ",next_stm)
        print(f"[{t}] dh:  \n{dh}\n")
        dh += next_stm
        
        
        print(f"dh: {dh}")
        print(f"next_ltm: {next_ltm}")
        do = dh * tanh_activation(next_ltm)
        print(f"do: {do}")
        print(f"w_output: {forward_pass['Output']}")
        do = sigmoid(forward_pass["Output"], derivative=True) * do
        print(f"[{t}] do: \n{do}\n")
        # Update the gradients with respect to the output gate
        # =========================== ?????????DO WE NEED Concat_Input?????????????====================
        print(f"do: {do.T.shape}")
        print(f"Concat_input: {forward_pass['Concat_Input'].shape}")
        gradients["weights"]["Output"] += np.dot(do.T, forward_pass["Concat_Input"]).T
        gradients["recurrent"]["Output"] += np.dot(do.T, next_stm)

        #gradients["bias"]["b_Output"] += do
        print(f"[{t}] gradients[Output] AFTER UPDATE:  \n{gradients['weights']['Output']}\n")


        # Compute the derivative of the cell state and candidate g
        dC = np.copy(next_ltm)
        dC += dh * forward_pass["Output"] * tanh_activation(next_ltm, derivative=True)
        print(f"[{t}] dC:  \n{dC}\n")

        dg = dC * forward_pass["Input"]
        dg = tanh_activation(forward_pass["Candidate"], derivative=True) * dg
        print(f"[{t}] dg:  \n{dg}\n")


        # Update the gradients with respect to the candidate
        print(f"[{t}] gradients[Candidate] BEFORE UPDATE:  \n{gradients['weights']['Candidate']}\n")
        # =========================== ?????????DO WE NEED Concat_Input?????????????====================
        gradients["weights"]["Candidate"] += np.dot(forward_pass["Concat_Input"].T, dg)
        gradients["recurrent"]["Candidate"] += np.dot(dg.T, next_stm)

        #gradients["bias"]["b_Output"] += dg
        print(f"[{t}] gradients[Candidate] AFTER UPDATE:  \n{gradients['weights']['Candidate']}\n")


        # Compute the derivative of the input gate and update its gradients
        di = dC * forward_pass["Candidate"]
        di = sigmoid(forward_pass["Input"], True) * di
        print(f"[{t}] di:  \n{di}\n")

        print(f"[{t}] gradients[Input] BEFORE UPDATE:  \n{gradients['weights']['Input']}\n")
        # =========================== ?????????DO WE NEED Concat_Input?????????????====================
        gradients["weights"]["Input"] += np.dot(forward_pass["Concat_Input"].T, di)
        gradients["recurrent"]["Input"] += np.dot(di.T, next_stm)

        #gradients["bias"]["b_Input"] += di
        print(f"[{t}] gradients[Input] AFTER UPDATE:  \n{gradients['weights']['Input']}\n")


        # Compute the derivative of the forget gate and update its gradients
        df = dC * next_ltm
        df = sigmoid(forward_pass["Forget"]) * df
        print(f"[{t}] df:  \n{df}\n")


        print(f"[{t}] gradients[Forget] BEFORE UPDATE:  \n{gradients['weights']['Forget']}\n")
        # =========================== ?????????DO WE NEED Concat_Input?????????????====================
        gradients["weights"]["Forget"] += np.dot(forward_pass["Concat_Input"].T, df)
        gradients["recurrent"]["Forget"] += np.dot(df.T, next_stm)

        #gradients["bias"]["b_Forget"] += df
        print(f"[{t}] gradients[Forget] AFTER UPDATE:  \n{gradients['weights']['Forget']}\n")


        # Compute the derivative of the input and update the gradients of the previous hidden and cell state
        dz = (np.dot(df, self.parameters["weights"]["Forget"].T) + np.dot(di, self.parameters["weights"]["Input"].T) + np.dot(
            dg, self.parameters["weights"]["Candidate"].T) + np.dot(do, self.parameters["weights"]["Output"].T))
        print(f"[{t}] dz:  \n{dz}\n")

        dh_prev = dz[:self.hidden_size, :]
        print(f"[{t}] dh_prev:  \n{dh_prev}\n")

        dC_prev = forward_pass["Forget"] * dC
        print(f"[{t}] dC_prev:  \n{dC_prev}\n")


        # Clip gradients
#         print(f"=========\nGRADS BEFORE CLIP:  \n{grads}\n")

        grads = self.__clip_gradient_norm(gradients)

        print(f"=========\nGRADS AFTER CLIP:  \n{grads}\n")


        return loss, grads

    def __clip_gradient_norm(self, grads, max_norm=0.25):
        """
        Clips gradients to have a maximum norm of `max_norm`.
        This is to prevent the exploding gradients problem.
        """
        # Set the maximum of the norm to be of type float
        max_norm = float(max_norm)
        total_norm = 0
        # Calculate the L2 norm squared for each gradient and add them to the total norm
        for gate, grad in grads["weights"].items():
            grad_norm = np.sum(np.power(grad, 2))
            total_norm += grad_norm
        total_norm = np.sqrt(total_norm)
        # Calculate clipping coeficient
        clip_coef = max_norm / (total_norm + 1e-6)
        # If the total norm is larger than the maximum allowable norm, then clip the gradient
        if clip_coef < 1:
            for gate, grad in grads["weights"].items():
                grad *= clip_coef
        return grads

    def update_parameters(self, grads, t, lr=0.01):
        # Take a step
        parameters = self.get_parameters["weights"]
        if (self.USE_OPTIMIZER):
            updated_parameters = self.optimizer(parameters=parameters, gradients=grads, learning_rate=lr, t=t)
        else:
            for (_, parameter), (_, grad) in zip(parameters.items(), grads.items()):
                parameter -= lr * grad

In [169]:
"""
TESTING OF FORWARD PASS WITH DUMMY VALUES
"""

np.random.seed(1337)
# Example data dimensions
batch_size = 1  # Number of training examples
input_size = 1  # Number of features in the input
hidden_state = 6  # Number of units in the hidden state/memory cell
model = my_build_model(hidden_size=hidden_state, input_size = input_size)
n_y = 1  # Number of units in the output

z_size = hidden_state + input_size
# Generate example input data, previous hidden state, and previous memory cell
x = np.array([[9]])
z = np.array([[11]])
prev_stm = np.zeros((batch_size, hidden_state))
prev_ltm = np.zeros((batch_size, hidden_state))
print("\n=========Printing for MY_LSTM===============\n")
print("INPUTS\n")
print("(x) Input:\n", x)
print("(prev_stm) {h} Previous hidden state:\n", prev_stm)
print("(prev_ltm) {c} Previous memory cell:\n", prev_ltm)
print("\n=========PERFORM FORWARD PASS==============\n")

# Perform forward pass
print("\n=========APPLY SOFTMAX (LAST LAYER OF LSTM)\n")
forward_pass = model.forward(input_data=x, prev_stm=prev_stm, prev_ltm=prev_ltm)
forward_pass = model.forward(input_data=z, prev_stm=forward_pass["next_stm"], prev_ltm=forward_pass["next_ltm"])
reshaped_output = forward_pass['Output'].reshape(hidden_state)

# Print the output
print("===========OUTPUT============================")
print("(next_ltm) {h} Next hidden state:\n", forward_pass["next_stm"])
print("(next_stm) {c} Next memory cell:\n", forward_pass["next_ltm"])
print("(Output_gate) RESHAPED:\n", forward_pass['Output'].reshape(hidden_state))
output_softmax = np.dot(forward_pass["next_stm"], model.parameters["weights"]["OutputSoftmax"]) + model.parameters["bias"]["OutputSoftmax"]
print(f"softmax(Output)\n {tf.nn.softmax(output_softmax)}")
print("\n========END OF PRINTING FOR MY_LSTM========\n")

print("\n========START OF TENSORFLOW LSTM===========\n")
model_tf = build_model(x)
print("Prediction is: ", model_tf.predict(x))
print("\n========END OF TENSORFLOW LSTM=============\n")


print("\n\n======START OF BACKWARDPROPAGATION=======\n")
model.backward(forward_pass=forward_pass, prediction=output_softmax, targets=[[1.0],[0.0]])
print("\n\n======FINISH OF BACKWARDPROPAGATION======\n")



INPUTS

(x) Input:
 [[9]]
(prev_stm) {h} Previous hidden state:
 [[0. 0. 0. 0. 0. 0.]]
(prev_ltm) {c} Previous memory cell:
 [[0. 0. 0. 0. 0. 0.]]




np.dot(stm, forget): [[0. 0. 0. 0. 0. 0.]]
np.dot(concat, forget): [[ 0.53805273  1.99633296  4.20145678 -4.13668419  4.09812628  4.08910046]]
bias: [[1. 1. 1. 1. 1. 1.]]
Forget_gate value is: 
 [[0.82318147 0.95240819 0.99452164 0.04161918 0.9939289  0.99387419]]
input_gate = concate.shape is (1, 7) dot W_Input shape is (7, 6)
Input_gate value is: 
 [[0.81862287 0.98570103 0.12355285 0.98403383 0.96104844 0.99768908]]
Candidate_gate value is: 
 [[ 0.92100606 -0.87574668  0.99475843  0.92899382 -0.99997627 -0.99713983]]

forget_gate * prev_ltm:
 [[0. 0. 0. 0. 0. 0.]]
input_gate * candidate: 
 [[ 0.75395663 -0.86322441  0.12290524  0.91416134 -0.96102564 -0.99483552]]
next_ltm:[[ 0.75395663 -0.86322441  0.12290524  0.91416134 -0.96102564 -0.99483552]] 
next_ltm value is: 
 [[ 0.75395663 -0.86322441  0.12290524  0.91416134 -0.96102564 -0

In [64]:
def build_model(X):
    model = Sequential()
    print(X.shape[1])
    model.add(Embedding(num_words, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out))
    model.add(Dropout(0.2))
    model.add(Dense(2,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])

    return model


In [3]:

def my_build_model(hidden_size, input_size):
    lstm = MY_LSTM(hidden_size = hidden_size, input_size = input_size, optimizer=0, loss_func=mean_squared_error)
    return lstm



In [174]:

def train_split(X, Y, test_size):
    length = int(len(X) * test_size)
    X_train = X[1:length]
    X_valid = X[length:]
    Y_train = Y[1:length]
    Y_valid = Y[length:]
    return X_train, X_valid, Y_train, Y_valid



In [194]:

def my_train(model, X, Y, hidden_size, input_size):
    """
    X are all sentences: [[9, 123, 5423, 121], [1,2,3,4]]
    Y are all features:  [[1.0], [0.0]]
    
    
    inputs is a sentence: [9, 123, 5423, 121]
    targets is a feature : [1.0]
    
    """

    # Hyper-parameters
    num_epochs = 5
    np.random.seed(1337)

    # Initialize hidden state as zeros
    # hidden_state = np.zeros((hidden_size, 1))

    X_train, X_valid, Y_train, Y_valid = train_split(X = X, Y = Y, test_size = 0.20)

    # Track loss
    training_loss, validation_loss = [], []

    # For each epoch
    for i in range(num_epochs):

        # Track loss
        epoch_training_loss = 0
        epoch_validation_loss = 0
        
        # inputs are sentences 
        for inputs, targets in zip(X_valid, Y_valid):
            print(f"targets inside training loop: {Y_valid}" )
            # Forward pass
            print(f"input : {inputs} ")
            print(f"targets: {targets} ")
          # Initialize hidden state and cell state as zeros
            prev_stm = np.zeros((1, hidden_state))
            prev_ltm = np.zeros((1, hidden_state))
    
            for word in inputs:
                word = np.array([[word]])
                forward_pass = model.forward(word, prev_stm, prev_ltm)
                prev_stm = forward_pass["next_stm"]
                prev_ltm = forward_pass["next_ltm"]
            output_softmax = np.dot(forward_pass["next_stm"], model.parameters["weights"]["OutputSoftmax"]) + model.parameters["bias"]["OutputSoftmax"]
            print("output_softmax: ", output_softmax)
            
            loss = lstm.calculate_loss(forward_pass["result"][-1], [[targets]])

            # Update loss
            epoch_validation_loss += loss
       
        # For each sentence in training set
        t = 1
        for inputs, targets in zip(X_train, Y_train):

            # One-hot encode input and target sequence
            #  inputs_one_hot = dataset.one_hot_encode_sequence(inputs)
            #  targets_one_hot = dataset.one_hot_encode_sequence(targets)

       

            # Forward pass
            forward_pass = model.forward(inputs, stm_prev, ltm_prev)

            # Backward pass
            loss, grads = model.backward(forward_pass, [[targets]])

            # Update parameters

            params = model.update_parameters(grads=grads["weights"], t=t)
            t += 1
            # Update loss
            #output_sentence = [dataset.idx_to_word[np.argmax(output)] for output in forward_pass["output_s"]]

            epoch_training_loss += loss

        # Save loss for plot
        training_loss.append(epoch_training_loss / len(X_train))
        validation_loss.append(epoch_validation_loss / len(X_valid))

        # Print loss every 10 epochs
        if i % 10 == 0:
            print(f'Epoch {i}, training loss: {training_loss[-1]}, validation loss: {validation_loss[-1]}')
            print(f'Input sentence {i}:')
            print(inputs)

            print(f'\nTarget sequence {i}:')
            print(targets)

            print('\nPredicted sequence:')
            print([np.argmax(output)] for output in forward_pass["result"])
    return training_loss, validation_loss


if __name__ == "__main__":
    embed_dim = 12
    lstm_out = 4
    batch_size = 32
    
    Y = pd.get_dummies(data['sentiment']).values[:,0]   
    model = my_build_model(hidden_size=lstm_out, input_size = 1)
    my_train(model, X, Y, lstm_out, input_size)

targets inside training loop: [False  True False ...  True False  True]
input : [700, 3, 1417, 2, 135, 2259, 6, 32, 14, 746, 11, 878, 17, 34, 95, 598, 521, 3, 181, 377, 5, 184, 1, 545, 500, 2, 1417, 190, 1787, 8, 5, 1046, 10, 344, 4, 153, 1, 878, 693, 225, 6, 166, 363, 1621, 187, 6, 29, 488, 7, 29, 717, 11, 1, 759, 2, 52, 932, 2, 40, 2, 1973, 6, 63, 31, 318, 52, 51] 
targets: False 


ValueError: shapes (1,6) and (4,4) not aligned: 6 (dim 1) != 4 (dim 0)

In [177]:
print(model.parameters)

{'weights': {'Forget': array([[ 0.03048122, -0.03666253, -0.00893607, -0.02877448],
       [ 0.02800223, -0.04561165, -0.00664564,  0.01617106],
       [ 0.01399042,  0.03075436, -0.01881047, -0.00031739],
       ...,
       [ 0.01079983,  0.0273924 , -0.04248921,  0.03597927],
       [-0.02439621,  0.01681   ,  0.01221506,  0.01667793],
       [ 0.0215796 ,  0.01751008, -0.0202022 , -0.02570455]]), 'Input': array([[-0.0463762 ,  0.00683881, -0.01025448, -0.00243446],
       [ 0.01002188,  0.04125598,  0.02091622, -0.02711964],
       [-0.04309387, -0.00364571, -0.00200082,  0.06383244],
       ...,
       [ 0.00035293,  0.03881205, -0.00276471, -0.02780999],
       [ 0.02801274,  0.01632623, -0.02110473, -0.0943948 ],
       [-0.0072498 , -0.05295839,  0.02167491, -0.03179608]]), 'Candidate': array([[ 0.04432224,  0.03288289,  0.02359354, -0.08944912],
       [-0.02213255, -0.0280371 ,  0.00056609, -0.0421228 ],
       [ 0.00395328,  0.02036906,  0.00468574,  0.00371264],
       ...,


In [74]:
X_train, X_valid, Y_train, Y_valid = train_split(X = X, Y = Y, test_size = 0.20)
embed_dim = 12
hidden_size = 300
batch_size = 32
input_size = 2500
for inputs, targets in zip(X_train, Y_train):

    # One-hot encode input and target sequence
    #  inputs_one_hot = dataset.one_hot_encode_sequence(inputs)
    #  targets_one_hot = dataset.one_hot_encode_sequence(targets)

    # Forward pass
    forward_pass = model.forward(inputs)
    
    
    print(f" Prediction: {forward_pass['result'][-1].squeeze()} and truth is {targets}")

 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is False
 Prediction: 1.0 and truth is True
 Prediction: 1.0 and truth is Fals

KeyboardInterrupt: 