# Product Review Summary Generator

# Approach

1. Import packages. Remember to import tensorflow keras. Check version of Tf, should be 2.0 or above
2. Bring all util functions in the main notebook
3. Load 50D glove embedding
4. Intialize global variables. These include

    - m = No. of training + test records to be loaded (m)
    - Tx = Max number of words in a review 
    - Ty = max number of words in the summary
    - vocab_size = number of words in the vocanbulary
    
5. Implement Sentence_to_indices. Clean-up unwanted chars in the words
6. Import Review & Summary data to get X,Y
7. Convert Y_Indices to One Hot
8. Split X & Y into test & train sets
9. Create the Embedding Layer
10. Create rest of the Model. Feed the embedding layer output to the rest of the model




# Next steps to Try

1. Try Sparse Cross Entrop Loss - https://stats.stackexchange.com/questions/326065/cross-entropy-vs-sparse-cross-entropy-when-to-use-one-over-the-other
2. See if some words can be stripped off from the vocanbulary (liek special characters). This will reduce vocab_size and thus help y_oh computation
3. Add Dropout Regularization to the Model
4. Increase Tx - that is allow more words in the review
5. Go through the one step attention model video. Tweak n_a & n_s parameters 

In [1]:
# Import packages
import numpy as np
import time
import importlib
import re
import sklearn.model_selection 


#importlib.reload(nmt_utils)
#importlib.reload(emo_utils)


try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

import tensorflow as tf
import csv
import pickle
from tensorflow import keras
import keras.backend as K

np.random.seed(0)

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Dropout, LSTM, Activation, Embedding, RepeatVector, Concatenate, Dot, Bidirectional
#from tensorflow.keras.layers.embeddings import Embedding
from tensorflow.keras.preprocessing import *
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.optimizers import Adam
from datetime import datetime
#from emo_utils import *
#from nmt_utils import *

print(tf.keras.__version__)
print(keras.__version__)

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


2.2.4-tf
2.2.4-tf


In [2]:
# Util Functions

def read_glove_vecs(glove_file):
    with open(glove_file, 'r') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)
        
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map


# Read data file to get training & test data
def read_review_data(filename,m):
    review = []
    summary = []
    
    ctr = 0

    review_summary_data = []
    with open (filename) as csvDataFile:
        csvReader = csv.reader(csvDataFile)
         

        for row in csvReader:
            #review.append(row[1])
            #summary.append(row[0])
            #row = [d.replace('"', '') for d in data]
            
            review_summary_data.append((row[1], row[0])) 
            #print("Review is ", row[1])
            #print("Summary is ", row[0])
            
            ctr = ctr + 1
            if ctr<m:
                if ctr%(m*0.1)==0:
                    print("Number of reviews loaded ", ctr)
            else:
                break

    #X = np.asarray(review)
    #Y = np.asarray(summary)     

    #return X, Y
    
    
    return np.array(review_summary_data)

def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

def string_to_int(string, length, vocab):
    """
    Converts all strings in the vocabulary into a list of integers representing the positions of the
    input string's characters in the "vocab"
    
    Arguments:
    string -- input string, e.g. 'Wed 10 Jul 2007'
    length -- the number of time steps you'd like, determines if the output will be padded or cut
    vocab -- vocabulary, dictionary used to index every character of your "string"
    
    Returns:
    rep -- list of integers (or '<unk>') (size = length) representing the position of the string's character in the vocabulary
    """
    
    #make lower to standardize
    string = str(string)
    string = string.lower()
    
    
    string = string.replace('\'','')
    string = string.replace('"','')
    #print(string)
    
    
        
    string = re.sub(r"[^a-zA-Z0-9]+", ' ', string)
    #print("String length is  ", len(string))
    
    rep=[]  
    string = string.split()
    #print("number of words in string are ", len(string))
    
    if len(string) > length:
        string = string[:length]
        #print("String length after adjustment is  ", len(string))
        
    
    #rep = list(map(lambda x: vocab.get(x, '<unk>'), string))
    #rep = list(map(lambda x: vocab.get(x), string))
    for word_index in range (0,len(string)):
        idx = vocab.get(string[word_index])
        if idx:
            rep.append(idx)
    
    #print("length of rep indices list is ", len(rep))
    #print("Inside string_to_int, rep is ", rep)
    
    # Pad the remaining places in the sentence with <pad>
    if len(rep) < length:
        rep += [vocab['<pad>']] * (length - (len(rep)))
        #print("length of rep indices list after padding with <pad> is ", len(rep))  
    
          
    #print (rep)
    return rep

def preprocess_data(dataset, word_to_index, Tx, Ty,m):
    
    X1, Y1 = zip(*dataset)
    #print("X1 inside preprocess data ", X1)
    
    #X = str(X).split()
    #Y = str(Y).split()
    
    #print(X)
    #print(Y)
    X,Y= np.empty((m,Tx)),np.empty((m,Ty))     
    
    #X = [string_to_int(a, Tx, word_to_index) for a in X1]
    #Y = [string_to_int(t, Ty, word_to_index) for t in Y1]
    
    ctr = 0
    for sentence in X1:
        indices_X = np.array(string_to_int(sentence, Tx, word_to_index))
        # debug
        #if indices_X.shape[0]!=Tx:
            #print("indices_X shape !=50 for sentence ", sentence)
            #print("indices_X shape !=50 at position ", ctr)
            #print("indices_X shape !=50 , shape is  ", indices_X.shape[0])
        
        X[ctr] = indices_X
        ctr = ctr + 1
        
        
    ctr = 0
    for sentence in Y1:
        indices_Y = string_to_int(sentence, Ty, word_to_index) 
        Y[ctr] = indices_Y
        ctr = ctr + 1
        
        
    
    #Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    #Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

    #return X, np.array(Y)
    
    #print("X shape from preprocess _data is ", (np.array(X)).shape)
    #print("Y shape from preprocess _data is ", (np.array(Y)).shape)
    return np.array(X), np.array(Y)

def convert_indices_to_words(idx_array,dictionary):
    output = []
    #print("idx_array ", idx_array)
    for word_idx in idx_array:
        word = dictionary.get(int(word_idx))
        #print(word_idx)
        if word!=None:
            output.append(word)
    
    return output

In [3]:
# Implementation of Step # 3
word_to_index, index_to_word, word_to_vec_map = read_glove_vecs('../../glove.6B/glove.6B.50d.txt')


In [6]:
# Initialize global variables

m = 20 # Remember that the 1 record (header record) from the raw data file will be popped. So set m to be 1 more that the train + test records required
Tx = 40 # Maximum of 40 words in a review
Ty = 5 # Maximum 5 words in the summary of the review

# Add <pad> and '<unk> tokens to all the lists
index_to_word[0]="<pad>" # End of sequence word
word_to_index["<pad>"]=0
word_to_vec_map["<pad>"] = np.zeros((1,50))

# In the original glove.6B file, index = 4 had "!!!!". Since its extremely rare & ununsed, replacing this index position with "<unk>"

index_to_word[4]="<unk>" # End of sequence word
word_to_index["<unk>"]=4
word_to_vec_map["<unk>"] = np.zeros((1,50))

vocab_size = len(word_to_index)+1 #Since <pad> has been added (nothing existed at index = 0) and "unk" has been swapped

#print(word_to_index["None"])




In [7]:
# Unit testing review & summary pre-processing 

dset = read_review_data('file_for_testing_pre_processing.csv',3)

print("len(dset) ", len(dset))



#dataset= [[X_data,Y_data]]
#for i in range(1,m):
    #dataset.append((X_data[i], Y_data[i]))

X_Indices, Y_Indices = preprocess_data(dset, word_to_index, Tx, Ty,m)

print("X_Indices.shape ", X_Indices.shape)
print("Y_Indices.shape ", Y_Indices.shape)

#print("X_Indices[0,0:10]  ", X_Indices[0][1])

# print & check the source & target conversion to indices
index = 2

print("dataset[0] ", dset[index][0])
print("dataset[1] ", dset[index][1])
#print("Source date:", dataset[index][0])
#print("Target date:", dataset[index][1])
print()
print("Source after preprocessing (indices):", X_Indices[index])
print("Target after preprocessing (indices):", Y_Indices[index])
print()

print("sentence x is ", convert_indices_to_words(X_Indices[index],index_to_word))
print("sentence y is ", convert_indices_to_words(Y_Indices[index],index_to_word))

len(dset)  3
X_Indices.shape  (20, 40)
Y_Indices.shape  (20, 5)
dataset[0]  This is the third review
dataset[1]  Awesome product

Source after preprocessing (indices): [358160. 192973. 357266. 358029. 307253.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.      0.      0.      0.      0.      0.
      0.      0.      0.      0.]
Target after preprocessing (indices): [ 64354. 292984.      0.      0.      0.]

sentence x is  ['this', 'is', 'the', 'third', 'review', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
sentence y is  ['awesome', 'product', '<pad>', '<pad>'

In [8]:
# Load test & training data

dataset = []
#X_data, Y_data = read_review_data('product_reviews_modified.csv',m)

dataset = read_review_data('product_reviews_modified.csv',m)
# Remove the header rows
#X_data.pop(0)
#Y_data.pop(0)

#print("Number of Reviews & Summaries Loaded = ", len(X_train), len(Y_train))

#print(X_data[1], Y_data[1])
#print("X_data shape is ", X_data.shape)

print("len(dataset) ", len(dataset))



#dataset= [[X_data,Y_data]]
#for i in range(1,m):
    #dataset.append((X_data[i], Y_data[i]))

X_Indices, Y_Indices = preprocess_data(dataset, word_to_index, Tx, Ty,m)

print("X_Indices.shape ", X_Indices.shape)
print("Y_Indices.shape ", Y_Indices.shape)

#print("X_Indices[0,0:10]  ", X_Indices[0][1])

# print & check the source & target conversion to indices
index = m-1

print("dataset[0] ", dataset[index][0])
print("dataset[1] ", dataset[index][1])
#print("Source date:", dataset[index][0])
#print("Target date:", dataset[index][1])
print()
print("Source after preprocessing (indices):", X_Indices[index])
print("Target after preprocessing (indices):", Y_Indices[index])
print()

print("sentence x is ", convert_indices_to_words(X_Indices[index],index_to_word))
print("sentence y is ", convert_indices_to_words(Y_Indices[index],index_to_word))

Number of reviews loaded  2
Number of reviews loaded  4
Number of reviews loaded  6
Number of reviews loaded  8
Number of reviews loaded  10
Number of reviews loaded  12
Number of reviews loaded  14
Number of reviews loaded  16
Number of reviews loaded  18
len(dataset)  20
X_Indices.shape  (20, 40)
Y_Indices.shape  (20, 5)
dataset[0]  Twizzlers, Strawberry my childhood favorite candy, made in Lancaster Pennsylvania by Y & S Candies, Inc. one of the oldest confectionery Firms in the United States, now a Subsidiary of the Hershey Company, the Company was established in 1845 as Young and Smylie, they also make Apple Licorice Twists, Green Color and Blue Raspberry Licorice Twists, I like them all<br /><br />I keep it in a dry cool place because is not recommended it to put it in the fridge. According to the Guinness Book of Records, the longest Licorice Twist ever made measured 1.200 Feet (370 M) and weighted 100 Pounds (45 Kg) and was made by Y & S Candies, Inc. This Record-Breaking Twist

In [None]:
print(index_to_word[279682])
print(word_to_index['were'])

In [None]:
# One hot encoding of labels, if required

#start1 = time.process_time()
#https://www.tensorflow.org/api_docs/python/tf/one_hot
#Y_oh = tf.one_hot(Y_train_indices,vocab_size,on_value=None,off_value=None,axis=-1,dtype=tf.int8,name='One Hot')
#print("Y_oh.shape ", Y_oh.shape)
#print("Time taken in sec ", time.process_time() - start1) # Should take 3.5 seconds for 500000 records


In [9]:
# Split data into test & train

train_size = 0.7
train_records = round(train_size*m)
X = np.array(X_Indices)
Y = np.array(Y_Indices) #Y_oh

print("X.shape ", X.shape)
print("Y.shape ", Y.shape)

X_train_indices, X_test_indices = X[0:train_records,:],X[train_records:,:]
y_train_oh, y_test_oh = Y[0:train_records,:],Y[train_records:,:]

print("Training data shape ", X_train_indices.shape,y_train_oh.shape )
print("Test data shape ", X_test_indices.shape,y_test_oh.shape )

#y_train_oh, y_test_oh = train_test_split(X_train_indices, Y_oh, test_size=0.33)

X.shape  (20, 40)
Y.shape  (20, 5)
Training data shape  (14, 40) (14, 5)
Test data shape  (6, 40) (6, 5)


In [17]:
# GRADED FUNCTION: pretrained_embedding_layer

def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.
    
    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (400,001 words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    
    vocab_len = len(word_to_index) + 1                  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]      # define dimensionality of your GloVe word vectors (= 50)
    
    ### START CODE HERE ###
    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, dimensions of word vectors = emb_dim)
    emb_matrix = np.zeros((vocab_len,emb_dim))
    
    # Set each row "index" of the embedding matrix to be the word vector representation of the "index"th word of the vocabulary
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # Define Keras embedding layer with the correct output/input sizes, make it non-trainable. Use Embedding(...). Make sure to set trainable=False. 
    embedding_layer = Embedding(vocab_len,emb_dim,trainable=False)
    ### END CODE HERE ###

    # Build the embedding layer, it is required before setting the weights of the embedding layer. Do not modify the "None".
    embedding_layer.build((None,))
    
    # Set the weights of the embedding layer to the embedding matrix. Your layer is now pretrained.
    embedding_layer.set_weights([emb_matrix])
    
    return embedding_layer

In [18]:
embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
# Check the function. Expected output should be **weights[0][1][3] =**	-0.3403
print("weights[0][1][3] =", embedding_layer.get_weights()[0][1][3])


weights[0][1][3] = -0.3403


In [12]:
# Objects required for the One step attention part of the model

# Defined shared layers as global variables
repeator = RepeatVector(Tx)
concatenator = Concatenate(axis=-1)
densor1 = Dense(Ty, activation = "tanh")# Was Dense(10, activation = "tanh") in the week 3 course. In that course Ty=10, so this is updated to Ty
densor2 = Dense(1, activation = "relu")
activator = Activation(softmax, name='attention_weights') # We are using a custom softmax(axis = 1) loaded in this notebook
dotor = Dot(axes = 1)

In [13]:
# GRADED FUNCTION: one_step_attention

def one_step_attention(a, s_prev):
    """
    Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
    "alphas" and the hidden states "a" of the Bi-LSTM.
    
    Arguments:
    a -- hidden state output of the Bi-LSTM, numpy-array of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
    
    Returns:
    context -- context vector, input of the next (post-attetion) LSTM cell
    """
    
    ### START CODE HERE ###
    # Use repeator to repeat s_prev to be of shape (m, Tx, n_s) so that you can concatenate it with all hidden states "a" (≈ 1 line)
    #print(s_prev.shape[1])
    s_prev = repeator(s_prev)
    # Use concatenator to concatenate a and s_prev on the last axis (≈ 1 line)
    concat = concatenator([a,s_prev])
    # Use densor1 to propagate concat through a small fully-connected neural network to compute the "intermediate energies" variable e. (≈1 lines)
    e = densor1(concat)
    # Use densor2 to propagate e through a small fully-connected neural network to compute the "energies" variable energies. (≈1 lines)
    energies = densor2(e)
    # Use "activator" on "energies" to compute the attention weights "alphas" (≈ 1 line)
    alphas = activator(energies)
    # Use dotor together with "alphas" and "a" to compute the context vector to be given to the next (post-attention) LSTM-cell (≈ 1 line)
    context = dotor([alphas,a])
    ### END CODE HERE ###
    
    return context

In [14]:
# Parameters required for main model

n_a = 32 # Was 32 in the week 3 course
n_s = 64 # Was 64 in the week 3 course
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
output_layer = Dense(vocab_size, activation=softmax)
#output_layer = Dense(Ty, activation=softmax)

In [22]:
# GRADED FUNCTION: model

def model(input_shape,word_to_vec_map, word_to_index,Tx, Ty, n_a, n_s, human_vocab_size, machine_vocab_size):
    """
    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    human_vocab_size -- size of the python dictionary "human_vocab"
    machine_vocab_size -- size of the python dictionary "machine_vocab"

    Returns:
    model -- Keras model instance
    """
    
    # Define the inputs of your model with a shape (Tx,)
    # Define s0 and c0, initial hidden state for the decoder LSTM of shape (n_s,)
    
    
    # X is commented out because the input to the model has to be sentence_indices
    #X = Input(shape=(Tx, human_vocab_size))
    
    
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    y0 = Input(shape=(n_s,), name='y0')
    s = s0
    c = c0
    yt = y0
    
    # Initialize empty list of outputs
    outputs = []
    
    ### START CODE HERE ###
    
    # Create the Main Model

    # This should be the initial part of the model that starts with the embedding layer

    # Define sentence_indices as the input of the graph, it should be of shape input_shape and dtype 'int32' (as it contains indices).
    #input_shape = (maxlen,) (in emojify example) which is further = Tx (Maximum words in the input sequence)
    sentence_indices = Input(shape=input_shape, dtype=np.int32)
    
    # Create the embedding layer pretrained with GloVe Vectors (≈1 line)
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
    
    # Propagate sentence_indices through your embedding layer, you get back the embeddings
    embeddings = embedding_layer(sentence_indices)   
    
    
    ##### The following lines of code have to be merged into one line. ###########
    #Embeddings have to be the input to the Bidirectional LSTM
    
    
    # Propagate the embeddings through an LSTM layer with 128-dimensional hidden state
    # Be careful, the returned output should be a batch of sequences.
    # The resulting output of the Bidirectional LSTM should be 'a'
    #X = LSTM(128, return_sequences=True, return_state = False)(embeddings)
    
    # Step 1: Define your pre-attention Bi-LSTM. Remember to use return_sequences=True. (≈ 1 line)
    #a = Bidirectional(LSTM(n_a, return_sequences=True),input_shape=X.shape)(X)
    
    
    # Merged layer, from Emojiy & Machine Tranlation (Week 3 Excercise)
    a = Bidirectional(LSTM(n_a, return_sequences=True))(embeddings)
    
    
    
    #### End of Merge. 
    
    out=[]
    
    
    # Step 2: Iterate for Ty steps
    for t in range(Ty):
    
        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t (≈ 1 line)
        context = one_step_attention(a, s)
        
        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector.
        # Don't forget to pass: initial_state = [hidden state, cell state] (≈ 1 line)
        s,_,c = post_activation_LSTM_cell(context, initial_state = [s,c])
        #s, _, c = post_activation_LSTM_cell(context, initial_state = [s, c])
        
        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM (≈ 1 line)
        out = output_layer(s)
        
        # Step 2.D: Append "out" to the "outputs" list (≈ 1 line)
        outputs.append(out)
    
    # Step 3: Create model instance taking three inputs and returning the list of outputs. (≈ 1 line)
    model = Model(inputs=[sentence_indices ,s0,c0], outputs=outputs)
    
    ### END CODE HERE ###
    
    return model

In [23]:
# Input value X should be of shape (m,Tx,). In Emojofy, sentences_to_indices returns arrays. 
# In the implementation above, the return type has been changed to a list. If there is an error, try converting the list to array (m,Tx,)
print(Tx)
model = model((Tx,), word_to_vec_map, word_to_index,Tx, Ty, n_a, n_s, vocab_size, vocab_size)


40


In [24]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 40)]         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 40, 50)       20000150    input_3[0][0]                    
__________________________________________________________________________________________________
s0 (InputLayer)                 [(None, 64)]         0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 40, 64)       21248       embedding_2[0][0]                
____________________________________________________________________________________________

In [15]:
opt = Adam(lr = 0.01,  beta_1=0.9,beta_2=0.999, decay = 0.01)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [16]:
# Train the model
print(X_train_indices.shape)
print("y_train_oh.shape ", y_train_oh.shape)
#print("tf.transpose(y_train_oh,perm=[1,0,2] ", tf.transpose(y_train_oh,perm=[1,0,2])
s0 = np.zeros((train_records, n_s))
c0 = np.zeros((train_records, n_s))

#https://www.tensorflow.org/api_docs/python/tf/transpose. #See the description in the week 3 course
#outputs = list(tf.transpose(y_train_oh,perm=[1,0,2]))
outputs = list(tf.transpose(y_train_oh,perm=[1,0]))
#outputs = y_train_oh

# Recreate the exact same model purely from the file
#model = keras.models.load_model('model_name.h5')


history = model.fit([X_train_indices, s0, c0], outputs, epochs=5, batch_size=100)

model_name = "model" + datetime.now().strftime("%d_%m_%Y_%H_%M_%S")+".h5"

# Save the model
model.save(model_name)

(14000, 50)
y_train_oh.shape  (14000, 5)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
# test the accuracy against the test set


#outputs_test = outputs_test[0:m-train_records,:,:]
#outputs_test = list(tf.transpose(y_test_oh,perm=[1,0,2]))
outputs_test = list(tf.transpose(y_test_oh,perm=[1,0]))

s0_test = np.zeros((X_test_indices.shape[0], n_s))
c0_test = np.zeros((X_test_indices.shape[0], n_s))
#s0_test = np.zeros((m-train_records+1, n_s))
#c0_test = np.zeros((m-train_records+1, n_s))

model.evaluate([X_test_indices, s0_test,c0_test], outputs_test, batch_size=100, verbose=1, sample_weight=None, steps=None, callbacks=None)



[25.796771144866945,
 3.1464252,
 3.1464252,
 3.1464252,
 3.1464252,
 3.1464252,
 0.097833335,
 0.107,
 0.30733332,
 0.492,
 0.663]

In [None]:

print(index_to_word[166369])

In [21]:
# Generate predictions against the test set and see how they match up with the true labels of the same test set
word = np.empty((50,5),dtype=np.object)

prediction_test = model.predict([X_test_indices[0:10,:], s0, c0])
prediction_test_idx = np.argmax(prediction_test, axis=-1)

prediction_test_idx= tf.transpose(prediction_test_idx,perm=[1,0])
print("prediction_test_idx.shape after tf.permute[1,0]", prediction_test_idx.shape) 
print("prediction idx for the record prediction_test_idx[0:1,:]", prediction_test_idx[3,:])

pred_list = list(prediction_test_idx)
#print(pred_list)

#for record in range(1:50):
for i in range(0,10):
    X = X_test_indices[i,:]
    x_wrd_keys = [index_to_word.get(a) for a in X]
    print (x_wrd_keys)
    
    y_pred_keys = [index_to_word.get(b) for b in pred_list[i]]
    print (y_pred_keys)
    print (prediction_test_idx[i,:])
    
    print("Finished test output number ", i)


prediction_test_idx.shape after tf.permute[1,0] (10, 5)
prediction idx for the record prediction_test_idx[0:1,:] tf.Tensor([166369      0      0      0      0], shape=(5,), dtype=int64)
['what', 'i', 'like', 'about', 'these', 'first', 'and', 'foremost', 'is', 'that', 'they', 'are', 'healthy', 'and', 'organic', 'and', 'not', 'stuffed', 'full', 'of', 'preservatives', 'i', 'also', 'trust', 'newman', 'products', 'and', 'have', 'always', 'loved', 'the', 'salad', 'dressings', 'and', 'pasta', 'sauces', 'they', 'make', 'for', 'me', 'not', 'my', 'dog', 'top', 'quality', 'br', 'at', 'first', 'my', 'dog']
[None, None, None, None, None]
tf.Tensor([166369      0      0      0      0], shape=(5,), dtype=int64)
Finished test output number  0
['my', 'malamutes', 'arent', 'too', 'picky', 'except', 'one', 'who', 'hates', 'peanut', 'butter', 'so', 'i', 'decided', 'to', 'try', 'these', 'treats', 'very', 'hard', 'and', 'crunchy', 'biscuits', 'of', 'a', 'decent', 'size', 'if', 'i', 'have', 'one', 'complaint

In [None]:

#EXAMPLES = ["I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than  most"]
EXAMPLES = ['Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo"','This is the second review']
#print(X_train[1])

#for example in EXAMPLES:
m_predict = len(EXAMPLES)
print(m_predict)



dataset_predict = []
for i in range(0,m_predict):
    dataset_predict.append((EXAMPLES[i], "y"))

X_predict_Indices, _ = preprocess_data(dataset_predict, word_to_index, Tx, Ty,m_predict)

    
#source = string_to_int(example, Tx, vocab_size)
#source = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), source))).swapaxes(0,1)
prediction = model.predict([X_predict_Indices, s0, c0])

prediction_idx = np.argmax(prediction, axis=-1)
#prediction_idx = model.predict([x_predict_input, s0, c0])
print(prediction[0])

print(prediction_idx)


    
#output = [index_to_word[int(i)] for i in prediction_idx]

print("Review is:", EXAMPLES[0])
#print("output:", output)

print("Review  word indices are:", X_predict_Indices[0])
#print("output:", output)


#output1 = [index_to_word[int(i)] for i in prediction_idx]
#print("output1 ", output1)

print(" Review Summary is ")
output = []
for wrd_idx in prediction_idx:
    
    print(wrd_idx, end =" ")
    wrd = index_to_word[wrd_idx]
    print(wrd, end =" ")
    output.append(wrd)
    output.append(" ")

print(" ")



In [None]:
print(index_to_word[132033])
print(index_to_word[350784])

In [None]:
output1 = []


In [None]:
z = np.random.randint(5, size=(2,3))
#print(z)
print(z.shape)

z_ravel = z.ravel()
print("z_ravel = ", z_ravel)
print("z = ", z)


z_ravel = tf.one_hot(z_ravel,5,on_value=None,off_value=None,axis=-1,dtype=tf.int8,name='One Hot')
print("z_oh ", z_oh)

print("z_ravel = ", z_ravel)
print("z = ", z)
#v = np.argmax(z,axis=-1)
#print(v.shape)
#print(v)

In [None]:
z_back = np.reshape(z_ravel, (2,3,5))
print("z_back ", z_back)

In [None]:
# Compare performance on tf.one_hot on n dim array vs one dime vector
import time

z = np.random.randint(5, size=(1000,50))

start1 = time.process_time()
nd = z
print("nd shape =", nd.shape)
nd_oh = tf.one_hot(nd,400000,on_value=None,off_value=None,axis=-1,dtype=tf.int8,name='One Hot')
print("nd_oh shape ", nd_oh.shape)
print("Time taken for nd_oh ", time.process_time() - start1)



In [None]:
start2 = time.process_time()
od = z.ravel()
print("od shape =", od.shape)
od_oh = tf.one_hot(od,400000,on_value=None,off_value=None,axis=-1,dtype=tf.int8,name='One Hot')
print("od_oh shape ", od_oh.shape)
print("Time taken for nd_oh ", time.process_time() - start2)