In [0]:
# Google Colab workaround: deliberately exhaust the runtime's memory by
# appending to a list forever.
# NOTE(review): per the original author's note, the resulting crash is meant to
# make Colab offer a 25GB high-RAM runtime -- confirm this still applies.
# WARNING: this loop has no exit condition, so the cell never finishes on its
# own; skip it when using "Run All".
values = []
while True:
  values.append("hari")

# sentiment classification on IMDB dataset

Each movie review is a variable-length sequence of words, and the sentiment of each review must be classified as positive or negative.

# Word Embedding

Word embedding is a popular technique when working with text. Words are encoded as real-valued vectors in a high-dimensional space, where the similarity between words in terms of meaning translates to closeness in the vector space.

Keras provides a convenient way to convert positive integer representations of words into a word embedding by an Embedding layer.

We will map each word onto a real-valued vector of length 32. We will also limit the total number of words that we are interested in modeling to the 5000 most frequent words, and zero out the rest. Finally, the sequence length (number of words) in each review varies, so we will constrain each review to be 500 words, truncating longer reviews and padding shorter reviews with zero values.

The first layer is the Embedding layer, which uses vectors of length 32 to represent each word. The next layer is the LSTM layer with 100 memory units (smart neurons). Finally, because this is a classification problem, we use a Dense output layer with a single neuron and a sigmoid activation function to make 0 or 1 predictions for the two classes (good and bad) in the problem.

Because it is a binary classification problem, log loss is used as the loss function (binary_crossentropy in Keras). The efficient ADAM optimization algorithm is used. The model is fit for only 3 epochs because it quickly overfits the problem. A large batch size of 64 reviews is used to space out weight updates.

In [0]:
# LSTM for sequence classification on the IMDB dataset
import numpy
from keras.models import Sequential
from keras.layers import Dense, LSTM
# Embedding comes from the public `keras.layers` namespace; the private
# `keras.layers.embeddings` module path is not available in newer Keras releases.
from keras.layers import Embedding
from keras.preprocessing import sequence
from keras.datasets import imdb

# Fix the NumPy random seed for reproducibility.
# NOTE(review): this seeds NumPy only; the Keras backend may need its own seed -- confirm.
numpy.random.seed(7)

# Load the dataset, keeping only the `top_words` most frequent words.
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

# Truncate or pad every review so that all input sequences have length `max_review_length`.
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Build the model: Embedding -> LSTM(100) -> single sigmoid output unit.
embedding_vector_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
model.fit(X_train, y_train, epochs=3, batch_size=64)

# Final evaluation of the model on the test split; with metrics=['accuracy'],
# scores[0] is the loss and scores[1] is the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)
print("accuracy : %.2f%%" % (scores[1] * 100))






Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 101       
Total params: 213,301
Trainable params: 213,301
Non-trainable params: 0
_________________________________________________________________
None



Epoch 1/3





Epoch 2/3
Epoch 3/3
accuracy : 86.20.%


# Added a Dense layer with 100 nodes and Dropout with rate 0.2
# With this configuration, accuracy increased by about 1%

In [2]:
# LSTM + Dense/Dropout head for sequence classification on the IMDB dataset
import numpy
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.layers import Dropout
# Embedding comes from the public `keras.layers` namespace; the private
# `keras.layers.embeddings` module path is not available in newer Keras releases.
from keras.layers import Embedding
from keras.preprocessing import sequence
from keras.datasets import imdb

# Fix the NumPy random seed for reproducibility.
# NOTE(review): this seeds NumPy only; the Keras backend may need its own seed -- confirm.
numpy.random.seed(7)

# Load the dataset, keeping only the `top_words` most frequent words.
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

# Truncate or pad every review so that all input sequences have length `max_review_length`.
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

# Build the model: Embedding -> LSTM(100) -> Dense(100, relu) -> Dropout(0.2)
# -> single sigmoid output unit.
embedding_vector_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(LSTM(100))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
model.fit(X_train, y_train, epochs=3, batch_size=64)

# Final evaluation of the model on the test split; with metrics=['accuracy'],
# scores[0] is the loss and scores[1] is the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)
print("accuracy : %.2f%%" % (scores[1] * 100))


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               53200     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 101       
Total params: 223,401
Trainable params: 

# Sentiment classification with GRU
# Replaced the LSTM layer with a GRU layer

In [3]:
# GRU for sequence classification on the IMDB dataset
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dropout, Dense
from keras.layers import GRU
# Embedding comes from the public `keras.layers` namespace; the private
# `keras.layers.embeddings` module path is not available in newer Keras releases.
from keras.layers import Embedding
from keras.preprocessing import sequence

# Fix the NumPy random seed for reproducibility, matching the LSTM experiments.
# NOTE(review): this seeds NumPy only; the Keras backend may need its own seed -- confirm.
np.random.seed(7)

# Load the dataset, keeping only the `top_words` most frequent words.
top_words = 5000
(train_x, train_y), (test_x, test_y) = imdb.load_data(num_words=top_words)

# Truncate or pad every review so that all input sequences have length `max_review_length`.
max_review_length = 500
X_train = sequence.pad_sequences(train_x, maxlen=max_review_length)
X_test = sequence.pad_sequences(test_x, maxlen=max_review_length)

# Build the model: Embedding -> GRU(100) -> Dense(100, relu) -> Dropout(0.2)
# -> single sigmoid output unit.
embedding_vector_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(GRU(100))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()
model.fit(X_train, train_y, epochs=3, batch_size=64)

# Final evaluation of the model on the test split; with metrics=['accuracy'],
# scores[0] is the loss and scores[1] is the accuracy.
scores = model.evaluate(X_test, test_y, verbose=0)
print("accuracy : %.2f%%" % (scores[1] * 100))


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
gru_2 (GRU)                  (None, 100)               39900     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 101       
Total params: 210,101
Trainable params: 210,101
Non-trainable params: 0
_________________________________________________________________
None



Epoch 1/3





Epoch 2/3
Epoch 3/3
accuracy : 87.13.%
