In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

In [2]:
imdb,info=tfds.load('imdb_reviews',with_info=True,as_supervised=True)

In [3]:
train_batch,test_batch=imdb['train'], imdb['test']

In [4]:
training_text,training_label=[],[]
for text,label in train_batch:
    training_text.append(str(text.numpy()))
    training_label.append(label.numpy())
    print(label)
    print(type(label))
    break
## Testing data
testing_text,testing_label=[],[]
for text,label in test_batch:
    testing_text.append(str(text.numpy()))
    testing_label.append(label.numpy())
    print(label)
    print(type(label))
    break
    

    

tf.Tensor(0, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(1, shape=(), dtype=int64)
<class 'tensorflow.python.framework.ops.EagerTensor'>


### tensor to numpy conversion of labels

In [17]:
train_numpy_label=np.array(training_label)
test_numpy_label=np.array(testing_label)

In [41]:
training_label[:10]

[0, 0, 0, 1, 1, 1, 0, 0, 0, 0]

In [46]:
training_text[3]

"b'This is the kind of film for a snowy Sunday afternoon when the rest of the world can go ahead with its own business as you descend into a big arm-chair and mellow for a couple of hours. Wonderful performances from Cher and Nicolas Cage (as always) gently row the plot along. There are no rapids to cross, no dangerous waters, just a warm and witty paddle through New York life at its best. A family film in every sense and one that deserves the praise it received.'"

## Tokenization

In [8]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [9]:
num_words=10000
embed_dim=16
padding='post'
oov_text='<OOV>'
max_length=120

In [10]:
token=Tokenizer(num_words=num_words,oov_token=oov_text)

In [11]:
token.fit_on_texts(training_text)

In [12]:
word_index=token.word_index

In [13]:
train_seq=token.texts_to_sequences(training_text)
test_seq=token.texts_to_sequences(testing_text)

## Padding sequence

In [14]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [20]:
train_pad_seq=pad_sequences(train_seq,maxlen=max_length,padding=padding,truncating='post')
test_pad_seq=pad_sequences(test_seq,maxlen=max_length,padding=padding,truncating='post')

In [24]:
train_pad_seq.shape

(25000, 120)

### model Creation

In [21]:
from tensorflow.keras.layers import Embedding,Dense,Flatten

In [27]:
model=keras.Sequential()
model.add(Embedding(input_dim=num_words,output_dim=embed_dim,input_length=max_length))
model.add(Flatten())
model.add(Dense(units=6,activation=tf.nn.relu))
model.add(Dense(units=1,activation=tf.nn.sigmoid))
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 120, 16)           160000    
_________________________________________________________________
flatten (Flatten)            (None, 1920)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 11526     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 7         
Total params: 171,533
Trainable params: 171,533
Non-trainable params: 0
_________________________________________________________________


In [28]:
model.fit(train_pad_seq,train_numpy_label,epochs=10,validation_data=(test_pad_seq,test_numpy_label))

Train on 25000 samples, validate on 25000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ffa747432d0>

In [97]:
prediction=['This is a worst movie i have ever seen. i do not recommend this movie at all']
#prediction=['A Feel good movie where all the family can go and enjoy together.']
prediction=["An Extra-ordinary movie. The director is awesome. I will recommend this movie"]
prediction=["Iron Man 2 was the perfect sequel to the first Iron Man film. The film was a combination of both humour and action which is what I love about it. The film has some of the best CGI I have ever seen. Robert Downey Jr was the perfect actor to play Iron Man. The film wasn't as great as the first film but I still enjoyed itThank you Robert Downey Jr and Marvel Studios for making the perfect sequel to my favourite film.RIP Tony Stark 😭😭😭"]
prediction=["And the hits (maybe not!) keep on coming w/this recent remake of Hugh Lofting's beloved character. Having seen the bloated 1967 (& yet Best Picture nominee) original w/Rex Harrison not long ago, this update should've been a walk in the park but within minutes you can see where the CGI ran away w/the production (topping at nearly 180 million dollars) where anthropomorphized animals look the part but sound way too contemporary for their own good (Godfather references!). Robert Downey Jr. is getting some heat here but there are many at the table to point at. Yes I'd like the writer of Traffic (Steven Gaghan who won the Oscar for that script) & the director of the searing drama Syriana to direct a kiddie friendly comedy. Maybe Scorsese can tackle another Casper the Friendly Ghost adventure while we're at it. This hurt!"]
prediction=token.texts_to_sequences(prediction)
prediction_pad_seq=pad_sequences(prediction,maxlen=max_length,padding='post',truncating='post')

In [98]:
prediction_pad_seq.shape

(1, 120)

In [99]:
model.predict_classes(prediction_pad_seq)

array([[0]], dtype=int32)

### We wil try with a new model using Average global pooling instead of Flatten

In [102]:
from tensorflow.keras.layers import GlobalAveragePooling1D

In [103]:
model=keras.Sequential()
model.add(Embedding(input_dim=num_words,output_dim=embed_dim,input_length=max_length))
model.add(GlobalAveragePooling1D())
model.add(Dense(units=5,activation=tf.nn.relu))
model.add(Dense(units=1,activation=tf.nn.sigmoid))
model.compile(optimizer='adam',loss=keras.losses.BinaryCrossentropy(from_logits=True),metrics=['accuracy'])
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 120, 16)           160000    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 85        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6         
Total params: 160,091
Trainable params: 160,091
Non-trainable params: 0
_________________________________________________________________


In [104]:
model.fit(train_pad_seq,train_numpy_label,epochs=10,validation_data=(test_pad_seq,test_numpy_label))

Train on 25000 samples, validate on 25000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ffa7447f250>