In [4]:
import numpy as np
from tensorflow.keras.datasets import imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)

In [7]:
print("Training data")
print(x_train.shape)
print(y_train.shape)

Training data
(25000,)
(25000,)


In [8]:
print("Review:",x_train[0])
print('Label:', y_train[0])

Review: [1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 2, 19, 178, 32]
Label: 1


In [10]:
word_2_index = imdb.get_word_index()
index = dict([(value, key) for (key, value) in word_2_index.items()])

def decode_review(text):
    '''The word corresponding to each integer in the text is substituted after it has been found in the index.
    '''
    return ' '.join([index.get(i, '?') for i in text])
decode_review(x_train[0])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


"the as you with out themselves powerful lets loves their becomes reaching had journalist of lot from anyone to have after out atmosphere never more room and it so heart shows to years of every never going and help moments or of every chest visual movie except her was several of enough more with is now current film as you of mine potentially unfortunately of you than him that with out themselves her get for was camp of you movie sometimes movie that with scary but and to story wonderful that in seeing in character to of 70s and with heart had shadows they of here that with her serious to have does when from why what have critics they is you that isn't one will very to as itself with other and in of seen over and for anyone of and br show's to whether from than out themselves history he name half some br of and odd was two most of mean for 1 any an boat she he should is thought and but of script you not while history he heart to real at and but when from one bit then have two of script 

In [11]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv1D,MaxPooling1D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing import sequence

In [13]:
## imbd dataset but only keep the top 5000 words, zero the rest
words = 5000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=words)
# Only floats can be fed into a tensor
y_train = np.asarray(y_train).astype('float32')
y_test = np.asarray(y_test).astype('float32')

max_words = 500
x_train = sequence.pad_sequences(x_train, maxlen=max_words)
x_test = sequence.pad_sequences(x_test, maxlen=max_words)

In [15]:
model = Sequential()
model.add(Embedding(words, 32, input_length=max_words))
model.add(Conv1D(32, 3, padding='same', activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(300, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 32)           160000    
                                                                 
 conv1d (Conv1D)             (None, 500, 32)           3104      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 250, 32)          0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 8000)              0         
                                                                 
 dense (Dense)               (None, 300)               2400300   
                                                                 
 dense_1 (Dense)             (None, 1)                 301       
                                                        

In [16]:
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=3, batch_size=130, verbose=2)

Epoch 1/3
193/193 - 45s - loss: 0.4381 - accuracy: 0.7706 - val_loss: 0.2700 - val_accuracy: 0.8883 - 45s/epoch - 235ms/step
Epoch 2/3
193/193 - 45s - loss: 0.2094 - accuracy: 0.9183 - val_loss: 0.2856 - val_accuracy: 0.8828 - 45s/epoch - 233ms/step
Epoch 3/3
193/193 - 42s - loss: 0.1458 - accuracy: 0.9470 - val_loss: 0.3006 - val_accuracy: 0.8814 - 42s/epoch - 216ms/step


<keras.callbacks.History at 0x1cb208e6c40>

In [18]:
# Fit the dataset with model
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=3, batch_size=130, verbose=2)
# Final evaluation of the model
scores = model.evaluate(x_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Epoch 1/3
193/193 - 43s - loss: 0.0017 - accuracy: 0.9999 - val_loss: 0.6411 - val_accuracy: 0.8734 - 43s/epoch - 221ms/step
Epoch 2/3
193/193 - 40s - loss: 7.1718e-04 - accuracy: 1.0000 - val_loss: 0.6851 - val_accuracy: 0.8735 - 40s/epoch - 209ms/step
Epoch 3/3
193/193 - 41s - loss: 3.9568e-04 - accuracy: 1.0000 - val_loss: 0.7252 - val_accuracy: 0.8744 - 41s/epoch - 210ms/step
Accuracy: 87.44%


In [19]:
predict_x=model.predict(x_test)
y_pred=np.argmax(predict_x,axis=1)



In [20]:
from sklearn import metrics
accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy

0.5