# DNN IMDB Sentiment Analysis

In [37]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [38]:
data = keras.datasets.imdb

In [39]:
(train_data, train_labels), (test_data, test_labels) = data.load_data(num_words = 88000)

In [40]:
# train_data[0]

In [41]:
print(train_data.shape)
print(len(train_data[0]))
print(len(train_data[1]))
print(len(train_data[24999]))

(25000,)
218
189
153


In [42]:
word_index = data.get_word_index()
# word_index

In [43]:
word_index['this']

11

In [44]:
word_index['funny']

160

In [45]:
word_index['lol']

3999

In [46]:
word_index["<PAD>"] = 0

In [47]:
reverse_word_index = {value:key for key, value in word_index.items()}

In [48]:
# reverse_word_index

In [49]:
reverse_word_index[1]

'the'

In [50]:
reverse_word_index[2]

'and'

In [51]:
reverse_word_index[0]

'<PAD>'

In [52]:
train_data = keras.preprocessing.sequence.pad_sequences(train_data, value = 0, padding = 'post', maxlen = 250)
test_data = keras.preprocessing.sequence.pad_sequences(test_data, value = 0, padding = 'post', maxlen = 250)

In [53]:
train_data.shape

(25000, 250)

In [54]:
model = keras.Sequential()
model.add(keras.layers.Embedding(88000, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation = 'relu'))
model.add(keras.layers.Dense(1, activation = 'sigmoid'))

In [55]:
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [56]:
model.fit(train_data, train_labels, epochs = 10, batch_size = 512)

Epoch 1/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.5322 - loss: 0.6916
Epoch 2/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7133 - loss: 0.6688
Epoch 3/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7721 - loss: 0.6012
Epoch 4/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8177 - loss: 0.5115
Epoch 5/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8558 - loss: 0.4272
Epoch 6/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8776 - loss: 0.3597
Epoch 7/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8810 - loss: 0.3238
Epoch 8/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8970 - loss: 0.2873
Epoch 9/10
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x3114f38e0>

In [57]:
results = model.evaluate(test_data, test_labels)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299us/step - accuracy: 0.8771 - loss: 0.3014


In [58]:
def review_encode(s):
    encoded = []
    for word in s.split():
        if word.lower() in word_index:
            encoded.append(word_index[word.lower()])
        else:
            pass
    return encoded

In [59]:
review_encode('hello this movie is so amazing')

[4822, 11, 17, 6, 35, 477]

In [62]:
review = input('Enter your review about the movie: ')
encode = review_encode(review)
encode = keras.preprocessing.sequence.pad_sequences([encode], value = word_index['<PAD>'], padding = 'post', maxlen = 250)
predict = model.predict(encode)
print(predict[0])

Enter your review about the movie:  good movie


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[0.62465495]


In [64]:
if predict[0] > 0.62:
    print('Positive review')
else:
    print('Negative review')

Positive review
