In [37]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
warnings.filterwarnings('ignore')

In [9]:
max_words = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words)

In [15]:
max_len = 150
x_train = pad_sequences(x_train, maxlen = max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

In [29]:
def sequence_to_text(sequence):
    word_index = imdb.get_word_index()
    reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
    decoded_review =' '.join([reverse_word_index.get(i - 3, '?') for i in sequence])
    return decoded_review

In [31]:
for i in range(3):
    print(f"Example {i+1}:\n{sequence_to_text(x_train[i])}\n")

Example 1:
it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all

Example 2:
the worst ever made the plot is paper thin and ridiculous the acting is an abomination the script is completely laughable the best is the end showdown with the cop and how he worked out who the killer is it's just so damn terribly written the clot

In [39]:
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_len))
model.add(SpatialDropout1D(0.3))
model.add(Bidirectional(LSTM(100, dropout=0.3, recurrent_dropout=0.3)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [43]:
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

batch_size = 32
epochs = 10
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size,
          validation_data=(x_test, y_test), callbacks=[early_stopping])

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 422ms/step - accuracy: 0.6661 - loss: 0.5879 - val_accuracy: 0.8363 - val_loss: 0.3806
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m352s[0m 450ms/step - accuracy: 0.8420 - loss: 0.3775 - val_accuracy: 0.8456 - val_loss: 0.3742
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m540s[0m 689ms/step - accuracy: 0.8864 - loss: 0.2862 - val_accuracy: 0.8533 - val_loss: 0.3614
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m333s[0m 426ms/step - accuracy: 0.8934 - loss: 0.2658 - val_accuracy: 0.8605 - val_loss: 0.3417
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m353s[0m 388ms/step - accuracy: 0.9246 - loss: 0.1970 - val_accuracy: 0.8598 - val_loss: 0.3561
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m335s[0m 405ms/step - accuracy: 0.9400 - loss: 0.1604 - val_accuracy: 0.8578 - val_loss: 0.3982


<keras.src.callbacks.history.History at 0x254c0f73e90>

In [45]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Accuracy: {accuracy*100:.2f}%')                               

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 67ms/step - accuracy: 0.8586 - loss: 0.3472
Accuracy: 86.05%


In [47]:
new_texts = ["This movie is great!","The plot is confusing.","Amazing film with brilliant performances."]

#Tokenize and pad the new texts
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(new_texts)
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_padded = pad_sequences(new_sequences, maxlen=max_len)

#Make predictions
predictions = model.predict(new_padded)

#Display predictions
for i, text in enumerate(new_texts):
    sentiment = "Positive" if predictions[i] > 0.5 else "Negative"
    print(f"Text: '{text}'\nPredicted Sentiment: {sentiment}\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Text: 'This movie is great!'
Predicted Sentiment: Positive

Text: 'The plot is confusing.'
Predicted Sentiment: Positive

Text: 'Amazing film with brilliant performances.'
Predicted Sentiment: Positive

