In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

# 0. Reproducibility
# Training is stochastic (weight initialisation, batch shuffling). Seeding
# NumPy and TensorFlow up front makes "Restart & Run All" far more repeatable.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

# 1. Data Preparation
imdb = keras.datasets.imdb
# Size of the vocabulary kept by load_data; reused below as the Embedding
# input dimension, so every word id fed to the model must be < vocab_size.
vocab_size = 10000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

# Preprocess the data - pad every review with 0 at the end ('post') so that all
# sequences share the same length, max_length.
max_length = 256
train_data = keras.preprocessing.sequence.pad_sequences(
    train_data, value=0, padding='post', maxlen=max_length
)
test_data = keras.preprocessing.sequence.pad_sequences(
    test_data, value=0, padding='post', maxlen=max_length
)

# 2. Model Building
# Embedding -> average over the sequence axis -> small dense head with a
# sigmoid output, i.e. a single score in [0, 1] per review.
model = keras.Sequential([
    keras.layers.Embedding(vocab_size, 16, input_length=max_length),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 3. Training
# Hold out the first val_size training examples for validation. The value is
# named explicitly because it is unrelated to vocab_size despite being equal.
val_size = 10000
x_val = train_data[:val_size]
partial_x_train = train_data[val_size:]
y_val = train_labels[:val_size]
partial_y_train = train_labels[val_size:]

history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=40,
    batch_size=512,
    validation_data=(x_val, y_val),
    verbose=1,
)

# 4. Evaluation
# results is [loss, accuracy], following the loss/metrics given to compile().
results = model.evaluate(test_data, test_labels, verbose=2)
print(results)




Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


Epoch 1/40


Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
782/782 - 1s - loss: 0.3237 - accuracy: 0.8728 - 771ms/epoch - 986us/step
[0.3237169682979584, 0.872759997844696]


In [4]:
# Fetch the IMDb vocabulary: a mapping from each word to its integer index
word_index = imdb.get_word_index()

# Invert the mapping so an index can be looked up to recover its word
reverse_word_index = {index: word for word, index in word_index.items()}

# Function to encode a given text into integers
def encode_text(text, vocab=None, num_words=None):
    """Encode a review string as a list of IMDb-style integer word ids.

    The text is split on whitespace and each token is lower-cased before the
    lookup. Punctuation is not stripped, so a token such as ``"bad."`` is only
    recognised if that exact string is a key of the vocabulary.

    Args:
        text: Review text to encode.
        vocab: Mapping from lower-case word to its integer index. Defaults to
            the notebook-level ``word_index``.
        num_words: Exclusive upper bound on the ids that may be emitted.
            Defaults to the notebook-level ``vocab_size``, which is also the
            input dimension of the model's Embedding layer.

    Returns:
        A list of ints that starts with 1 (start of sequence), followed by one
        id per token: ``vocab[token] + 3`` for known in-range words and 2 for
        unknown words or words whose id would be ``>= num_words``.
    """
    if vocab is None:
        vocab = word_index
    if num_words is None:
        num_words = vocab_size

    # IMDb reserves the first 3 indices for special tokens
    encoded = [1]  # Start with index 1 for the start of the sequence
    for token in text.split():
        rank = vocab.get(token.lower())
        # Offset by 3 as IMDb datasets have reserved indices at the start
        token_id = 2 if rank is None else rank + 3
        if token_id >= num_words:
            # The word is known but falls outside the vocabulary the model was
            # trained on; its id would be an invalid Embedding index, so treat
            # it as unknown.
            token_id = 2
        encoded.append(token_id)
    return encoded


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [12]:
# Function to preprocess new reviews
def preprocess_review(text):
    """Turn a raw review string into a padded batch holding one sequence.

    The text is encoded with ``encode_text`` and then padded with zeros at the
    end up to ``max_length``, the same length used for the training data.
    """
    token_ids = encode_text(text)
    # pad_sequences works on a list of sequences, hence the one-item list.
    return keras.preprocessing.sequence.pad_sequences(
        [token_ids],
        maxlen=max_length,
        padding='post',
        value=0,
    )

# Review to classify
new_review = "This movie was EXTREMELY BAD AND I HATED IT"

# Encode and pad the review, then score it with the trained model
preprocessed_review = preprocess_review(new_review)
prediction = model.predict(preprocessed_review)

print("Prediction (0 for negative, 1 for positive):", prediction[0])

# A score above 0.5 is read as positive; anything else as negative
print("Positive sentiment" if prediction[0] > 0.5 else "Negative sentiment")


Prediction (0 for negative, 1 for positive): [0.2968883]
Negative sentiment
