In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
!pip install tensorflow-datasets
import tensorflow_datasets as tfds
import numpy as np



In [3]:
import tensorflow_datasets as tfds
imdb_data = tfds.load("imdb_reviews", as_supervised=True)
train_data, test_data = imdb_data['train'], imdb_data['test']


train_sentences, train_labels = [], []
test_sentences, test_labels = [], []

for s, l in train_data:
    train_sentences.append(s.numpy().decode("utf-8"))
    train_labels.append(l.numpy())

for s, l in test_data:
    test_sentences.append(s.numpy().decode("utf-8"))
    test_labels.append(l.numpy())

In [4]:
vocab_size = 10000
max_len = 200

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(train_sentences)

X_train = tokenizer.texts_to_sequences(train_sentences)
X_train = pad_sequences(X_train, maxlen=max_len, padding="post", truncating="post")

X_test = tokenizer.texts_to_sequences(test_sentences)
X_test = pad_sequences(X_test, maxlen=max_len, padding="post", truncating="post")

y_train = tf.convert_to_tensor(train_labels)
y_test = tf.convert_to_tensor(test_labels)

In [16]:
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_len),
    LSTM(64, return_sequences=True),
    Dropout(0.5),
    LSTM(32),
    Dense(32, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")
])

In [17]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 64)           640000    
                                                                 
 lstm (LSTM)                 (None, 200, 64)           33024     
                                                                 
 dropout (Dropout)           (None, 200, 64)           0         
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 32)                1056      
                                                                 
 dropout_1 (Dropout)         (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 3

In [20]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=128,
    verbose=1
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [21]:
def predict_sentiment(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding="post", truncating="post")
    pred = model.predict(padded)[0][0]
    sentiment = "positive" if pred >= 0.5 else "negative"
    print(f"Input: {text}")
    print(f"Sentiment: {sentiment}, Score: {pred:.4f}")
    print("-" * 50)


In [22]:
test_samples = [
    "the movie was good",
    "the movie was terrible and boring",
    "fantastic acting and great direction",
    "the film is a disgrace, a lazy, predictable, and boring production that tries too hard to be funny and fails to deliver, resulting in a movie that is insulting to the audience and a waste of their time."
]

for s in test_samples:
    predict_sentiment(s)

Input: the movie was good
Sentiment: positive, Score: 0.8691
--------------------------------------------------
Input: the movie was terrible and boring
Sentiment: negative, Score: 0.3612
--------------------------------------------------
Input: fantastic acting and great direction
Sentiment: positive, Score: 0.8691
--------------------------------------------------
Input: the film is a disgrace, a lazy, predictable, and boring production that tries too hard to be funny and fails to deliver, resulting in a movie that is insulting to the audience and a waste of their time.
Sentiment: negative, Score: 0.3612
--------------------------------------------------


In [49]:
import pickle

# Save model
model.save("sentiment_model.h5")

# Save tokenizer
with open("tokenizer.pickle", "wb") as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [23]:
# Instead of model.save("sentiment_model.h5")
model.save("sentiment_model")  # This creates a folder instead of a single .h5 file

INFO:tensorflow:Assets written to: sentiment_model\assets


INFO:tensorflow:Assets written to: sentiment_model\assets
