In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SimpleRNN

In [None]:
# --- Vocabulary and sequence shape ---
# Vocabulary size: passed to imdb.load_data as num_words and used as the
# Embedding layer's input dimension.
max_features = 10_000
# Length every review is padded/truncated to by pad_sequences.
max_len = 500

# --- Model width ---
embedding_size = 128
lstm_units = 64
# Only referenced by the SimpleRNN baseline, which is not the active model.
rnn_units = 64

# --- Training schedule ---
batch_size = 32
epochs = 3

In [None]:
# Load the IMDB reviews as sequences of integer word ids, with the vocabulary
# capped via num_words=max_features.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_features,
)

# Bring both splits to a fixed width of max_len ids per review so they can be
# batched; the array preview in the next cell shows zeros filling the front.
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_len)
    for reviews in (X_train, X_test)
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Display the padded training sequences to sanity-check the zero-padding.
X_train

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ...,
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

In [None]:
# Display the training labels; the preview shows 0/1 values.
y_train

array([1, 0, 0, ..., 0, 1, 0])

In [None]:
# Sentiment classifier: word embedding -> LSTM -> single sigmoid unit.
#
# To try the plain-RNN baseline instead, replace the LSTM layer with
#   SimpleRNN(rnn_units, dropout=0.2, recurrent_dropout=0.2)
model = Sequential([
    Embedding(max_features, embedding_size, input_length=max_len),
    LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy matches the model's single sigmoid output unit;
# accuracy is tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are computed on the same data that is
# evaluated afterwards and are not an independent estimate.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7ad2d724a740>

In [None]:
# Score the test split; with the compile settings used above the result is
# [loss, accuracy].
model.evaluate(x=X_test, y=y_test)



[0.32485276460647583, 0.8624799847602844]

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Ids reserved by `imdb.load_data` when called with its default arguments
# (start_char=1, oov_char=2, index_from=3). Id 0 is the pad_sequences fill value.
_IMDB_START_ID = 1
_IMDB_OOV_ID = 2
_IMDB_INDEX_OFFSET = 3

# Characters turned into spaces before splitting on whitespace. This is the
# default `filters` string of the Keras Tokenizer, so sentences are split the
# same way as before; note that apostrophes are kept.
_PUNCTUATION_TO_SPACE = str.maketrans(
    {char: ' ' for char in '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'}
)

# Lazily loaded IMDB word -> frequency-rank mapping, shared across calls.
_imdb_word_ranks = None


def _load_imdb_word_ranks():
    """Return the IMDB word index, fetching it only on first use."""
    global _imdb_word_ranks
    if _imdb_word_ranks is None:
        _imdb_word_ranks = imdb.get_word_index()
    return _imdb_word_ranks


def _encode_sentence(sentence, word_ranks, vocab_size):
    """Convert raw text into the integer ids used by the IMDB training data.

    Args:
        sentence: Raw text to encode.
        word_ranks: Mapping of lower-case word -> frequency rank, as returned
            by `imdb.get_word_index()`.
        vocab_size: The `num_words` value the training data was loaded with;
            ids at or above it are replaced by the out-of-vocabulary id.

    Returns:
        A list of ints that begins with the start-of-review id.
    """
    token_ids = [_IMDB_START_ID]
    for word in sentence.lower().translate(_PUNCTUATION_TO_SPACE).split():
        rank = word_ranks.get(word)
        # Unknown words, and words too rare for the capped vocabulary, share
        # the out-of-vocabulary id, mirroring what load_data does.
        if rank is None or rank + _IMDB_INDEX_OFFSET >= vocab_size:
            token_ids.append(_IMDB_OOV_ID)
        else:
            token_ids.append(rank + _IMDB_INDEX_OFFSET)
    return token_ids


def predict_sentiment(sentence):
    """Classify a piece of text as positive or negative with the trained model.

    The text is encoded with the IMDB word index so that each word gets the
    same integer id it had during training. Fitting a fresh Tokenizer on the
    input (the previous approach) numbered words by their frequency inside
    that one sentence, so e.g. every one-word input became the id 1 and
    received the same score.

    Args:
        sentence: Raw review text. An empty string is allowed and is scored
            as a review containing only the start-of-review id.

    Returns:
        A `(sentiment, score)` tuple where `score` is the model's sigmoid
        output for the text and `sentiment` is "positive" when the score is
        above 0.5, otherwise "negative".
    """
    encoded = _encode_sentence(sentence, _load_imdb_word_ranks(), max_features)

    # Same fixed width as the training data.
    padded_sequence = pad_sequences([encoded], maxlen=max_len)

    prediction = model.predict(padded_sequence)
    score = prediction[0][0]
    sentiment = "positive" if score > 0.5 else "negative"

    return sentiment, score

# Interactive demo: keep classifying typed sentences until the user enters
# 'exit' (any letter case). input() blocks, so this cell waits for the user.
prompt = "Enter a sentence (or 'exit' to quit): "
sentence = input(prompt)
while sentence.lower() != 'exit':
    sentiment, score = predict_sentiment(sentence)
    print(f"Sentence: '{sentence}'")
    print(f"Predicted sentiment: {sentiment} (Score: {score})")
    sentence = input(prompt)

Sentence: 'hate'
Predicted sentiment: negative (Score: 0.45256146788597107)
Sentence: 'I love it'
Predicted sentiment: negative (Score: 0.44641581177711487)
Sentence: 'love'
Predicted sentiment: negative (Score: 0.45256146788597107)
Sentence: 'bad'
Predicted sentiment: negative (Score: 0.45256146788597107)
Sentence: 'like'
Predicted sentiment: negative (Score: 0.45256146788597107)
