In [2]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
np.random.seed(42)

In [3]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=5000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Pad the sequences to a fixed length
max_len = 500
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

In [5]:
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=max_len))
model.add(LSTM(64, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

2023-04-09 18:21:11.828335: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [7]:
model.fit(x_train, y_train, batch_size=64, epochs=10, validation_data=(x_test, y_test), callbacks=[ModelCheckpoint('model.h5', save_best_only=True)])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fda551e78b0>

In [8]:
y_pred = model.predict(x_test)
y_pred = np.round(y_pred).flatten()
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('F1 Score:', f1_score(y_test, y_pred))

Accuracy: 0.8356
Precision: 0.8564146134239592
Recall: 0.8064
F1 Score: 0.830655129789864


# Use:

In [10]:
# Load the trained model
from tensorflow.keras.models import load_model
model = load_model('model.h5')

In [11]:
# Define the word-to-index mapping
word_to_index = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [12]:
# Define a function to preprocess user inputs
def preprocess_input(text):
    # Convert the text to lowercase
    text = text.lower()
    # Split the text into words
    words = text.split()
    # Convert each word to its index in the word-to-index mapping
    indexes = [word_to_index.get(word, 0) for word in words]
    # Pad the sequence to the same length as the training data
    padded_sequence = pad_sequences([indexes], maxlen=max_len)
    return padded_sequence

In [47]:
# Define a function to predict the sentiment of user inputs
def predict_sentiment(text):
    # Preprocess the input text
    padded_sequence = preprocess_input(text)
    # Use the model to predict the sentiment
    proba = model.predict(padded_sequence)[0][0]
    # Convert the probability to a binary sentiment classification
    if proba >= 0.5:
        sentiment = 'positive'
    else:
        sentiment = 'negative'
    return sentiment

In [50]:
# Test the function on some example inputs
print(predict_sentiment('great'))
print(predict_sentiment('terrible!'))

positive
positive
