# Sentiment analysis model
A binary sentiment classifier trained with deep learning on the IMDB movie review dataset.

Dataset: the IMDB movie review dataset bundled with TensorFlow (`tf.keras.datasets.imdb`)

Hugging Face: https://huggingface.co/spaces/alperugurcan/movie-review-analyzer


In [2]:
import tensorflow as tf
from tensorflow.keras import layers, models, datasets

In [3]:
# Configuration: the values a reader is most likely to tune.
SEED = 42            # fixed seed so weight initialisation and batch shuffling are repeatable
max_features = 5000  # vocabulary size passed to the loader as `num_words`
MAX_LEN = 250        # every review is padded/truncated to this many tokens

# Seed Python, NumPy and TensorFlow in one call so a Restart & Run All
# reproduces the same training run (as far as the backend allows).
tf.keras.utils.set_random_seed(SEED)

# Load the integer-encoded reviews, restricted to `max_features` distinct word ids.
(x_train, y_train), (x_test, y_test) = datasets.imdb.load_data(num_words=max_features)

# Bring every review to the same length so they can be fed to the model in batches.
x_train = tf.keras.utils.pad_sequences(x_train, maxlen=MAX_LEN)
x_test = tf.keras.utils.pad_sequences(x_test, maxlen=MAX_LEN)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


In [4]:
# Build the model layer by layer:
#   token ids -> 16-dim embeddings -> average over the sequence -> dense head -> probability
model = models.Sequential()
model.add(layers.Embedding(input_dim=max_features, output_dim=16))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(units=16, activation='relu'))
# Single sigmoid unit: the output is a score in (0, 1) for the positive class.
model.add(layers.Dense(units=1, activation='sigmoid'))

In [5]:
# Compile the model for binary classification:
# Adam optimizer, binary cross-entropy loss, accuracy tracked as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [6]:
# Train the model for 10 epochs in mini-batches of 128 reviews.
# validation_split=0.2 holds back 20% of the training data to report
# val_loss / val_accuracy after each epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.2,
)


Epoch 1/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 17ms/step - accuracy: 0.5499 - loss: 0.6884 - val_accuracy: 0.6800 - val_loss: 0.6389
Epoch 2/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.7440 - loss: 0.5985 - val_accuracy: 0.8254 - val_loss: 0.4627
Epoch 3/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8323 - loss: 0.4301 - val_accuracy: 0.8536 - val_loss: 0.3699
Epoch 4/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.8659 - loss: 0.3429 - val_accuracy: 0.8678 - val_loss: 0.3330
Epoch 5/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.8829 - loss: 0.3028 - val_accuracy: 0.8608 - val_loss: 0.3245
Epoch 6/10
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.8893 - loss: 0.2786 - val_accuracy: 0.8784 - val_loss: 0.3018
Epoch 7/10
[1m157/157

In [7]:
# Evaluate the trained model on the test split.
# With the metrics compiled above, evaluate() yields the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(
    x=x_test,
    y=y_test,
    verbose=2,
)
print(f'\nTest accuracy: {test_acc:.4f}')

782/782 - 2s - 3ms/step - accuracy: 0.8782 - loss: 0.2909

Test accuracy: 0.8782


In [8]:
# Sanity check: score the first five test reviews and print the scores
# next to the true labels for a side-by-side comparison.
first_reviews, first_labels = x_test[:5], y_test[:5]
predictions = model.predict(first_reviews)

print("\nPredictions for the first 5 test reviews (>0.5 is positive, <0.5 is negative):")
print(predictions)
print("Actual labels (1 is positive, 0 is negative):")
print(first_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 964ms/step

Predictions for the first 5 test reviews (>0.5 is positive, <0.5 is negative):
[[0.24076113]
 [0.9997786 ]
 [0.7203431 ]
 [0.32247764]
 [0.98790205]]
Actual labels (1 is positive, 0 is negative):
[0 1 1 0 1]


In [9]:
# Persist the trained model to disk so it can be loaded outside this notebook.
# NOTE(review): Keras documents the `.h5` (HDF5) format as legacy and recommends
# the native `.keras` format; confirm what downstream consumers load before renaming.
model_path = 'imdb_model.h5'
model.save(model_path)

