In [3]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense , GlobalAveragePooling1D
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd

In [5]:
# Vocabulary cap handed to the dataset loader; the same value is reused
# further down as the Embedding layer's input_dim.
num_words = 10_000

train_split, test_split = imdb.load_data(num_words=num_words)
x_train, y_train = train_split
x_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [7]:
# Every review is brought to this fixed length so the splits become
# rectangular integer matrices the model can consume in batches.
max_len = 200

# Same padding call applied to both splits.
x_train, x_test = (
    pad_sequences(split, maxlen=max_len) for split in (x_train, x_test)
)


In [9]:
# Small bag-of-embeddings classifier, assembled one layer at a time.
model = Sequential()
# Look up a 32-dimensional vector for each of the num_words token ids.
model.add(Embedding(input_dim=num_words, output_dim=32))
# Average the token vectors over the sequence axis into one vector per review.
model.add(GlobalAveragePooling1D())
# One hidden layer on top of the pooled representation.
model.add(Dense(32, activation="relu"))
# Single sigmoid unit: one score in [0, 1] per review.
model.add(Dense(1, activation="sigmoid"))

In [10]:
# Single sigmoid output -> binary cross-entropy loss; accuracy is tracked
# as the reported metric.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Keep the History object returned by fit(): binding it to a name stops the
# cell from echoing an opaque "<...History at 0x...>" repr and leaves the
# per-epoch metrics available in `history.history` for later inspection.
# validation_split=0.2 sets aside a fraction of the training data for the
# val_* metrics printed after each epoch.
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=512,
    validation_split=0.2,
)


Epoch 1/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.5544 - loss: 0.6904 - val_accuracy: 0.6768 - val_loss: 0.6714
Epoch 2/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.6827 - loss: 0.6615 - val_accuracy: 0.7568 - val_loss: 0.6109
Epoch 3/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - accuracy: 0.7709 - loss: 0.5884 - val_accuracy: 0.7892 - val_loss: 0.5182
Epoch 4/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step - accuracy: 0.8232 - loss: 0.4869 - val_accuracy: 0.8310 - val_loss: 0.4332
Epoch 5/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.8547 - loss: 0.4024 - val_accuracy: 0.8194 - val_loss: 0.4036


<keras.src.callbacks.history.History at 0x793d88fdd910>

In [19]:
# Score the trained model on the test split. The model was compiled with a
# single metric, so evaluate() yields the loss followed by the accuracy.
loss, acc = model.evaluate(x=x_test, y=y_test)
print(acc)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.8130 - loss: 0.4078
0.8129600286483765


In [15]:
# word -> integer id mapping shipped with the dataset.
word_index = imdb.get_word_index()

# Inverted view (integer id -> word) used to turn encoded reviews back into text.
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))

def decode_review(enc_review, index_to_word=None):
    """Turn a sequence of encoded word ids back into a space-separated string.

    Args:
        enc_review: Iterable of integer word ids, e.g. one row of the padded
            review matrix.
        index_to_word: Optional mapping from word-index id to word. When
            omitted, the notebook-level ``reverse_word_index`` is used.

    Returns:
        The decoded text. Ids equal to 0 are dropped; any other id without an
        entry in the mapping (after the offset below) is rendered as "?".
    """
    lookup = reverse_word_index if index_to_word is None else index_to_word
    # 0 is the filler value in the padded rows; skipping it keeps the decoded
    # text from starting with a long run of "?" placeholders.
    # Encoded ids are shifted by 3 relative to the word index, hence `i - 3`
    # (this matches the loader's default offset -- see the load_data docs).
    return " ".join(lookup.get(i - 3, "?") for i in enc_review if i != 0)

In [18]:
# Demo batch: the first 100 rows of the padded test matrix.
sample_indices = range(100)
sample_reviews = x_test[sample_indices]
# Decode each selected row back into text for the report.
sample_texts = list(map(decode_review, sample_reviews))

# One score per review; scores at or above 0.5 are labelled "positive".
pred = model.predict(sample_reviews)
pred_labels = [
    "positive" if score >= 0.5 else "negative"
    for score in pred.reshape(-1)
]

# Pair every decoded review with its predicted label and save the table.
ans = pd.DataFrame({"Review": sample_texts, "Prediction": pred_labels})
ans.to_csv("ans.csv", index=False)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
