In [53]:
from google.colab import drive
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense
import numpy as np

In [37]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
try:
    df = pd.read_csv('/content/drive/MyDrive/Sem VIII Practical/LPV/DL/Datasets/IMDB Dataset.csv')
except FileNotFoundError:
    print("Error: 'imdb_reviews.csv' not found. Please check the path.")
    exit()

In [39]:
print(df.head())

                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [40]:
max_words = 10000
max_len = 500

In [41]:
df['sentiment_encoded'] = df['sentiment'].map({'positive': 1, 'negative': 0})

In [42]:
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['review'])
sequences = tokenizer.texts_to_sequences(df['review'])
data = pad_sequences(sequences, maxlen=max_len)
labels = np.array(df['sentiment_encoded'])

In [43]:
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [44]:
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_len))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))



In [45]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [52]:
history = model.fit(x_train, y_train, epochs=15, batch_size=64, validation_split=0.2)

Epoch 1/15
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 129ms/step - accuracy: 0.9988 - loss: 0.0039 - val_accuracy: 0.8621 - val_loss: 0.9805
Epoch 2/15
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 124ms/step - accuracy: 1.0000 - loss: 1.3064e-04 - val_accuracy: 0.8608 - val_loss: 1.0920
Epoch 3/15
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 126ms/step - accuracy: 1.0000 - loss: 1.5661e-05 - val_accuracy: 0.8619 - val_loss: 1.1249
Epoch 4/15
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 126ms/step - accuracy: 1.0000 - loss: 8.0049e-06 - val_accuracy: 0.8626 - val_loss: 1.1549
Epoch 5/15
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 125ms/step - accuracy: 1.0000 - loss: 5.7944e-06 - val_accuracy: 0.8625 - val_loss: 1.1830
Epoch 6/15
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 126ms/step - accuracy: 1.0000 - loss: 4.2076e-06 - val_accuracy: 0.8624 - val_los

In [54]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test accuracy: {accuracy}")
print(f"Test loss: {loss}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.8596 - loss: 1.2801
Test accuracy: 0.862500011920929
Test loss: 1.2951023578643799


In [55]:
predictions = model.predict(x_test)
predictions = np.where(predictions > 0.5, 1, 0)
decoded_predictions = np.where(predictions == 1, 'positive', 'negative')
print(decoded_predictions)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step
[['negative']
 ['negative']
 ['negative']
 ...
 ['positive']
 ['negative']
 ['positive']]


In [56]:
def predict_review(new_review):
  new_review_sequence = tokenizer.texts_to_sequences([new_review])
  new_review_padded = pad_sequences(new_review_sequence, maxlen=max_len)
  prediction = model.predict(new_review_padded)
  prediction_binary = np.where(prediction > 0.5, 1, 0)
  decoded_prediction = np.where(prediction_binary == 1, 'positive', 'negative')
  return decoded_prediction[0][0]  # Return the string prediction

In [59]:
user_review = input("Enter a review: ")
predicted_sentiment = predict_review(user_review)
print(f"Predicted sentiment: {predicted_sentiment}")

Enter a review: The movie was a very awesome. The choise of actor was also very good
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Predicted sentiment: positive
