In [27]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, GRU, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
from google.colab import files

uploaded = files.upload()  # This will open a file selector

df = pd.read_csv("movie_reviews.csv")
df.head()

Saving movie_reviews.csv to movie_reviews.csv


Unnamed: 0,review_id,customer_name,rating,review_date,movie_name,movie_review
0,1,Frank Carter,3,4/30/2025,Pulp Fiction,"Pulp Fiction was okay, not the best but watcha..."
1,2,Scott Simmons,3,3/3/2025,The Godfather,"The Godfather was okay, not the best but watch..."
2,3,Marvin Smith,3,5/15/2025,Inception,It was fine. Inception could have been better.
3,4,Brittney Taylor,5,2/25/2025,Titanic,Titanic was an outstanding film with brilliant...
4,5,Allison Smith,2,5/11/2025,The Godfather,The Godfather was a huge disappointment.


In [3]:
df['sentiment'] = df['rating'].apply(lambda r: 0 if r <= 2 else (1 if r ==3 else 2))
df.head()

Unnamed: 0,review_id,customer_name,rating,review_date,movie_name,movie_review,sentiment
0,1,Frank Carter,3,4/30/2025,Pulp Fiction,"Pulp Fiction was okay, not the best but watcha...",1
1,2,Scott Simmons,3,3/3/2025,The Godfather,"The Godfather was okay, not the best but watch...",1
2,3,Marvin Smith,3,5/15/2025,Inception,It was fine. Inception could have been better.,1
3,4,Brittney Taylor,5,2/25/2025,Titanic,Titanic was an outstanding film with brilliant...,2
4,5,Allison Smith,2,5/11/2025,The Godfather,The Godfather was a huge disappointment.,0


In [7]:
tokenizer = Tokenizer(num_words=1000, oov_token='<OOV>')
tokenizer.fit_on_texts(df['movie_review'])
sequences = tokenizer.texts_to_sequences(df['movie_review'])

In [9]:
max_len = 50
X = pad_sequences(sequences, maxlen = max_len, padding='post', truncating='post')

In [11]:
y = tf.keras.utils.to_categorical(df['sentiment'], num_classes=3)

In [12]:
model = Sequential([
    Embedding(input_dim=500, output_dim=16, input_length=max_len),
    SimpleRNN(32),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')
])



In [22]:
model_gru = Sequential([
    Embedding(input_dim=500, output_dim=16, input_length=max_len),
    GRU(32),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')
])



In [28]:
model_lstm = Sequential([
    Embedding(input_dim=500, output_dim=16, input_length=max_len),
    LSTM(32),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')
])



In [13]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [23]:
model_gru.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [29]:
model_lstm.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [14]:
model.fit(X, y, epochs=50, batch_size=16, validation_split=0.2)

Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 110ms/step - accuracy: 0.3313 - loss: 1.0975 - val_accuracy: 0.5500 - val_loss: 1.0851
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.6075 - loss: 1.0439 - val_accuracy: 0.5500 - val_loss: 1.0587
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.7497 - loss: 0.9543 - val_accuracy: 0.5500 - val_loss: 1.0431
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.7543 - loss: 0.8502 - val_accuracy: 0.5500 - val_loss: 0.9839
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.8764 - loss: 0.7372 - val_accuracy: 0.5500 - val_loss: 0.9165
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.9040 - loss: 0.6350 - val_accuracy: 0.6500 - val_loss: 0.8459
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7c90f93fb470>

In [24]:
model_gru.fit(X, y, epochs=50, batch_size=16, validation_split=0.2)

Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 138ms/step - accuracy: 0.4028 - loss: 1.0967 - val_accuracy: 0.5000 - val_loss: 1.0910
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.4332 - loss: 1.0881 - val_accuracy: 0.5000 - val_loss: 1.0913
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step - accuracy: 0.3689 - loss: 1.0827 - val_accuracy: 0.5000 - val_loss: 1.0941
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step - accuracy: 0.3915 - loss: 1.0804 - val_accuracy: 0.5000 - val_loss: 1.0969
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 0.3368 - loss: 1.0852 - val_accuracy: 0.5000 - val_loss: 1.1017
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.3333 - loss: 1.0794 - val_accuracy: 0.5000 - val_loss: 1.1081
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7c90f41c8fb0>

In [31]:
model_lstm.fit(X, y, epochs=100, batch_size=16, validation_split=0.2)

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6924 - loss: 0.6161 - val_accuracy: 0.5500 - val_loss: 0.7641
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.6420 - loss: 0.6799 - val_accuracy: 0.5500 - val_loss: 0.7603
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.7080 - loss: 0.6075 - val_accuracy: 0.5500 - val_loss: 0.7597
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.7453 - loss: 0.6139 - val_accuracy: 0.5500 - val_loss: 0.7642
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.6785 - loss: 0.5972 - val_accuracy: 0.5500 - val_loss: 0.7649
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.7358 - loss: 0.5946 - val_accuracy: 0.5500 - val_loss: 0.7653
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7c90f66e7e00>

# Function for Prediction

In [15]:
sentiment_labels = {
    0: "Negative",
    1: "Neutral",
    2: "Positive"
}

sentiment_labels

{0: 'Negative', 1: 'Neutral', 2: 'Positive'}

In [16]:
def predict_sentiment(sentence):
  # Convert to Sequence
  seq = tokenizer.texts_to_sequences([sentence])

  # Padding the sequence to match the Training Length
  padded = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

  # Predict probabilities
  pred = model.predict(padded)

  # Pick up the class with the highest proability
  class_id = pred.argmax(axis=1)[0]

  return sentiment_labels[class_id], pred[0]

In [21]:
text = "This movie is not soo good"

label , prob = predict_sentiment(text)

print("\n Sentence: -->", text)
print("\n Predicted Sentiment: -->", label)
print("\n Probability: -->", prob)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step

 Sentence: --> This movie is not soo good

 Predicted Sentiment: --> Positive

 Probability: --> [0.44158226 0.04491681 0.51350087]


In [25]:
def predict_sentiment_gru(sentence):
  # Convert to Sequence
  seq = tokenizer.texts_to_sequences([sentence])

  # Padding the sequence to match the Training Length
  padded = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

  # Predict probabilities
  pred = model_gru.predict(padded)

  # Pick up the class with the highest proability
  class_id = pred.argmax(axis=1)[0]

  return sentiment_labels[class_id], pred[0]

In [26]:
text = "This movie is not soo good"

label , prob = predict_sentiment_gru(text)

print("\n Sentence: -->", text)
print("\n Predicted Sentiment: -->", label)
print("\n Probability: -->", prob)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step

 Sentence: --> This movie is not soo good

 Predicted Sentiment: --> Positive

 Probability: --> [0.37880176 0.20943041 0.4117679 ]


In [32]:
def predict_sentiment_lstm(sentence):
  # Convert to Sequence
  seq = tokenizer.texts_to_sequences([sentence])

  # Padding the sequence to match the Training Length
  padded = pad_sequences(seq, maxlen=max_len, padding='post', truncating='post')

  # Predict probabilities
  pred = model_lstm.predict(padded)

  # Pick up the class with the highest proability
  class_id = pred.argmax(axis=1)[0]

  return sentiment_labels[class_id], pred[0]

In [35]:
text = "This movie is bad"

label , prob = predict_sentiment_lstm(text)

print("\n Sentence: -->", text)
print("\n Predicted Sentiment: -->", label)
print("\n Probability: -->", prob)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step

 Sentence: --> This movie is bad

 Predicted Sentiment: --> Negative

 Probability: --> [0.57381654 0.4250961  0.00108737]
