In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer




In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Tiny hand-labelled corpus: each entry is a (review text, sentiment label) pair.
# The text inside the strings is kept exactly as written, spelling included.
data = [
    ("I absolutely loved this movie!", "positive"),
    ("The acting was terrible and the plot was boring", "negative"),
    ("This film was masterpeace.", "positive"),
    ("I couldn't stand watching it, a complete waste of time", "negative"),
    ("The cinematography was stunning, a must-watch!", "positive"),
]

In [4]:
# Split the (text, label) pairs into two parallel tuples.
reviews = tuple(text for text, _ in data)
sentiments = tuple(label for _, label in data)

In [5]:
# Display the review texts pulled out of `data`.
reviews

('I absolutely loved this movie!',
 'The acting was terrible and the plot was boring',
 'This film was masterpeace.',
 "I couldn't stand watching it, a complete waste of time",
 'The cinematography was stunning, a must-watch!')

In [6]:
# Display the sentiment labels, in the same order as `reviews`.
sentiments

('positive', 'negative', 'positive', 'negative', 'positive')

In [7]:
# Build the word-level vocabulary from the review texts.
# The tokenizer is created with default settings; fit_on_texts populates
# tokens.word_index and tokens.word_counts, which are inspected below.
tokens = Tokenizer()
tokens.fit_on_texts(reviews)

In [8]:
# Word -> integer index mapping learned by the tokenizer (indices start at 1).
tokens.word_index

{'was': 1,
 'the': 2,
 'i': 3,
 'this': 4,
 'a': 5,
 'absolutely': 6,
 'loved': 7,
 'movie': 8,
 'acting': 9,
 'terrible': 10,
 'and': 11,
 'plot': 12,
 'boring': 13,
 'film': 14,
 'masterpeace': 15,
 "couldn't": 16,
 'stand': 17,
 'watching': 18,
 'it': 19,
 'complete': 20,
 'waste': 21,
 'of': 22,
 'time': 23,
 'cinematography': 24,
 'stunning': 25,
 'must': 26,
 'watch': 27}

In [9]:
# Number of occurrences of each word across the fitted review texts.
tokens.word_counts

OrderedDict([('i', 2),
             ('absolutely', 1),
             ('loved', 1),
             ('this', 2),
             ('movie', 1),
             ('the', 3),
             ('acting', 1),
             ('was', 4),
             ('terrible', 1),
             ('and', 1),
             ('plot', 1),
             ('boring', 1),
             ('film', 1),
             ('masterpeace', 1),
             ("couldn't", 1),
             ('stand', 1),
             ('watching', 1),
             ('it', 1),
             ('a', 2),
             ('complete', 1),
             ('waste', 1),
             ('of', 1),
             ('time', 1),
             ('cinematography', 1),
             ('stunning', 1),
             ('must', 1),
             ('watch', 1)])

In [8]:
# word_index starts at 1, so index 0 is never assigned to a word; the embedding
# table therefore needs one more row than there are distinct words.
vocab_size = 1 + len(tokens.word_index)
vocab_size

28

In [9]:
# Convert each review into its list of word indices (one list per review).
seq = tokens.texts_to_sequences(reviews)
seq

[[3, 6, 7, 4, 8],
 [2, 9, 1, 10, 11, 2, 12, 1, 13],
 [4, 14, 1, 15],
 [3, 16, 17, 18, 19, 5, 20, 21, 22, 23],
 [2, 24, 1, 25, 5, 26, 27]]

In [10]:
# Length of the longest encoded review; every sequence is padded to this size.
max_length = max(map(len, seq))
max_length

10

In [11]:
# Right-pad every encoded review with zeros so all rows share the same length.
pad_seq = sequence.pad_sequences(
    seq,
    maxlen=max_length,
    padding='post',
)

pad_seq

array([[ 3,  6,  7,  4,  8,  0,  0,  0,  0,  0],
       [ 2,  9,  1, 10, 11,  2, 12,  1, 13,  0],
       [ 4, 14,  1, 15,  0,  0,  0,  0,  0,  0],
       [ 3, 16, 17, 18, 19,  5, 20, 21, 22, 23],
       [ 2, 24,  1, 25,  5, 26, 27,  0,  0,  0]])

In [12]:
# Encode the string labels as integers: 'positive' -> 1, anything else -> 0.
labels = np.array([int(label == 'positive') for label in sentiments])
labels

array([1, 0, 1, 0, 1])

In [13]:
# Fix the random seed before any weights are created so that a fresh
# Restart & Run All reproduces the same initialisation and training run.
tf.keras.utils.set_random_seed(42)

# Embedding -> SimpleRNN -> sigmoid: a minimal binary sentiment classifier.
model = Sequential([
    # mask_zero=True tells downstream layers to skip the 0-valued padding
    # steps; without it the RNN keeps updating its state on the trailing zeros
    # added by padding='post'. `input_length` is not needed (the input shape is
    # inferred on the first call) and is deprecated in Keras 3, so it is omitted.
    Embedding(vocab_size, 20, mask_zero=True),
    SimpleRNN(20),
    # Single sigmoid unit: probability of label 1 ('positive').
    Dense(1, activation='sigmoid'),
])




In [14]:
# Binary-classification training setup: Adam optimiser, binary cross-entropy
# loss, accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)




In [15]:
# Train on the padded reviews for 15 epochs; the returned History object is the
# cell's displayed value.
model.fit(
    x=pad_seq,
    y=labels,
    epochs=15,
    verbose=1,
)

Epoch 1/15


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x1a6677fab90>

In [16]:
# Unseen sentence to classify.
# NOTE(review): "eas" looks like a typo for "was" — confirm. As written it is
# not in the fitted vocabulary; the padded sequence displayed further down only
# contains the indices for "i", "the" and "and".
new_sent = 'I eas blown away by the incredible performance and gripping storyline'

In [17]:
# Encode the new sentence with the tokenizer fitted on the reviews; it is
# wrapped in a list because texts_to_sequences takes a collection of texts.
new_seq = tokens.texts_to_sequences([new_sent])

In [18]:
# Pad the encoded sentence exactly like the training data (zeros on the right,
# same target length).
new_pad_seq = sequence.pad_sequences(
    new_seq,
    maxlen=max_length,
    padding='post',
)

In [19]:
# Display the padded encoding of the new sentence.
new_pad_seq

array([[ 3,  2, 11,  0,  0,  0,  0,  0,  0,  0]])

In [20]:
# Run the trained model on the padded sentence; the result is compared against
# 0.5 in the next cell to pick the sentiment label.
pred = model.predict(new_pad_seq)



In [22]:
# Pull the single predicted probability out of the prediction array explicitly.
# Comparing the whole array (`pred >= 0.5`) inside a conditional only works
# while it holds exactly one element and raises ValueError as soon as more than
# one sentence is predicted.
positive_prob = float(np.ravel(pred)[0])

# Threshold at 0.5: label 1 was assigned to 'positive' when building `labels`.
sentiment = 'Positive' if positive_prob >= 0.5 else 'Negative'
sentiment

'Positive'