In [11]:
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.datasets import imdb
from keras import Sequential
from keras.layers import Dense,SimpleRNN,Embedding,Flatten

In [3]:
docs = ["Amazing product, exceeded my expectations!",
"Quick service and friendly staff.",
"Good quality, but a bit overpriced.",
"The app is user-friendly and intuitive.",
"Disappointed with the customer support.",
"Beautiful design and easy to use.",
"Delivery was fast and efficient.",
"Not as described, poor quality.",
"Great experience, will buy again.",
"Fantastic value for the money."]

In [4]:
docs

['Amazing product, exceeded my expectations!',
 'Quick service and friendly staff.',
 'Good quality, but a bit overpriced.',
 'The app is user-friendly and intuitive.',
 'Disappointed with the customer support.',
 'Beautiful design and easy to use.',
 'Delivery was fast and efficient.',
 'Not as described, poor quality.',
 'Great experience, will buy again.',
 'Fantastic value for the money.']

In [5]:
## Split sentence into words
tokenizer = Tokenizer(oov_token='<nothing>')
tokenizer.fit_on_texts(docs)

In [6]:
tokenizer.word_counts

OrderedDict([('amazing', 1),
             ('product', 1),
             ('exceeded', 1),
             ('my', 1),
             ('expectations', 1),
             ('quick', 1),
             ('service', 1),
             ('and', 4),
             ('friendly', 2),
             ('staff', 1),
             ('good', 1),
             ('quality', 2),
             ('but', 1),
             ('a', 1),
             ('bit', 1),
             ('overpriced', 1),
             ('the', 3),
             ('app', 1),
             ('is', 1),
             ('user', 1),
             ('intuitive', 1),
             ('disappointed', 1),
             ('with', 1),
             ('customer', 1),
             ('support', 1),
             ('beautiful', 1),
             ('design', 1),
             ('easy', 1),
             ('to', 1),
             ('use', 1),
             ('delivery', 1),
             ('was', 1),
             ('fast', 1),
             ('efficient', 1),
             ('not', 1),
             ('as', 1),
           

In [7]:
tokenizer.word_index


{'<nothing>': 1,
 'and': 2,
 'the': 3,
 'friendly': 4,
 'quality': 5,
 'amazing': 6,
 'product': 7,
 'exceeded': 8,
 'my': 9,
 'expectations': 10,
 'quick': 11,
 'service': 12,
 'staff': 13,
 'good': 14,
 'but': 15,
 'a': 16,
 'bit': 17,
 'overpriced': 18,
 'app': 19,
 'is': 20,
 'user': 21,
 'intuitive': 22,
 'disappointed': 23,
 'with': 24,
 'customer': 25,
 'support': 26,
 'beautiful': 27,
 'design': 28,
 'easy': 29,
 'to': 30,
 'use': 31,
 'delivery': 32,
 'was': 33,
 'fast': 34,
 'efficient': 35,
 'not': 36,
 'as': 37,
 'described': 38,
 'poor': 39,
 'great': 40,
 'experience': 41,
 'will': 42,
 'buy': 43,
 'again': 44,
 'fantastic': 45,
 'value': 46,
 'for': 47,
 'money': 48}

In [8]:
tokenizer.document_count

10

In [9]:
## text to vector
sequences = tokenizer.texts_to_sequences(docs)

In [10]:
## diffent sized squences
sequences

[[6, 7, 8, 9, 10],
 [11, 12, 2, 4, 13],
 [14, 5, 15, 16, 17, 18],
 [3, 19, 20, 21, 4, 2, 22],
 [23, 24, 3, 25, 26],
 [27, 28, 2, 29, 30, 31],
 [32, 33, 34, 2, 35],
 [36, 37, 38, 39, 5],
 [40, 41, 42, 43, 44],
 [45, 46, 47, 3, 48]]

In [12]:
## Added Padding
sequences = pad_sequences(sequences)

In [13]:
sequences

array([[ 0,  0,  6,  7,  8,  9, 10],
       [ 0,  0, 11, 12,  2,  4, 13],
       [ 0, 14,  5, 15, 16, 17, 18],
       [ 3, 19, 20, 21,  4,  2, 22],
       [ 0,  0, 23, 24,  3, 25, 26],
       [ 0, 27, 28,  2, 29, 30, 31],
       [ 0,  0, 32, 33, 34,  2, 35],
       [ 0,  0, 36, 37, 38, 39,  5],
       [ 0,  0, 40, 41, 42, 43, 44],
       [ 0,  0, 45, 46, 47,  3, 48]], dtype=int32)

In [14]:
## Loading IMDB dataset and split that data into train and test
(X_train,y_train),(X_test,y_test) = imdb.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [15]:
len(X_train[2])

141

In [16]:
len(X_train[4])

147

In [17]:
## Trimming each and every review upto only 50 words, to avoid computational time
X_train = pad_sequences(X_train,padding='post',maxlen=50)
X_test = pad_sequences(X_test,padding='post',maxlen=50)

In [18]:
X_train[0]

array([2071,   56,   26,  141,    6,  194, 7486,   18,    4,  226,   22,
         21,  134,  476,   26,  480,    5,  144,   30, 5535,   18,   51,
         36,   28,  224,   92,   25,  104,    4,  226,   65,   16,   38,
       1334,   88,   12,   16,  283,    5,   16, 4472,  113,  103,   32,
         15,   16, 5345,   19,  178,   32], dtype=int32)

In [19]:
len(X_train[0])

50

In [20]:
len(X_train[9])

50

In [21]:
model = Sequential()

model.add(SimpleRNN(32,input_shape=(50,1),return_sequences=False))
model.add(Dense(1,activation='sigmoid'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 32)                1088      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 1121 (4.38 KB)
Trainable params: 1121 (4.38 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

model.fit(X_train,y_train,epochs=10,validation_data=(X_test,y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b357ff0d1b0>