### Simple RNN for parameter demo

In [2]:
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Input

# Tiny network used only to inspect trainable-parameter counts in the summary:
#   SimpleRNN(3) on 5 input features: 5*3 (input) + 3*3 (recurrent) + 3 (bias) = 27
#   Dense(1) on the 3 RNN outputs:    3*1 (weights) + 1 (bias)                 = 4
model = Sequential([
    Input(shape=(4, 5)),  # 4 timesteps, 5 features per timestep
    SimpleRNN(3),
    Dense(1, activation='sigmoid'),
])

model.summary()

### sentiment analysis

In [3]:
import numpy as np
import pandas as pd

### Data prep example

In [4]:
# Toy corpus: ten short phrases used to demonstrate tokenizing and padding.
docs = [
    'go india',
    'india india',
    'hip hip hurray',
    'jeetega bhai jeetega india jeetega',
    'bharat mata ki jai',
    'kohli kohli',
    'sachin sachin',
    'dhoni dhoni',
    'modi ji ki jai',
    'inquilab zindabad',
]

### tokenize

In [5]:
import tensorflow
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer

# Build the vocabulary from the toy corpus. '<MISSING>' is registered as the
# out-of-vocabulary token; it receives index 1 in word_index (see below).
tokenizer = Tokenizer(oov_token='<MISSING>')
tokenizer.fit_on_texts(docs)

In [6]:
# Word -> integer index mapping. Index 1 is the OOV token; the remaining words
# are numbered from 2 upward, most frequent first. Index 0 is not assigned.
tokenizer.word_index

{'<MISSING>': 1,
 'india': 2,
 'jeetega': 3,
 'hip': 4,
 'ki': 5,
 'jai': 6,
 'kohli': 7,
 'sachin': 8,
 'dhoni': 9,
 'go': 10,
 'hurray': 11,
 'bhai': 12,
 'bharat': 13,
 'mata': 14,
 'modi': 15,
 'ji': 16,
 'inquilab': 17,
 'zindabad': 18}

In [7]:
# Occurrence count of each word across the corpus, in order of first appearance.
tokenizer.word_counts

OrderedDict([('go', 1),
             ('india', 4),
             ('hip', 2),
             ('hurray', 1),
             ('jeetega', 3),
             ('bhai', 1),
             ('bharat', 1),
             ('mata', 1),
             ('ki', 2),
             ('jai', 2),
             ('kohli', 2),
             ('sachin', 2),
             ('dhoni', 2),
             ('modi', 1),
             ('ji', 1),
             ('inquilab', 1),
             ('zindabad', 1)])

In [8]:
# Number of documents the tokenizer was fitted on.
tokenizer.document_count

10

### create sequences + pad sequences

In [9]:
# Replace every word in each document with its integer index from word_index.
# The resulting lists have different lengths (one entry per word).
sequences = tokenizer.texts_to_sequences(docs)
sequences

[[10, 2],
 [2, 2],
 [4, 4, 11],
 [3, 12, 3, 2, 3],
 [13, 14, 5, 6],
 [7, 7],
 [8, 8],
 [9, 9],
 [15, 16, 5, 6],
 [17, 18]]

In [10]:
from keras.preprocessing.sequence import pad_sequences
# Append zeros ('post' padding) so every row is as long as the longest
# document, giving a rectangular integer array.
psequences = pad_sequences(sequences, padding='post')
psequences

array([[10,  2,  0,  0,  0],
       [ 2,  2,  0,  0,  0],
       [ 4,  4, 11,  0,  0],
       [ 3, 12,  3,  2,  3],
       [13, 14,  5,  6,  0],
       [ 7,  7,  0,  0,  0],
       [ 8,  8,  0,  0,  0],
       [ 9,  9,  0,  0,  0],
       [15, 16,  5,  6,  0],
       [17, 18,  0,  0,  0]], dtype=int32)

## RNN Sentiment Analysis on IMDB

In [11]:
from keras.datasets import imdb

# Load the IMDB reviews with their sentiment labels. x_train is a 1-D array
# holding one variable-length review per element, hence the (25000,) shape.
(x_train, y_train), (x_test, y_test) = imdb.load_data()
x_train.shape

(25000,)

In [12]:
# The test split has the same number of reviews as the training split.
x_test.shape

(25000,)

In [13]:
from keras.preprocessing.sequence import pad_sequences
# Zero-pad every training review at the end up to the longest training review.
x_train_padded = pad_sequences(x_train, padding='post')

In [14]:
# Padded sequence length (length of the longest training review); this value
# is used as the timestep dimension of the model input further down.
x_train_padded.shape[1]

2494

In [15]:
# Pad the test reviews to the SAME length as the training matrix. Without
# maxlen, pad_sequences pads only to the longest test review (2315 tokens),
# which does not match the 2494-step input the model is built with.
# No test review is longer than the training length, so nothing is truncated.
x_test_padded = pad_sequences(x_test, padding='post', maxlen=x_train_padded.shape[1])
x_test_padded.shape

(25000, 2494)

In [16]:
from keras import Sequential
from keras.layers import Dense, SimpleRNN, Input

# Baseline without an embedding: each padded review is presented to the RNN as
# a sequence of raw word indices, one scalar feature per timestep.
model = Sequential([
    Input(shape=(x_train_padded.shape[1], 1)),
    SimpleRNN(32, return_sequences=False),  # only the final hidden state is passed on
    Dense(1, activation='sigmoid'),         # single probability for the binary label
])

model.summary()

In [17]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [18]:
# Train for 5 epochs, evaluating on the test split after every epoch.
# In the recorded run below, accuracy stays at roughly 50% (chance level).
model.fit(
    x_train_padded,
    y_train,
    epochs=5,
    validation_data=(x_test_padded, y_test),
)

Epoch 1/5


I0000 00:00:1740688743.703681 1868432 service.cc:148] XLA service 0x7702f40b8fe0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1740688743.703718 1868432 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-02-27 13:39:03.729422: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1740688743.822400 1868432 cuda_dnn.cc:529] Loaded cuDNN version 90701


[1m  1/782[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m16:41[0m 1s/step - accuracy: 0.4688 - loss: 0.6940

I0000 00:00:1740688744.154662 1868432 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 95ms/step - accuracy: 0.5032 - loss: 0.6945 - val_accuracy: 0.5026 - val_loss: 0.6932
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 94ms/step - accuracy: 0.4951 - loss: 0.6943 - val_accuracy: 0.5027 - val_loss: 0.6933
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 95ms/step - accuracy: 0.4998 - loss: 0.6937 - val_accuracy: 0.4956 - val_loss: 0.6935
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 94ms/step - accuracy: 0.5004 - loss: 0.6936 - val_accuracy: 0.5015 - val_loss: 0.6935
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 94ms/step - accuracy: 0.4968 - loss: 0.6940 - val_accuracy: 0.4985 - val_loss: 0.6934


<keras.src.callbacks.history.History at 0x7704f0d53250>

## Embedding

In [19]:
# Same toy corpus as in the data-prep section, redefined here so the embedding
# section starts from its own input.
docs = [
    'go india',
    'india india',
    'hip hip hurray',
    'jeetega bhai jeetega india jeetega',
    'bharat mata ki jai',
    'kohli kohli',
    'sachin sachin',
    'dhoni dhoni',
    'modi ji ki jai',
    'inquilab zindabad',
]

In [20]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [21]:
# Fit a fresh tokenizer on the toy corpus; '<MISSING>' is the out-of-vocabulary
# token and takes index 1 in word_index.
tokenizer = Tokenizer(oov_token='<MISSING>')
tokenizer.fit_on_texts(docs)

In [30]:
# 18 entries with indices 1..18 (index 0 is left for padding), so the largest
# integer that can appear in a padded sequence is 18.
tokenizer.word_index

{'<MISSING>': 1,
 'india': 2,
 'jeetega': 3,
 'hip': 4,
 'ki': 5,
 'jai': 6,
 'kohli': 7,
 'sachin': 8,
 'dhoni': 9,
 'go': 10,
 'hurray': 11,
 'bhai': 12,
 'bharat': 13,
 'mata': 14,
 'modi': 15,
 'ji': 16,
 'inquilab': 17,
 'zindabad': 18}

In [23]:
# Encode each document as a list of word indices.
sequences = tokenizer.texts_to_sequences(docs)

In [32]:
from keras.preprocessing.sequence import pad_sequences
# Zero-pad the encoded toy corpus at the end to a rectangular (10, 5) array.
# NOTE(review): this rebinds x_train, which earlier held the IMDB training
# reviews; re-running the IMDB cells after this one would use the wrong data.
x_train = pad_sequences(sequences, padding='post')
x_train.shape

(10, 5)

In [35]:
from keras import Sequential
from keras.layers import Input, Dense, Embedding

# Embedding lookup table: one 5-dimensional vector per integer index.
# The padded sequences contain indices 0 (padding) through len(word_index)
# (the highest word index), so the table needs len(word_index) + 1 rows.
# The previous "- 1" left it two rows short of the highest index, putting
# indices 17 and 18 ('inquilab', 'zindabad') out of range.
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=5))

model.summary()

In [None]:
# NOTE(review): no `loss` is passed here and the model so far is only an
# Embedding layer; presumably a loss (and further layers) must be added before
# this model can be trained with fit() - confirm intent.
model.compile(optimizer='adam', metrics = ['accuracy'])