<a href="https://colab.research.google.com/github/daksha200/Projects/blob/testrepo/Many_to_One_Sentiment_Analysis_using_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
#Importing datasets
from keras.datasets import imdb

In [10]:
## Loading data from IMDB Dataset
vocabulary_size = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = vocabulary_size)
print('Loaded dataset with {} training samples, {} test samples'.format(len(X_train), len(X_test)))

Loaded dataset with 25000 training samples, 25000 test samples


In [11]:
## Two dictionaries one from word to id and the other vice versa
word2id = imdb.get_word_index()
id2word = {i: word for word, i in word2id.items()}

print([id2word.get(i, ' ') for i in X_train[6]])

print(y_train[6])

['the', 'and', 'full', 'involving', 'to', 'impressive', 'boring', 'this', 'as', 'and', 'and', 'br', 'villain', 'and', 'and', 'need', 'has', 'of', 'costumes', 'b', 'message', 'to', 'may', 'of', 'props', 'this', 'and', 'and', 'concept', 'issue', 'and', 'to', "god's", 'he', 'is', 'and', 'unfolds', 'movie', 'women', 'like', "isn't", 'surely', "i'm", 'and', 'to', 'toward', 'in', "here's", 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'carl', 'of', 'and', 'br', 'of', 'reviewer', 'closer', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'mistakes', 'and', 'was', "couldn't", 'of', 'few', 'br', 'of', 'you', 'to', "don't", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 'just', 'its', 'because', 'many', 'br', 'of', 'overly', 'to', 'descent', 'people', 'time', 'very', 'bland']
1


In [12]:
#Pad sequences. To make all sequences length 500
from keras.preprocessing import sequence
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

In [13]:
#Using LSTM RNN for classification task
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout

## Embedding size is 32
embedding_size=32

## Model is sequential
model=Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
#LSTM with 100 cells
model.add(LSTM(100))
## A dense layer to give output
model.add(Dense(1, activation='sigmoid'))
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 32)           160000    
                                                                 
 lstm_1 (LSTM)               (None, 100)               53200     
                                                                 
 dense_1 (Dense)             (None, 1)                 101       
                                                                 
Total params: 213301 (833.21 KB)
Trainable params: 213301 (833.21 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [14]:
## Binary_Cross Entropy loss for binary classification task
## Adam optimzizer
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

In [15]:
## Training the model in batches of 64 for 3 epochs
batch_size = 64
num_epochs = 3
X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:]
model.fit(X_train2, y_train2, validation_data=(X_valid, y_valid), batch_size=batch_size, epochs=num_epochs)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7f2c74c4a080>

In [16]:
## Evaluating the task
scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])

Test accuracy: 0.6926800012588501


In [17]:
from keras import Sequential
from keras.layers import Embedding, Dense, GRU, Dropout

## Embedding size is 32
embedding_size=32

## Model is sequential
model=Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
# GRU with 100 cells
model.add(GRU(100))
## A dense layer to give output
model.add(Dense(1, activation='sigmoid'))
print(model.summary())


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 500, 32)           160000    
                                                                 
 gru (GRU)                   (None, 100)               40200     
                                                                 
 dense_2 (Dense)             (None, 1)                 101       
                                                                 
Total params: 200301 (782.43 KB)
Trainable params: 200301 (782.43 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [18]:
## Binary_Cross Entropy loss for binary classification task
## Adam optimzizer
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

In [19]:
## Training the model in batches of 64 for 3 epochs
batch_size = 64
num_epochs = 3
X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:]
model.fit(X_train2, y_train2, validation_data=(X_valid, y_valid), batch_size=batch_size, epochs=num_epochs)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7f2c6e5ce890>

In [20]:
## Evaluating the task
scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])

Test accuracy: 0.8523200154304504
