In [91]:
# Importing the required libraries
import re
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from keras.models import Sequential,Model
from keras.layers import Dense,Bidirectional
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.layers import *

# Load the dataset

In [92]:
df=pd.read_csv('eng_dataset.csv')
df.head()
anger_count = len(df[df['sentiment'] == 'anger'])
fear_count = len(df[df['sentiment'] == 'fear'])
joy_count = len(df[df['sentiment'] == 'joy'])
sadness_count = len(df[df['sentiment'] == 'sadness'])

print("Number of rows with sentiment 'anger':", anger_count)

print("Number of rows with sentiment 'fear':", fear_count)

print("Number of rows with sentiment 'joy':", joy_count)

print("Number of rows with sentiment 'sadness':", sadness_count)

Number of rows with sentiment 'anger': 1701
Number of rows with sentiment 'fear': 2252
Number of rows with sentiment 'joy': 1616
Number of rows with sentiment 'sadness': 1533


In [93]:
Sentences = df['content']
Sentiments = df['sentiment']

In [94]:
len(Sentences), set(Sentiments)

(7102, {'anger', 'fear', 'joy', 'sadness'})

In [95]:
print(Sentiments)

0         anger
1         anger
2         anger
3         anger
4         anger
         ...   
7097    sadness
7098    sadness
7099    sadness
7100    sadness
7101    sadness
Name: sentiment, Length: 7102, dtype: object


# Load the Glove Embedding

In [96]:
glove ='glove.6B.50d.txt'

def load_glove_embeddings(path):
    embeddings_index = {}
    with open(path, 'r', encoding='utf8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
    return embeddings_index

Glove = load_glove_embeddings(glove)

In [97]:
def cosine_similarity(a, b):
    """
    Computes the cosine similarity between two vectors a and b.
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

# Preprocessing

In [98]:
# it is removing special characters and also creating tokens
def preprocess(Sentences):
    sentences = tf.strings.substr(Sentences, 0, 300)
    sentences = tf.strings.regex_replace(sentences, b"<br\\s*/?>", b" ")
    sentences = tf.strings.regex_replace(sentences, b"[^a-zA-Z']", b" ")
    sentences = tf.strings.split(sentences)
    sentences = tf.strings.lower(sentences)
    sentences = sentences.to_tensor(default_value=b"<pad>")
    return sentences

In [99]:
sentences = preprocess(Sentences)
sentences.shape

TensorShape([7102, 34])

# Encoding

In [100]:
def encoding(sentences, Glove):
    Encoded_vec = []
    for sentence in sentences:
        sent_vec = []
        for token in sentence:
            token = token.numpy().decode('utf-8')
            if token in Glove:
                sent_vec.append(Glove[token])
            else:
                sent_vec.append(np.zeros(50))
        Encoded_vec.append(sent_vec)
    return Encoded_vec

In [101]:
Encoded_vec = encoding(sentences, Glove)
X = np.array(Encoded_vec)
print(X.shape)

(7102, 34, 50)


# One-hot Encoding of target sentiments

In [102]:
# Perform one-hot encoding on df[0] i.e emotion
enc = OneHotEncoder(handle_unknown='ignore')
Y = enc.fit_transform(np.array(Sentiments).reshape(-1,1)).toarray()
print(Y.shape)

(7102, 4)


# Split the train & test dataset

In [None]:
# Split into train and test
from keras.layers import Embedding
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=23)

In [103]:
#Defining the BiLSTM Model
class BiLSTMModel:
    def __init__(self):
        self.model = Sequential()
        self.model.add(Bidirectional(LSTM(100, input_shape=(100, 50))))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(4, activation='softmax'))
        self.model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])

    def fit(self, X, Y, epochs, batch_size):
        self.model.fit(X, Y, epochs=epochs, batch_size=batch_size)

    def evaluate(self, X, Y, batch_size):
        return self.model.evaluate(X, Y, batch_size=batch_size)

    def predict(self, X):
        return self.model.predict(X)

In [105]:
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.lancaster import LancasterStemmer
import nltk
import re
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, GRU, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
from livelossplot.tf_keras import PlotLossesCallback
from livelossplot import PlotLossesKeras

# Train the model

In [106]:
# create an instance of the BiLSTMModel class
model = BiLSTMModel()
hist = model.fit(X_train, Y_train,
                 epochs = 50,
                 batch_size = 64)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [107]:
model.model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_2 (Bidirectio  (None, 200)              120800    
 nal)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 200)               0         
                                                                 
 dense_2 (Dense)             (None, 4)                 804       
                                                                 
Total params: 121,604
Trainable params: 121,604
Non-trainable params: 0
_________________________________________________________________


# Evaluations

In [109]:
Loss, acc = model.evaluate(X_test, Y_test, batch_size=64)
print("Loss: %.2f" % (Loss))
print("acc: %.2f" % (acc))

Loss: 1.07
acc: 0.77


# Prediction

In [110]:
#First, initialize it.
while True:

  i=input("Enter : ")
  twt = [i]
  #Next, tokenize it.
  Twt = preprocess(twt)

  # Encoding
  Twt = encoding(Twt, Glove)
  Twt = np.array(Twt)
  print(Twt.shape)
  #Predict the sentiment by passing the sentence to the model we built.
  sentiment = model.predict(Twt)[0]
  label = np.argmax(sentiment)
  print(enc.categories_[0][label])

Enter : hey i am feeling good
(1, 5, 50)
fear
Enter : it feels great
(1, 3, 50)
sadness
Enter : i am happy for you
(1, 5, 50)
joy
Enter : See its a sunny day
(1, 5, 50)
joy
Enter : This is the most useless product
(1, 6, 50)
fear
Enter : 
(1, 0)


ValueError: in user code:

    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 2169, in predict_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 2155, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 2143, in run_step  **
        outputs = model.predict_step(data)
    File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 2111, in predict_step
        return self(x, training=False)
    File "/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None

    ValueError: Exception encountered when calling layer 'sequential_2' (type Sequential).
    
    Cannot iterate over a shape with unknown rank.
    
    Call arguments received by layer 'sequential_2' (type Sequential):
      • inputs=tf.Tensor(shape=<unknown>, dtype=float32)
      • training=False
      • mask=None


In [111]:
model.model.save('trained_model_3.h5')