In [50]:
!pip install tensorflow
!pip install tensorflow-text

In [51]:
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords

# NLTK resources needed by process_text: the English stop-word list and the
# Punkt tokenizer data used by word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases load the tokenizer from 'punkt_tab' instead of the
# pickled 'punkt' model; fetching both keeps word_tokenize working on old and
# new installations (an unknown resource id only reports an error and returns False).
nltk.download('punkt_tab')

import tensorflow as tf

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from tabulate import tabulate
from nltk.tokenize import word_tokenize

import numpy as np
import tensorflow_hub as hub
import tensorflow_text as text
import sys
# Add the relative 'models' directory to the module search path.
sys.path.append('models')

import warnings
# Ignore every warning category from here on (this also hides deprecation notices).
warnings.filterwarnings("ignore")

In [52]:
# Read the review CSV into a DataFrame and preview its first 20 rows.
dataset = pd.read_csv(
    filepath_or_buffer='../input/resturantreviewenglish6554/Rest_review.csv',
)
dataset.head(n=20)

# ***Pre-Processing***

In [53]:
### Dataset Preprocessing
from functools import lru_cache

# Individual symbols / emoji that are replaced by a single space.
_SYMBOL_PATTERN = re.compile('[''````£|¢|/=।!“’<>‘॥”‰\'🤓⌚🤰🥺⚽️✌�￰🥀🤣🤡🤗🤐🤦🤔⏩⏹￰]')

# Code-point ranges that are deleted outright. Note that the
# U+24C2..U+1F251 span is very broad: it removes far more than emoji
# (CJK ideographs, for example, fall inside it).
_EMOJI_PATTERN = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           u"\u00C0-\u017F"          #latin
                           u"\u2000-\u206F"          #generalPunctuations 
                           u"\\U0001f90f" 
                           u"\\U0001f9cf"
                           u"\U0001fa78"  
                           "]+", flags=re.UNICODE)


@lru_cache(maxsize=1)
def _english_stop_words():
    """Return the NLTK English stop-word list as a frozenset, loaded only once."""
    return frozenset(stopwords.words('english'))


def process_text(text):
    """Normalise one raw review into a space-separated string of content words.

    Steps, in order:
      1. lower-case, turn newlines into spaces, drop carriage returns, strip;
      2. collapse runs of spaces;
      3. replace the characters in ``_SYMBOL_PATTERN`` with a space;
      4. delete everything matched by ``_EMOJI_PATTERN``;
      5. replace underscores and the Bengali visarga sign with a space;
      6. delete any remaining character that is neither a word character
         nor whitespace;
      7. tokenize with ``word_tokenize`` and drop English stop words.

    Parameters
    ----------
    text : str
        The raw review. Missing values (None, NaN, pd.NA) are accepted and
        produce an empty string; any other non-string value is converted
        with ``str()`` first.

    Returns
    -------
    str
        The remaining tokens joined by single spaces (possibly empty).
    """
    # A missing review reaches this function as NaN/None when it is applied
    # to a DataFrame column; calling .lower() on it would raise AttributeError.
    if not isinstance(text, str):
        if pd.isna(text):
            return ''
        text = str(text)

    text = text.lower().replace('\n', ' ').replace('\r', '').strip()
    text = re.sub(' +', ' ', text)
    text = _SYMBOL_PATTERN.sub(' ', text)
    text = _EMOJI_PATTERN.sub(r'', text)
    text = text.replace('_', ' ')
    text = text.replace('ঃ', ' ')
    text = re.sub(r'[^\w\s]', '', text)

    # Single filtering pass (the original built the same list twice).
    stop_words = _english_stop_words()
    return " ".join(w for w in word_tokenize(text) if w not in stop_words)

In [54]:
### Vocabulary size
voc_size = 5000

# Target column, and the review text after it has been run through process_text.
y = dataset['Sentiment']
X = dataset['Text'].apply(process_text)



# ***Feature Extraction***

In [55]:
# Map every word of every cleaned review to an integer in [1, voc_size).
# `one_hot` relies on Python's built-in hash(), which is salted per process,
# so the word -> index mapping changed on every kernel restart. Calling the
# underlying `hashing_trick` with the stable 'md5' hash keeps the same
# mapping across runs, which makes results reproducible.
from tensorflow.keras.preprocessing.text import hashing_trick

onehot_repr = [hashing_trick(words, voc_size, hash_function='md5') for words in X]

In [56]:
# Bring every encoded review to exactly `sent_length` entries,
# padding at the front of shorter sequences.
sent_length = 50
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

In [57]:
# Show the first encoded review after padding/truncation to sent_length.
embedded_docs[0]

# ***Evaluation Table***

In [58]:
# Results table: starts with the header row; each classifier appends one row later.
evalution_table = [
    ['Classifier Name', 'Precision', 'Recall', 'Accuracy', 'F1-Score'],
]

# ***LSTM***

In [59]:
## Creating model LSTM
# Embedding -> LSTM(100) -> Dropout(0.3) -> single-unit output.
embedding_vector_features=40
model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(LSTM(100))
model.add(Dropout(0.3))
# binary_crossentropy expects a probability in (0, 1). The previous 'relu'
# output was unbounded above and could be exactly 0, which breaks the loss
# and the 0.5 decision threshold used at prediction time; sigmoid gives a
# proper probability.
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

In [60]:
# Sanity check: number of encoded reviews next to the shape of the label column.
(len(embedded_docs), y.shape)

In [61]:
import numpy as np

# Copy features and labels into plain NumPy arrays for splitting and training.
X_final, y_final = np.array(embedded_docs), np.array(y)

In [62]:
# Shapes of the feature matrix and the label vector.
(X_final.shape, y_final.shape)

In [63]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.2,
    random_state=42,
)

In [64]:
### Finally Training
# Train for 5 epochs in batches of 32; the held-out split is passed as
# validation data so its loss/accuracy are reported after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_test, y_test),
)

In [65]:
# Score the held-out split and turn the model outputs into hard 0/1 labels
# by thresholding at 0.5.
y_pred = model.predict(X_test)
y_pred_lstm = (y_pred > 0.5).astype(int)

cm_lstm = confusion_matrix(y_test, y_pred_lstm)
acc_lstm = accuracy_score(y_test, y_pred_lstm)

# average=None yields one score per class.
pr_lstm, re_lstm, f1_lstm = (
    metric(y_test, y_pred_lstm, average=None)
    for metric in (precision_score, recall_score, f1_score)
)

from mlxtend.plotting import plot_confusion_matrix
print("Confusion Matrix for LSTM")
plot_confusion_matrix(
    conf_mat=cm_lstm,
    show_absolute=True,
    show_normed=True,
    colorbar=True,
)

# Index 0 picks the score of the first class in the per-class arrays.
# NOTE(review): confirm that this is the class the table is meant to report.
evalution_table.append(
    ['LSTM', pr_lstm[0], re_lstm[0], acc_lstm, f1_lstm[0]]
)

# ***Bi-LSTM***

In [66]:
## Creating model Bi-LSTM
# Embedding -> Bidirectional(LSTM(100)) -> Dropout(0.3) -> single-unit output.
# This rebinds `model`, replacing the model object built earlier in the notebook.
embedding_vector_features=40
model=Sequential()
model.add(Embedding(voc_size,embedding_vector_features,input_length=sent_length))
model.add(Bidirectional(LSTM(100)))
model.add(Dropout(0.3))
# binary_crossentropy expects a probability in (0, 1). The previous 'relu'
# output was unbounded above and could be exactly 0, which breaks the loss
# and the 0.5 decision threshold used at prediction time; sigmoid gives a
# proper probability.
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

In [67]:
# Sanity check: number of encoded reviews next to the shape of the label column.
(len(embedded_docs), y.shape)

In [68]:
import numpy as np

# Rebuild the NumPy feature and label arrays from the padded sequences and labels.
X_final, y_final = np.array(embedded_docs), np.array(y)

In [69]:
# Shapes of the feature matrix and the label vector.
(X_final.shape, y_final.shape)

In [70]:
from sklearn.model_selection import train_test_split

# Same 80/20 split with the same seed as used for the LSTM model.
X_train, X_test, y_train, y_test = train_test_split(
    X_final,
    y_final,
    test_size=0.2,
    random_state=42,
)

In [71]:
### Finally Training
# Train for 5 epochs in batches of 32; the held-out split is passed as
# validation data so its loss/accuracy are reported after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=5,
    validation_data=(X_test, y_test),
)

In [72]:
# Score the held-out split with the Bi-LSTM and threshold the outputs at 0.5.
# The result variables keep their `_lstm` suffix, so they overwrite the
# values computed for the plain LSTM model.
y_pred = model.predict(X_test)
y_pred_lstm = (y_pred > 0.5).astype(int)

cm_lstm = confusion_matrix(y_test, y_pred_lstm)
acc_lstm = accuracy_score(y_test, y_pred_lstm)

# average=None yields one score per class.
pr_lstm, re_lstm, f1_lstm = (
    metric(y_test, y_pred_lstm, average=None)
    for metric in (precision_score, recall_score, f1_score)
)

from mlxtend.plotting import plot_confusion_matrix
print("Confusion Matrix for Bi-LSTM")
plot_confusion_matrix(
    conf_mat=cm_lstm,
    show_absolute=True,
    show_normed=True,
    colorbar=True,
)

# Index 0 picks the score of the first class in the per-class arrays.
# NOTE(review): confirm that this is the class the table is meant to report.
evalution_table.append(
    ['Bi-LSTM', pr_lstm[0], re_lstm[0], acc_lstm, f1_lstm[0]]
)

# ***Result***

In [73]:
# The first row of evalution_table holds the column titles; without
# headers='firstrow' tabulate renders it as an ordinary data row, with no
# separator and with the numeric columns treated as text.
print(tabulate(evalution_table, headers='firstrow'))