DATASET USED: 

[Khan, Muhammad Yaseen, and Nizami, Muhammad Suffian. "Urdu Sentiment Corpus (v1.0): Linguistic Exploration and Visualization of Labeled Dataset for Urdu Sentiment Analysis." In 2020 IEEE 2nd International Conference On Information Science & Communication Technology (ICISCT). IEEE, 2020.](https://ieeexplore.ieee.org/abstract/document/9080043/)

In [161]:
import re
import string
from collections import Counter

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import tensorflow as tf
from keras.layers import Embedding, SimpleRNN, Dense, LSTM, GRU
from keras.layers import Bidirectional, Dropout
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Preprocessing

In [None]:
# Load the tab-separated Urdu Sentiment Corpus into a DataFrame and display it.
corpus_path = 'C:/Users/Dell/Desktop/urdu-sentiment-corpus-v1.tsv'
df = pd.read_csv(corpus_path, sep='\t')
df

Unnamed: 0,Tweet,Class
0,میں نے ایٹم بم بنایا ھے ۔۔۔۔او بھائی ایٹم بمب ...,P
1,چندے سے انقلاب اور عمران خان وزیر اعظم نہیں بن...,N
2,ٹویٹر کا خیال کیسے آیا ؟,O
3,"سرچ انجن گوگل کے نائب صدر نے فضا میں ، 130,000...",P
4,ابھی تک اسکی لہریں کبھی کبھی آ جاتی ہیں یار :أْ,P
...,...,...
995,اُس آدمی نے اِس سالار کو کافی معقول ٹپ دی ہے ۔,P
996,چچا غالب کی روح سے معذرت کے ساتھہم نے مانا کہ ...,P
997,واہ جناب واہ! اچھی رہی۔ جناب خود کو فرشتہ سمجو...,P
998,اسلام آباد :پی اے ٹی کا دھرنا ختم، صفائی کے کا...,P


In [None]:
# Drop rows with any missing value, then list the distinct sentiment labels that remain.
df = df.dropna()
df['Class'].unique()

array(['P', 'N', 'O'], dtype=object)

In [None]:
# Show how many tweets carry each sentiment label.
for label in ('O', 'N', 'P'):
    print(label, (df['Class'] == label).sum())

O 20
N 499
P 480


In [None]:
# Keep only the P and N tweets so the task becomes binary classification.
# .copy() makes the filtered frame independent of the original, so the column
# assignments in later cells (df['Tweet'] = ..., df['Class'] = ...) write to a
# real frame instead of a slice and do not raise SettingWithCopyWarning.
df = df[df['Class'] != 'O'].copy()
df['Class'].unique()

array(['P', 'N'], dtype=object)

In [None]:
# Combining marks that are stripped from the tweets before tokenisation.
urdu_diacritics = ['ِ', 'ٰ', 'ُ', 'ٍ', 'ً', 'َ']


def remove_diacritics(text):
    """Return ``text`` with every character listed in ``urdu_diacritics`` removed.

    All other characters are kept in their original order.
    """
    return ''.join(ch for ch in text if ch not in urdu_diacritics)

In [None]:
def remove_punctuations(raw_text):
    """Delete the punctuation characters named in the pattern from ``raw_text``.

    Matches are removed outright (replaced by the empty string, not a space).
    Characters outside the pattern, such as ``,`` and ``:``, are left untouched.
    """
    punctuation_pattern = r'[؛۔٫٪+=@#!؟،۔)(}{]'
    return re.sub(punctuation_pattern, '', raw_text)

In [None]:
# Numerals to strip: the Urdu digit glyphs plus the ASCII digits 0-9.
digits = ['۶', '۴', '۵', '۸', '۲', '۰', '۷', '۹', '۳', '۱','1','2','3','4','5','6','7','8','9','0']


def remove_digits(text):
    """Return ``text`` with every character listed in ``digits`` removed."""
    return ''.join(ch for ch in text if ch not in digits)

In [None]:
def tokenize(text):
    """Split ``text`` on runs of whitespace and return the list of tokens."""
    return text.split()

In [None]:
# Read the stop-word file: one entry per line, with only the trailing newline removed.
with open('C:/Users/Dell/Downloads/stop_words.txt', encoding='utf-8') as f:
    stopwords = [line.strip('\n') for line in f]

stopwords

['آ',
 'آؤ',
 'آو',
 'آئی',
 'آئیں',
 'آئے',
 'آج',
 'آجاؤ',
 'آجائیں',
 'آجائیے',
 'آجاو',
 'آجکل',
 'آپ',
 'آپکا',
 'آپکو',
 'آپکی',
 'آیا',
 'اب',
 'ابھی',
 'اس',
 'اسطرح',
 'اسکا',
 'اسکی',
 'اسکے',
 'اسی',
 'اسے',
 'اطراف',
 'الگ',
 'ان',
 'انکا',
 'انکی',
 'انکے',
 'انھوں',
 'انھیں',
 'انہیں',
 'اں',
 'اور',
 'اونچا',
 'اونچی',
 'اونچے',
 'اوپر',
 'اپنا',
 'اپنی',
 'اپنے',
 'اکثر',
 'اگر',
 'اگرچہ',
 'اہم',
 'بائیں',
 'بار',
 'بارے',
 'باہر',
 'باہم',
 'بذریعہ',
 'بس',
 'بظاہر',
 'بعد',
 'بغیر',
 'بلاشبہ',
 'بند',
 'بہت',
 'بھی',
 'بیشتر',
 'بیشک',
 'بےشک',
 'تاہم',
 'تب',
 'تجھ',
 'تجھے',
 'تر',
 'ترین',
 'تعداد',
 'تلک',
 'تم',
 'تمام',
 'تمکو',
 'تمھارا',
 'تمھاری',
 'تمھارے',
 'تمھیں',
 'تمہارا',
 'تمہاری',
 'تمہارے',
 'تمہیں',
 'تو',
 'تک',
 'تھا',
 'تھی',
 'تھیں',
 'تھے',
 'تیسرا',
 'تیسری',
 'تیسرے',
 'جائیں',
 'جاتا',
 'جاتی',
 'جاتے',
 'جانا',
 'جانے',
 'جائے',
 'جب',
 'جبہی',
 'جبھی',
 'جبکہ',
 'جو',
 'جہاں',
 'جیسا',
 'جیساکہ',
 'جیسی',
 'جیسے',
 'جیسےکہ',
 'خود',
 'دا

In [None]:
def remove_stopwords(text, stop_words=None):
    """Return ``text`` with stop words removed and single-space separated.

    Parameters
    ----------
    text : str
        Input sentence; it is split on whitespace.
    stop_words : iterable of str, optional
        Words to drop. Defaults to the notebook-level ``stopwords`` list, so
        existing one-argument calls behave as before.

    Returns
    -------
    str
        The remaining words joined by a single space ('' if nothing remains).
    """
    if stop_words is None:
        stop_words = stopwords
    # A set gives O(1) membership checks instead of scanning the list per word.
    blocked = set(stop_words)
    # str.split() never yields empty strings, so no extra emptiness check is needed.
    return ' '.join(word for word in text.split() if word not in blocked)

In [None]:
# Run the cleaning steps over every tweet, in order. Each step maps a string to a string.
# NOTE(review): tokenize() is defined above but is not part of this pipeline.
cleaning_steps = (remove_diacritics, remove_digits, remove_punctuations, remove_stopwords)
for step in cleaning_steps:
    df['Tweet'] = df['Tweet'].apply(step)
df['Tweet']

0      [ایٹم, بم, بنایا, ھے, او, بھائی, ایٹم, بمب, کو...
1             [چندے, انقلاب, عمران, خان, وزیر, اعظم, بن]
3      [سرچ, انجن, گوگل, نائب, صدر, فضا, ,, فٹ, بلندی...
4                                      [لہریں, یار, :أْ]
5      [گندی, زبان, گٹر, دماغ, جاهل, جیالے, هو, جیالا...
                             ...                        
995                       [آدمی, سالار, کافی, معقول, ٹپ]
996    [چچا, غالب, روح, معذرت, ساتھہم, مانا, ڈیزلمفت,...
997    [واہ, جناب, واہ, اچھی, جناب, فرشتہ, سمجوں, انسان]
998    [اسلام, آباد, :پی, اے, ٹی, دھرنا, ختم, صفائی, ...
999         [دنیا, راہ, وفا, ساتھتم, چلے, چلو, چلی, چلے]
Name: Tweet, Length: 979, dtype: object

In [None]:
# NOTE(review): both tools come from NLTK's English tooling; the displayed output
# shows the Urdu tokens passing through unchanged. Confirm they are wanted here.
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

def stem_and_lemmatize(text):
    """Stem, then lemmatize, every word of a tweet and return them space-joined.

    Parameters
    ----------
    text : str or iterable of str
        Either a whitespace-separated sentence or an already tokenised list.

    Returns
    -------
    str
        The processed words joined by a single space.
    """
    # The cleaning pipeline produces strings, and iterating a str yields single
    # characters. Split into words first so processing stays word-level.
    words = text.split() if isinstance(text, str) else text
    stemmed_words = [stemmer.stem(word) for word in words]
    lemmatized_words = [lemmatizer.lemmatize(word) for word in stemmed_words]
    return " ".join(lemmatized_words)

df['Tweet'] = df['Tweet'].apply(stem_and_lemmatize)

In [None]:
# Inspect the tweets after stemming/lemmatisation (each entry is a space-joined string).
df['Tweet']

0      ایٹم بم بنایا ھے او بھائی ایٹم بمب کوٹ لکھپت ا...
1                     چندے انقلاب عمران خان وزیر اعظم بن
3      سرچ انجن گوگل نائب صدر فضا , فٹ بلندی چھلانگ ع...
4                                          لہریں یار :أْ
5      گندی زبان گٹر دماغ جاهل جیالے هو جیالا هو جاهل...
                             ...                        
995                             آدمی سالار کافی معقول ٹپ
996      چچا غالب روح معذرت ساتھہم مانا ڈیزلمفت ہاتھ برا
997             واہ جناب واہ اچھی جناب فرشتہ سمجوں انسان
998    اسلام آباد :پی اے ٹی دھرنا ختم صفائی کام آغاز ...
999                  دنیا راہ وفا ساتھتم چلے چلو چلی چلے
Name: Tweet, Length: 979, dtype: object

In [None]:
# Replace the string labels with integer codes. The counts printed in the
# following cell show the resulting mapping: N -> 0, P -> 1.
labelencoder = LabelEncoder()
df['Class'] = labelencoder.fit_transform(df['Class'])
print(df['Class'])

0      1
1      0
3      1
4      1
5      0
      ..
995    1
996    1
997    1
998    1
999    1
Name: Class, Length: 979, dtype: int32


In [None]:
# Class balance after encoding (0 is printed as N, 1 as P).
for name, code in (('N', 0), ('P', 1)):
    print(name, (df['Class'] == code).sum())

N 499
P 480


In [None]:
# Check the column dtypes before building the model inputs.
df.dtypes

Tweet    object
Class     int32
dtype: object

In [134]:
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

# Hold out 25% of the tweets for evaluation; the fixed seed keeps the split repeatable.
X, y = df['Tweet'], df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# The vocabulary is fitted on the training tweets only.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)

# Encode both splits as sequences of integer word indices.
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Bring every sequence to a fixed length of 50.
X_train_seq_padded = pad_sequences(X_train_seq, maxlen=50)
X_test_seq_padded = pad_sequences(X_test_seq, maxlen=50)

# SimpleRNN

layers 2, drop-out 0.3

In [135]:
# SimpleRNN classifier: two recurrent layers with 0.3 dropout between them,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32, return_sequences=True),  # full sequence out, so the next RNN gets a sequence in
    Dropout(0.3),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.5306122448979592
Precision: 0.5
F1-score: 0.39790575916230364
Recall: 0.33043478260869563


layers 2, drop-out 0.7

In [136]:
# SimpleRNN classifier: two recurrent layers with 0.7 dropout between them,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32, return_sequences=True),  # full sequence out, so the next RNN gets a sequence in
    Dropout(0.7),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.5102040816326531
Precision: 0.4725274725274725
F1-score: 0.4174757281553398
Recall: 0.3739130434782609


layers 3, drop-out 0.3

In [137]:
# SimpleRNN classifier: three recurrent layers with 0.3 dropout after the
# first two, ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32, return_sequences=True),  # full sequence out for the next recurrent layer
    Dropout(0.3),
    SimpleRNN(32, return_sequences=True),
    Dropout(0.3),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.5061224489795918
Precision: 0.4727272727272727
F1-score: 0.4622222222222222
Recall: 0.45217391304347826


layers 3, drop-out 0.7

In [138]:
# SimpleRNN classifier: three recurrent layers with 0.7 dropout after the
# first two, ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    SimpleRNN(32, return_sequences=True),  # full sequence out for the next recurrent layer
    Dropout(0.7),
    SimpleRNN(32, return_sequences=True),
    Dropout(0.7),
    SimpleRNN(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.4857142857142857
Precision: 0.45864661654135336
F1-score: 0.49193548387096775
Recall: 0.5304347826086957


# LSTM

layers 2, drop-out 0.3

In [141]:
# LSTM classifier: two LSTM layers with 0.3 dropout between them,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    LSTM(32, return_sequences=True),  # full sequence out, so the next LSTM gets a sequence in
    Dropout(0.3),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6571428571428571
Precision: 0.6371681415929203
F1-score: 0.631578947368421
Recall: 0.6260869565217392


layers 2, drop-out 0.7

In [142]:
# LSTM classifier: two LSTM layers with 0.7 dropout between them,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    LSTM(32, return_sequences=True),  # full sequence out, so the next LSTM gets a sequence in
    Dropout(0.7),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6612244897959184
Precision: 0.6568627450980392
F1-score: 0.6175115207373272
Recall: 0.5826086956521739


layers 3, drop-out 0.3

In [143]:
# LSTM classifier: three LSTM layers with 0.3 dropout after the first two,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    LSTM(32, return_sequences=True),  # full sequence out for the next recurrent layer
    Dropout(0.3),
    LSTM(32, return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.5795918367346938
Precision: 0.53
F1-score: 0.673015873015873
Recall: 0.9217391304347826


layers 3, drop-out 0.7

In [144]:
# LSTM classifier: three LSTM layers with 0.7 dropout after the first two,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    LSTM(32, return_sequences=True),  # full sequence out for the next recurrent layer
    Dropout(0.7),
    LSTM(32, return_sequences=True),
    Dropout(0.7),
    LSTM(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6326530612244898
Precision: 0.6344086021505376
F1-score: 0.5673076923076923
Recall: 0.5130434782608696


# GRU

layers 2, drop-out 0.3

In [146]:
# GRU classifier: two GRU layers with 0.3 dropout between them,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    GRU(32, return_sequences=True),  # full sequence out, so the next GRU gets a sequence in
    Dropout(0.3),
    GRU(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6285714285714286
Precision: 0.6
F1-score: 0.6127659574468085
Recall: 0.6260869565217392


layers 2, drop-out 0.7

In [147]:
# GRU classifier: two GRU layers with 0.7 dropout between them,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    GRU(32, return_sequences=True),  # full sequence out, so the next GRU gets a sequence in
    Dropout(0.7),
    GRU(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6285714285714286
Precision: 0.625
F1-score: 0.5687203791469194
Recall: 0.5217391304347826


layers 3, drop-out 0.3

In [148]:
# GRU classifier: three GRU layers with 0.3 dropout after the first two,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    GRU(32, return_sequences=True),  # full sequence out for the next recurrent layer
    Dropout(0.3),
    GRU(32, return_sequences=True),
    Dropout(0.3),
    GRU(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6204081632653061
Precision: 0.5916666666666667
F1-score: 0.604255319148936
Recall: 0.6173913043478261


layers 3, drop-out 0.7

In [149]:
# GRU classifier: three GRU layers with 0.7 dropout after the first two,
# ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    GRU(32, return_sequences=True),  # full sequence out for the next recurrent layer
    Dropout(0.7),
    GRU(32, return_sequences=True),
    Dropout(0.7),
    GRU(32),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6122448979591837
Precision: 0.578125
F1-score: 0.6090534979423868
Recall: 0.6434782608695652


# Bidirectional LSTM

layers 2, drop-out 0.3

In [166]:
# Bidirectional LSTM classifier: two Bidirectional-wrapped LSTM layers with
# 0.3 dropout between them, ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    Bidirectional(LSTM(32, return_sequences=True)),  # full sequence out for the next recurrent layer
    Dropout(0.3),
    Bidirectional(LSTM(32)),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6408163265306123
Precision: 0.5828220858895705
F1-score: 0.683453237410072
Recall: 0.8260869565217391


layers 2, drop-out 0.7

In [165]:
# Bidirectional LSTM classifier: two Bidirectional-wrapped LSTM layers with
# 0.7 dropout between them, ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    Bidirectional(LSTM(32, return_sequences=True)),  # full sequence out for the next recurrent layer
    Dropout(0.7),
    Bidirectional(LSTM(32)),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6571428571428571
Precision: 0.7183098591549296
F1-score: 0.5483870967741936
Recall: 0.4434782608695652


layers 3, drop-out 0.3

In [167]:
# Bidirectional LSTM classifier: three Bidirectional-wrapped LSTM layers with
# 0.3 dropout after the first two, ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    Bidirectional(LSTM(32, return_sequences=True)),  # full sequence out for the next recurrent layer
    Dropout(0.3),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dropout(0.3),
    Bidirectional(LSTM(32)),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.636734693877551
Precision: 0.6083333333333333
F1-score: 0.6212765957446809
Recall: 0.6347826086956522


layers 3, drop-out 0.7

In [168]:
# Bidirectional LSTM classifier: three Bidirectional-wrapped LSTM layers with
# 0.7 dropout after the first two, ending in a single sigmoid unit.
model = Sequential([
    Embedding(10000, 32),
    Bidirectional(LSTM(32, return_sequences=True)),  # full sequence out for the next recurrent layer
    Dropout(0.7),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dropout(0.7),
    Bidirectional(LSTM(32)),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# The test split is also used as validation data during training.
model.fit(X_train_seq_padded, y_train, epochs=10, validation_data=(X_test_seq_padded, y_test))

# Convert sigmoid scores into hard 0/1 labels at the decision threshold.
threshold = 0.5
scores = model.predict(X_test_seq_padded)
y_pred = [int(score > threshold) for score in scores.ravel()]

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

for label, value in (("Accuracy:", accuracy), ("Precision:", precision),
                     ("F1-score:", f1), ("Recall:", recall)):
    print(label, value)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.6285714285714286
Precision: 0.5909090909090909
F1-score: 0.6315789473684211
Recall: 0.6782608695652174
