# **Sentiment Analysis with RNN**

Solve classification problem ( Sentiment Analysis in NLP) with RNN.(Deep Learning Language Model)

Sentiment Analysis:
- Sentiment Analysis is a task in NLP that involves determining the sentiment or emotion expressed in a given text.
- It is used to classify the overall sentiment of a piece of text as positive, negative, or neutral.

Restaurant Review Dataset


# 1. Importing libraries

In [1]:
import numpy as np
import pandas as pd

from gensim.models import Word2Vec

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# 2. Create Dataset

In [2]:
data = {
    "text": [
        "Yemekler çok lezzetliydi",
        "Servis çok yavaştı",
        "Mekan tertemiz ve ferahtı",
        "Garsonlar güler yüzlüydü",
        "Fiyatlar çok pahalıydı",
        "Yemekler taze ve sıcaktı",
        "Masalar kirliydi",
        "Atmosfer çok güzeldi",
        "Çok uzun süre bekledik",
        "Tatlılar muhteşemdi",
        "Yemekler soğuk geldi",
        "Personel çok ilgiliydi",
        "Porsiyonlar çok küçüktü",
        "Manzara harikaydı",
        "Tuvaletler temiz değildi",
        "Çorbalar enfesti",
        "Müzik çok gürültülüydü",
        "Kahve mükemmeldi",
        "Hesap yanlış geldi",
        "Dekorasyon çok şıktı",
        "Yemekte saç bulduk",
        "Servis hızlıydı",
        "İçecekler ılıktı",
        "Menü çeşitleri zengindi",
        "Masada karınca gördük",
        "Ekmekler tazeydi",
        "Klima çalışmıyordu",
        "Çalışanlar profesyoneldi",
        "Yemek yanıktı",
        "Otopark genişti",
        "Rezervasyon karışmıştı",
        "Meze çeşitleri boldu",
        "Tabaklar kirliydi",
        "Bahçesi çok güzeldi",
        "Böcek gördük",
        "Taze sıkılmış meyve suları harikaydı",
        "Yemek çiğdi",
        "Wifi hızlıydı",
        "Çatal bıçaklar kirliydi",
        "Çocuk menüsü çok iyiydi",
        "Yemekte plastik bulduk",
        "Kahvaltı zengin ve lezzetliydi",
        "Salata bayattı",
        "Personel çok nazikti",
        "Fiyat performans kötüydü",
        "Izgara ürünleri mükemmeldi",
        "Masa düzeni özensizdi",
        "Tatlılar tazeydi",
        "Sipariş unutuldu",
        "Manzaralı masa deneyimi harikaydı"
    ],
    "label": [
        "positive",
        "negative",
        "positive",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive",
        "negative",
        "positive"
    ]
}

In [3]:
df = pd.DataFrame(data)

In [4]:
df

Unnamed: 0,text,label
0,Yemekler çok lezzetliydi,positive
1,Servis çok yavaştı,negative
2,Mekan tertemiz ve ferahtı,positive
3,Garsonlar güler yüzlüydü,positive
4,Fiyatlar çok pahalıydı,negative
5,Yemekler taze ve sıcaktı,positive
6,Masalar kirliydi,negative
7,Atmosfer çok güzeldi,positive
8,Çok uzun süre bekledik,negative
9,Tatlılar muhteşemdi,positive


# 3. Data Cleaning and Preprocessing: Tokenization, Padding, Encoding


In [5]:
#tokenization

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])
word_index = tokenizer.word_index

#padding process

max_length = max(len(seq) for seq in sequences)
X = pad_sequences(sequences, maxlen=max_length)
print(X.shape)

#label encoding

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['label'])

# train test split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(50, 5)
(40, 5) (10, 5) (40,) (10,)


# 4. Word Embeddings:Word2Vec


In [6]:
sentences = [text.split() for text in df["text"]]
word2vec_model = Word2Vec(sentences, vector_size=50, window=5, min_count=1) # Warning

embedding_dim = 50
embedding_matrix = np.zeros((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
    if word in word2vec_model.wv:
        embedding_matrix[i] = word2vec_model.wv[word]


In [7]:
embedding_matrix


array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.00106203,  0.00050121,  0.01021199, ...,  0.01925021,
         0.00995342,  0.01849777],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-0.00343516,  0.01282185, -0.01855734, ..., -0.01115614,
        -0.00759128, -0.00118066],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.00400265, -0.0195285 ,  0.01042388, ...,  0.00888986,
        -0.01621606, -0.00811627]])

# 5. Model Creation: Build train and test RNN model


In [8]:
model = Sequential()

# embedding layer
model.add(Embedding(input_dim = len(word_index) + 1, output_dim = embedding_dim, weights=[embedding_matrix], input_length=max_length, trainable=False))


# RNN layer
model.add(SimpleRNN(units=50, return_sequences=False))


# Output layer
model.add(Dense(1, activation='sigmoid'))



In [9]:
# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 776ms/step - accuracy: 0.5979 - loss: 0.6936 - val_accuracy: 0.7000 - val_loss: 0.6903
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 170ms/step - accuracy: 0.5604 - loss: 0.6919 - val_accuracy: 0.7000 - val_loss: 0.6886
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.5771 - loss: 0.6911 - val_accuracy: 0.7000 - val_loss: 0.6872
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step - accuracy: 0.5646 - loss: 0.6897 - val_accuracy: 0.6000 - val_loss: 0.6861
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step - accuracy: 0.5437 - loss: 0.6889 - val_accuracy: 0.7000 - val_loss: 0.6859
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 166ms/step - accuracy: 0.5708 - loss: 0.6879 - val_accuracy: 0.7000 - val_loss: 0.6858
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x21785a12200>

In [13]:
#evaluate rnn model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss:{test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - accuracy: 0.7000 - loss: 0.6833
Test Loss:0.6832640171051025
Test Accuracy: 0.699999988079071


In [28]:
def classify_sentence(sentence):

    seq=tokenizer.texts_to_sequences([sentence])
    padded_seq = pad_sequences(seq,maxlen=max_length)

    prediction = model.predict(padded_seq)

    predicted_class = (prediction > 0.5).astype(int)
    label = "positive" if predicted_class[0][0] == 1 else "negative"

    return label

In [31]:
sentence = "Restorant temiz degildi."

In [32]:
result = classify_sentence(sentence)
print(f"Result: {result}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 532ms/step


Result: negative


In [34]:
# Save the model in native Keras format
model.save('sentiment_model.keras')  # Changed from .h5 to .keras

# Save the tokenizer
import pickle
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Save max_length
with open('max_length.txt', 'w') as f:
    f.write(str(max_length))