In [1]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
import numpy as np

In [2]:
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
csv_path = os.path.join(desktop_path, "IMDB Dataset.csv")

df = pd.read_csv(csv_path)

In [3]:
df['label'] = (df['sentiment'] == 'positive').astype(int)

from sklearn.model_selection import train_test_split
X_train_texts, X_test_texts, y_train, y_test = train_test_split(
    df['review'].values,
    df['label'].values,
    test_size=0.2,
    random_state=42
)


In [4]:
vocab_size = 10000
maxlen = 200
embedding_dim = 64

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train_texts)

X_train = tokenizer.texts_to_sequences(X_train_texts)
X_test  = tokenizer.texts_to_sequences(X_test_texts)

X_train = pad_sequences(X_train, maxlen=maxlen, padding='post', truncating='post')
X_test  = pad_sequences(X_test, maxlen=maxlen, padding='post', truncating='post')


In [5]:
model = Sequential([
    Embedding(vocab_size, 64, input_length=maxlen),
    LSTM(64),  # یا GRU(64)
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [6]:
history = model.fit(X_train, y_train, epochs=20, batch_size=128, validation_split=0.2, verbose=1)


Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 48ms/step - accuracy: 0.5389 - loss: 0.6828 - val_accuracy: 0.5817 - val_loss: 0.6707
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 45ms/step - accuracy: 0.5821 - loss: 0.6702 - val_accuracy: 0.5816 - val_loss: 0.6713
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 46ms/step - accuracy: 0.6157 - loss: 0.6547 - val_accuracy: 0.6951 - val_loss: 0.5936
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 47ms/step - accuracy: 0.6800 - loss: 0.6130 - val_accuracy: 0.7228 - val_loss: 0.5823
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 47ms/step - accuracy: 0.6783 - loss: 0.6065 - val_accuracy: 0.5282 - val_loss: 0.6852
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 47ms/step - accuracy: 0.5972 - loss: 0.6393 - val_accuracy: 0.7616 - val_loss: 0.5405
Epoch 7/20
[1m2

In [7]:
def predict_sentiment(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=maxlen, padding='post')
    pred = model.predict(padded)[0][0]
    return "مثبت" if pred > 0.5 else "منفی"

In [9]:
print(predict_sentiment("this movie was shit bruh"))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
منفی
