In [None]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models



import numpy as np

# مسیر فایل CSV روی دسکتاپ
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
csv_path = os.path.join(desktop_path, "IMDB Dataset.csv")

# خواندن داده‌ها از CSV
df = pd.read_csv(csv_path)

# تبدیل برچسب‌ها به 0 و 1
df['label'] = (df['sentiment'] == 'positive').astype(int)

# جدا کردن داده‌های آموزش و تست (80/20)
from sklearn.model_selection import train_test_split
X_train_texts, X_test_texts, y_train, y_test = train_test_split(
    df['review'].values,
    df['label'].values,
    test_size=0.2,
    random_state=42
)

# تبدیل متن‌ها به دنباله عددی
vocab_size = 10000
maxlen = 200
embedding_dim = 64

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train_texts)

X_train = tokenizer.texts_to_sequences(X_train_texts)
X_test  = tokenizer.texts_to_sequences(X_test_texts)

X_train = pad_sequences(X_train, maxlen=maxlen, padding='post', truncating='post')
X_test  = pad_sequences(X_test, maxlen=maxlen, padding='post', truncating='post')

# ساخت مدل RNN ساده
model = models.Sequential([
    layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen, mask_zero=True),
    layers.SimpleRNN(64),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# آموزش مدل
history = model.fit(X_train, y_train, epochs=5, batch_size=128, validation_split=0.2, verbose=1)

# ارزیابی روی داده تست
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.3f}")


Epoch 1/10




[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - accuracy: 0.5560 - loss: 0.6832 - val_accuracy: 0.6459 - val_loss: 0.6254
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.7502 - loss: 0.5184 - val_accuracy: 0.7656 - val_loss: 0.4922
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.8496 - loss: 0.3543 - val_accuracy: 0.7716 - val_loss: 0.5058
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 24ms/step - accuracy: 0.9630 - loss: 0.1189 - val_accuracy: 0.7596 - val_loss: 0.6218
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 25ms/step - accuracy: 0.9907 - loss: 0.0388 - val_accuracy: 0.8134 - val_loss: 0.6421
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 26ms/step - accuracy: 0.9966 - loss: 0.0179 - val_accuracy: 0.7915 - val_loss: 0.7630
Epoch 7/10
[1m250/250[0m [32m━

In [7]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
import numpy as np

# مسیر فایل CSV روی دسکتاپ
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
csv_path = os.path.join(desktop_path, "IMDB Dataset.csv")

# خواندن داده‌ها از CSV
df = pd.read_csv(csv_path)

# تبدیل برچسب‌ها به 0 و 1
df['label'] = (df['sentiment'] == 'positive').astype(int)

# جدا کردن داده‌های آموزش و تست (80/20)
from sklearn.model_selection import train_test_split
X_train_texts, X_test_texts, y_train, y_test = train_test_split(
    df['review'].values,
    df['label'].values,
    test_size=0.2,
    random_state=42
)

# تبدیل متن‌ها به دنباله عددی
vocab_size = 10000
maxlen = 200
embedding_dim = 64

tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train_texts)

X_train = tokenizer.texts_to_sequences(X_train_texts)
X_test  = tokenizer.texts_to_sequences(X_test_texts)

X_train = pad_sequences(X_train, maxlen=maxlen, padding='post', truncating='post')
X_test  = pad_sequences(X_test, maxlen=maxlen, padding='post', truncating='post')

# ساخت مدل RNN ساده
model = Sequential([
    Embedding(vocab_size, 64, input_length=maxlen),
    LSTM(64),  # یا GRU(64)
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# آموزش مدل
history = model.fit(X_train, y_train, epochs=20, batch_size=128, validation_split=0.2, verbose=1)

# ارزیابی روی داده تست
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_acc:.3f}")

Epoch 1/20




[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 48ms/step - accuracy: 0.5158 - loss: 0.6928 - val_accuracy: 0.5115 - val_loss: 0.7048
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 47ms/step - accuracy: 0.5661 - loss: 0.6698 - val_accuracy: 0.5385 - val_loss: 0.6870
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 46ms/step - accuracy: 0.5802 - loss: 0.6614 - val_accuracy: 0.6737 - val_loss: 0.6645
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.5666 - loss: 0.6764 - val_accuracy: 0.5953 - val_loss: 0.6369
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 47ms/step - accuracy: 0.6707 - loss: 0.5663 - val_accuracy: 0.8098 - val_loss: 0.4865
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 48ms/step - accuracy: 0.6927 - loss: 0.5956 - val_accuracy: 0.6870 - val_loss: 0.6018
Epoch 7/20
[1m250/250[0m 

In [8]:
def predict_sentiment(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=maxlen, padding='post')
    pred = model.predict(padded)[0][0]
    return "مثبت" if pred > 0.5 else "منفی"

In [10]:
print(predict_sentiment("Basically there's a family where a little boy (Jake) thinks there's a zombie in his closet & his parents are fighting all the time.<br /><br />This movie is slower than a soap opera... and suddenly, Jake decides to become Rambo and kill the zombie.<br /><br />OK, first of all when you're going to make a film you must Decide if its a thriller or a drama! As a drama the movie is watchable. Parents are divorcing & arguing like in real life. And then we have Jake with his closet which totally ruins all the film! I expected to see a BOOGEYMAN similar movie, and instead i watched a drama with some meaningless thriller spots.<br /><br />3 out of 10 just for the well playing parents & descent dialogs. As for the shots with Jake: just ignore them."))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
منفی
