In [199]:
from datasets import load_dataset
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import string
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [201]:
# Load the dataset identified by "PNLPhub/snappfood-sentiment-analysis".
dataset = load_dataset("PNLPhub/snappfood-sentiment-analysis")

# Write each split to a local CSV file first ...
for split_name, csv_path in (
    ("train", "snappfood_sentiment_train.csv"),
    ("test", "snappfood_sentiment_test.csv"),
):
    dataset[split_name].to_csv(csv_path, index=False, encoding='utf-8')

# ... then read those files back as pandas DataFrames (train first, then test).
df_train, df_test = (
    pd.read_csv(saved_csv)
    for saved_csv in ("snappfood_sentiment_train.csv", "snappfood_sentiment_test.csv")
)

In [203]:
# Stop-word list: every line of the text file is treated as one stop word.
file_path = 'stopwords.txt'
# 'utf-8-sig' decodes plain UTF-8 exactly like 'utf-8' but also discards a leading
# BOM, which would otherwise stay attached to the first stop word.
with open(file_path, 'r', encoding='utf-8-sig') as f:
    # Kept as a set so the per-token membership test done while cleaning is O(1)
    # instead of a scan over the whole list. Surrounding whitespace is stripped and
    # blank lines are dropped, since such entries could never equal a split() token.
    stop_words = {line.strip() for line in f.read().splitlines() if line.strip()}

# Characters removed from every sentence: ASCII punctuation plus the extra marks
# listed here.
persian_punctuation = ',؟،؛٪٫×»،«'
all_punctuation = string.punctuation + persian_punctuation

In [205]:
def cleaning_Sentence(sentence):
    """Normalise one piece of text: lowercase, strip punctuation, drop stop words.

    Uses the module-level ``all_punctuation`` (characters to delete) and
    ``stop_words`` (tokens to discard).

    Args:
        sentence: The raw text to clean.

    Returns:
        The remaining whitespace-separated tokens joined by single spaces.
    """
    lowered = sentence.lower()
    # A translation table with only a deletion argument removes those characters.
    deletion_table = str.maketrans('', '', all_punctuation)
    without_punctuation = lowered.translate(deletion_table)

    kept_words = []
    for candidate in without_punctuation.split():
        if candidate in stop_words:
            continue
        kept_words.append(candidate)
    return ' '.join(kept_words)

In [207]:
# Clean the "comment" column of both splits, replacing the raw text.
# Series.map applies the cleaner to every row, including the last one (an index
# loop over range(len(df) - 1) stops one row short), and it does not rely on the
# frame having the default 0..n-1 index labels.
df_train["comment"] = df_train["comment"].map(cleaning_Sentence)
df_test["comment"] = df_test["comment"].map(cleaning_Sentence)


In [61]:
# Persist both cleaned frames as UTF-8 CSV files without the index column.
for cleaned_frame, output_path in (
    (df_train, "cleared_data_train.csv"),
    (df_test, "cleared_data_test.csv"),
):
    cleaned_frame.to_csv(output_path, index=False, encoding="utf-8")

In [208]:
# Targets and raw text for the training split.
y_train = df_train['label']
train_comments = df_train['comment']

# Fit the vectorizer on the training comments and encode them in one call; the
# fitted `vectorizer` is reused later to encode unseen text.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(train_comments)

In [211]:
# Train a multinomial Naive Bayes classifier on the vectorized training comments.
model = MultinomialNB()
model.fit(X=X_train, y=y_train)

In [213]:
# Encode the test comments with the already-fitted vectorizer (transform only,
# no re-fitting), then let the trained model label them.
test_comments = df_test['comment']
X_test = vectorizer.transform(test_comments)
predictions = model.predict(X_test)

In [215]:
# Ground-truth labels for the test split.
y_test = df_test['label']

# Precision, recall and F1 are macro-averaged over the two classes; the shared
# arguments are spelled out once and reused for all three scores.
macro_options = {'average': 'macro', 'labels': ['HAPPY', 'SAD']}

accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions, **macro_options)
recall = recall_score(y_test, predictions, **macro_options)
f1 = f1_score(y_test, predictions, **macro_options)

In [217]:
# Print each evaluation score on its own line, label first.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("f1:", f1),
):
    print(metric_label, metric_value)

Accuracy: 0.8361563157312077
Precision: 0.8406657505849695
Recall: 0.8357660579977039
f1: 0.8354943644855535


In [219]:
# Try the trained model on a few hand-written comments.
array = ["اصلا دوستش نداشتم ","فوق العاده بود","بی نهایت خوشمزه بود","هر چی از خوب بودنش بگم کم گفتم"]
# Run the samples through the same cleaning function before vectorizing them.
cleared_data = [cleaning_Sentence(a) for a in array]
print(cleared_data)
# Dedicated names leave the test-set `X_test` / `predictions` untouched, so the
# evaluation cell still scores the real test split if it is re-run after this one.
X_samples = vectorizer.transform(cleared_data)
sample_predictions = model.predict(X_samples)
print(sample_predictions)


['اصلا دوستش نداشتم', 'فوق العاده بود', 'نهایت خوشمزه بود', 'چی خوب بودنش بگم کم گفتم']
['SAD' 'HAPPY' 'HAPPY' 'HAPPY']
