## For testing this model:
Run the library-import, data-reading, and preprocessing cells, then go to the last section to predict labels for new data.

### Import libraries 

In [1]:
import pickle
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report
from hazm import Normalizer, word_tokenize, stopwords_list, Stemmer

### Read data files

In [2]:
# Train Data
# Labeled training set, read from a path relative to the working directory.
data = pd.read_csv('dataset/train.csv')
# `comments` and `sentiments` come from the same frame, so they are aligned
# index-for-index (text -> label).
comments = data['comment'].tolist()
sentiments = data['sentiment'].tolist()
# Test Data
# Test set that the final model labels later on; only its 'comment' column is read here.
data_test = pd.read_csv('dataset/test.csv')
comments_test = data_test['comment'].tolist()

### Function for preprocessing Persian comments

In [3]:
# Shared, module-level helpers reused by every call to preprocess() and by the
# prediction functions further down.
normalizer = Normalizer()
stemmer = Stemmer()
vectorizer = TfidfVectorizer()
# Build the stop-word collection once. Calling stopwords_list() inside the
# per-token filter rebuilt the list for every single token and then scanned it
# linearly; a frozenset gives the same membership answers with O(1) lookups.
STOPWORDS = frozenset(stopwords_list())


def preprocess(comment):
    """Turn one raw comment into a cleaned, space-joined string of stems.

    Steps, in order: normalize the text with hazm's ``Normalizer``, tokenize
    it with ``word_tokenize``, drop tokens that appear in hazm's stop-word
    list, and stem the remaining tokens with ``Stemmer``.

    Stop-word filtering is applied to the unstemmed tokens, i.e. before
    stemming.

    Args:
        comment: The raw comment text.

    Returns:
        A single string containing the surviving stems separated by one space
        (an empty string when no token survives).
    """
    comment = normalizer.normalize(comment)
    tokens = word_tokenize(comment)
    tokens = [stemmer.stem(token) for token in tokens if token not in STOPWORDS]
    return ' '.join(tokens)

# Clean the training comments, then fit the TF-IDF vectorizer on them and
# produce the training matrix in one step.
preprocessed_comments = list(map(preprocess, comments))
X = vectorizer.fit_transform(preprocessed_comments)
# Clean the test comments and project them with the already-fitted vectorizer
# (transform only, so the test set does not influence the fitted vocabulary).
preprocessed_comments_test = list(map(preprocess, comments_test))
X_test = vectorizer.transform(preprocessed_comments_test)

### Test model 

Based on experiments with several algorithms (SVM, Naive Bayes, Random Forest),
SVM gave the best performance for this project, so a linear SVM is used below.

In [4]:
# Hold out 20% of the labeled data for validation; random_state pins the split.
# Mind the casing: lowercase `x_test` is this validation split, whereas uppercase
# `X_test` (built in the preprocessing cell) is the separate set from dataset/test.csv.
# NOTE(review): X comes from a vectorizer that was fitted on all training comments,
# including the rows held out here, so the validation scores may be slightly optimistic.
X_train, x_test, y_train, y_test = train_test_split(X, sentiments, test_size=0.2, random_state=42)

In [5]:
# Evaluation-only classifier: a LinearSVC with default hyperparameters, trained on
# the 80% split so it can be scored against the held-out 20%.
# NOTE(review): no random_state is passed; consider setting one if exact
# run-to-run reproducibility of the scores matters — confirm against sklearn docs.
model_test = LinearSVC()
model_test.fit(X_train, y_train)



In [6]:
# Score the validation split and print the per-class precision / recall / F1 table.
y_pred = model_test.predict(x_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

    Negative       0.83      0.83      0.83       248
     Neutral       0.57      0.51      0.54        72
    Positive       0.82      0.85      0.83       189

    accuracy                           0.79       509
   macro avg       0.74      0.73      0.74       509
weighted avg       0.79      0.79      0.79       509



### Main model on all training data

In [7]:
# Final classifier: same estimator type as the evaluation model, but trained on
# the full labeled matrix X (no hold-out), since it is the one used for prediction.
model = LinearSVC()
model.fit(X, sentiments)



### Predicting test comments

In [8]:
# Label the test-set matrix with the final model.
predicted_sentiments = model.predict(X_test)
# Adds (or overwrites) the 'sentiment' column on data_test in place.
data_test['sentiment'] = predicted_sentiments

### Saving new test.csv file

In [9]:
# Caution: this writes to the same path the test data was read from, so the original
# dataset/test.csv is replaced by a copy that also contains the 'sentiment' column.
# index=False keeps the DataFrame index out of the file.
data_test.to_csv('dataset/test.csv', index=False)

### Saving the trained model

In [10]:
# Persist only the fitted classifier. The TF-IDF vectorizer is not written to disk,
# so it has to be refitted by the preprocessing cell before the pickle is usable
# in a fresh session.
with open('model/trained_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

### Functions for predicting comment labels

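Before running this cell, first run the library-import, data-reading, and preprocessing cells (the TF-IDF vectorizer is fitted there and is not stored in the saved model file).
Note: to predict labels for a whole dataset, pass a list of comment strings, not an Excel or CSV file!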

In [11]:
# Load the saved classifier from disk.
# Security: unpickling executes arbitrary code embedded in the file, so only load
# a model file that you produced yourself or otherwise trust.
with open('model/trained_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# Prediction function for a single comment
def predict(comment: str):
    """Predict the sentiment label of one raw comment.

    The comment is cleaned with ``preprocess``, vectorized with the
    module-level ``vectorizer`` (which must already be fitted), and classified
    by ``loaded_model``.

    Args:
        comment: The raw comment text.

    Returns:
        Whatever ``loaded_model.predict`` returns for a one-row input; the
        result is a one-element collection, not a bare label.
    """
    cleaned = preprocess(comment)
    features = vectorizer.transform([cleaned])
    return loaded_model.predict(features)

# Prediction function for a list of comments
def predict_comments(comments: list):
    """Predict sentiment labels for several raw comments at once.

    Each comment is cleaned with ``preprocess``; the batch is then vectorized
    with the module-level ``vectorizer`` (which must already be fitted) and
    classified by ``loaded_model``.

    Args:
        comments: A list (or other iterable) of raw comment strings. A single
            bare string is also accepted and treated as a one-comment batch.

    Returns:
        Whatever ``loaded_model.predict`` returns for the batch: one label per
        input comment, in input order.
    """
    # A str is itself iterable, so without this guard it would be processed
    # one character at a time and silently yield one label per character.
    if isinstance(comments, str):
        comments = [comments]
    preprocessed_comments = [preprocess(comment) for comment in comments]
    x_new = vectorizer.transform(preprocessed_comments)
    return loaded_model.predict(x_new)

In [12]:
# Smoke test: classify one hand-written Persian comment with predict().
sentence ='فروختن ماشین سرقتی به مردم ، ۶ ماه پیش ماشین خریدم  از این نمایشگاه ایست بازرسی جلومو گرفتن ماشینو بردن پارکینگ خودمم بازداشت شدم تا بفهمونم از این نمایشگاه خریدم دوروز نگه داشتن بعد معلوم شد ماشین سرقتی بوده خودشون شماره شاسی کوبیدن پلاک درست کردن فروختن'
predict(sentence)

array(['Negative'], dtype='<U8')

In [13]:
# Batch prediction over every comment of the test set; as the cell's last
# expression, the full label array is displayed.
predict_comments(comments_test)

array(['Positive', 'Negative', 'Negative', 'Positive', 'Negative',
       'Negative', 'Negative', 'Positive', 'Negative', 'Neutral',
       'Positive', 'Neutral', 'Neutral', 'Positive', 'Positive',
       'Positive', 'Negative', 'Positive', 'Positive', 'Positive',
       'Negative', 'Neutral', 'Negative', 'Positive', 'Negative',
       'Neutral', 'Negative', 'Positive', 'Positive', 'Negative',
       'Positive', 'Negative', 'Positive', 'Positive', 'Negative',
       'Positive', 'Negative', 'Positive', 'Neutral', 'Neutral',
       'Positive', 'Positive', 'Negative', 'Positive', 'Positive',
       'Negative', 'Negative', 'Positive', 'Negative', 'Positive',
       'Negative', 'Positive', 'Negative', 'Negative', 'Negative',
       'Positive', 'Negative', 'Positive', 'Positive', 'Positive',
       'Positive', 'Positive', 'Negative', 'Negative', 'Positive',
       'Negative', 'Positive', 'Negative', 'Positive', 'Positive',
       'Neutral', 'Positive', 'Positive', 'Negative', 'Negative',
   

test