In [31]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from backend.preprocessing import preprocess_text
import pandas as pd

# Load the dataset. header=0 discards the file's own header row and `names`
# replaces it with the two column names used in the rest of this cell.
dataset = pd.read_csv('data.csv', delimiter=',', header=0, names=['review','sentiment'])

# Text preprocessing: run the project's preprocess_text on every raw review.
dataset['cleaned_review'] = dataset['review'].apply(preprocess_text)

# Convert the string labels to integers (positive -> 1, negative -> 0).
dataset['label'] = dataset['sentiment'].map({'positive':1, 'negative':0})

# Series.map leaves NaN for every value missing from the mapping (different
# casing, stray whitespace, empty cells, ...). Fail here, naming the offending
# values, instead of carrying NaN labels into the split and the model fitting.
unmapped_rows = dataset['label'].isna()
if unmapped_rows.any():
    unexpected_values = dataset.loc[unmapped_rows, 'sentiment'].unique().tolist()
    raise ValueError(
        f"{int(unmapped_rows.sum())} row(s) have a sentiment other than "
        f"'positive'/'negative'; first unexpected values: {unexpected_values[:10]}"
    )

# Train/test split: 20% held out for testing, fixed seed for reproducibility.
X = dataset['cleaned_review'].values
y = dataset['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Two text representations, both built from unigrams and bigrams.
bow_vectorizer = CountVectorizer(ngram_range=(1, 2))
tfidf_vectorizer = TfidfVectorizer(ngram_range=(1, 2))

# Bag-of-Words: fit on the training split only, then apply the fitted
# vectorizer to the test split.
X_train_bow = bow_vectorizer.fit_transform(X_train)
X_test_bow = bow_vectorizer.transform(X_test)

# TF-IDF: same procedure with the TF-IDF vectorizer.
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

# Naive Bayes on the Bag-of-Words features: fit, predict on the test split,
# then score against the true test labels.
nb_model_bow = MultinomialNB().fit(X_train_bow, y_train)
y_pred_bow = nb_model_bow.predict(X_test_bow)
accuracy_bow = accuracy_score(y_test, y_pred_bow)

# Naive Bayes on the TF-IDF features: a separate model instance, same steps.
nb_model_tfidf = MultinomialNB().fit(X_train_tfidf, y_train)
y_pred_tfidf = nb_model_tfidf.predict(X_test_tfidf)
accuracy_tfidf = accuracy_score(y_test, y_pred_tfidf)

# Build the per-class classification report for each model.
bow_report = classification_report(y_test, y_pred_bow)
tfidf_report = classification_report(y_test, y_pred_tfidf)

# Print accuracy (two decimals) followed by the report, one line per item.
print(
    f"Bag-of-Words Naive Bayes Accuracy: {accuracy_bow:.2f}",
    "Bag-of-Words Classification Report:",
    bow_report,
    sep="\n",
)

print(
    f"TF-IDF Naive Bayes Accuracy: {accuracy_tfidf:.2f}",
    "TF-IDF Classification Report:",
    tfidf_report,
    sep="\n",
)


Bag-of-Words Naive Bayes Accuracy: 0.88
Bag-of-Words Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.90      0.88      4961
           1       0.90      0.87      0.88      5039

    accuracy                           0.88     10000
   macro avg       0.88      0.88      0.88     10000
weighted avg       0.88      0.88      0.88     10000

TF-IDF Naive Bayes Accuracy: 0.89
TF-IDF Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.91      0.89      4961
           1       0.90      0.87      0.89      5039

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



In [37]:
import os

import joblib

# joblib.dump does not create missing parent directories; without this the
# cell fails with FileNotFoundError when models/nb/ does not exist yet.
os.makedirs('models/nb', exist_ok=True)

# Persist the TF-IDF model together with the vectorizer it was fitted with.
joblib.dump(nb_model_tfidf, 'models/nb/nb_model_tfidf.joblib')
joblib.dump(tfidf_vectorizer, 'models/nb/nb_tfidf_vectorizer.joblib')

# Also persist the Bag-of-Words model and its vectorizer.
joblib.dump(nb_model_bow, 'models/nb/nb_model_bow.joblib')
joblib.dump(bow_vectorizer, 'models/nb/nb_bow_vectorizer.joblib')


['models/nb/nb_bow_vectorizer.joblib']

In [43]:
import joblib

# Restore the persisted TF-IDF vectorizer and the Naive Bayes model from disk.
loaded_tfidf_vectorizer = joblib.load('models/nb/nb_tfidf_vectorizer.joblib')
loaded_nb_model = joblib.load('models/nb/nb_model_tfidf.joblib')

# A new, unseen review to classify.
new_review = "this movie is suck"

# Run the review through preprocess_text, the same function applied to the
# training reviews, before vectorizing it.
processed_new_review = preprocess_text(new_review)

# The vectorizer expects an iterable of documents, hence the one-element list.
X_new = loaded_tfidf_vectorizer.transform([processed_new_review])

# Predict the label (the training cell encoded positive as 1, negative as 0).
prediction = loaded_nb_model.predict(X_new)
print("Tahmin Sonucu:", prediction)


Tahmin Sonucu: [0]
