In [None]:
from utils import process_comment, lookup
import json
from nltk.corpus import stopwords, twitter_samples
import numpy as np


In [None]:
# Load the labelled comment corpora. Each file is read inside a `with` block
# so the handle is closed deterministically, and decoded explicitly as UTF-8
# (the standard JSON encoding) instead of the platform default, which can
# garble Turkish characters on non-UTF-8 locales.
# NOTE(review): assumes both files were saved as UTF-8 -- confirm.
with open('all_positive_comments.json', encoding='utf-8') as positive_file:
    all_positive_comments = json.load(positive_file)
with open('all_negative_comments.json', encoding='utf-8') as negative_file:
    all_negative_comments = json.load(negative_file)

In [None]:
# Per class: the first 4000 comments are used for training, the next 4000
# (indices 4000-7999) are held out for testing.
train_pos, test_pos = all_positive_comments[:4000], all_positive_comments[4000:8000]
train_neg, test_neg = all_negative_comments[:4000], all_negative_comments[4000:8000]

# Positives first, then negatives -- the label vectors below rely on this order.
train_x = train_pos + train_neg
test_x = test_pos + test_neg

# Label 1.0 for every positive comment followed by 0.0 for every negative one.
train_y = np.concatenate([np.ones(len(train_pos)), np.zeros(len(train_neg))])
test_y = np.concatenate([np.ones(len(test_pos)), np.zeros(len(test_neg))])

In [None]:
# Run the preprocessing helper on one hand-written review and show what it
# returns; `custom_comment` ends up holding the processed result.
custom_comment = process_comment(
    "Bu ürünü gerçekten çok beğendim. Çok kullanışlı. Herkese öneriyorum."
)

print(custom_comment)

In [None]:
def count_comments(result, comments, ys):
    """Accumulate (word, label) occurrence counts into ``result``.

    Args:
        result: dict mapping ``(word, label)`` tuples to counts. It is updated
            in place, so passing a non-empty dict continues an earlier tally.
        comments: iterable of raw comment strings.
        ys: iterable of labels, paired positionally with ``comments``
            (``zip`` stops at the shorter of the two).

    Returns:
        The same ``result`` dict that was passed in.
    """
    for label, text in zip(ys, comments):
        # process_comment yields the words of the comment; every occurrence
        # counts, so a word repeated within one comment is counted repeatedly.
        for token in process_comment(text):
            key = (token, label)
            result[key] = result.get(key, 0) + 1
    return result

In [None]:
# Smoke-test count_comments on a tiny hand-labelled sample
# (the notebook uses 1 for positive and 0 for negative comments).
result = {}
tweets = ['ürün güzel', 'beğenmedim ', 'tavsiye ederim', 'kötü olmuş', 'almalısın']
ys = [1, 0, 1, 0, 1]
# Last expression of the cell: the notebook displays the returned dict,
# which is the same object as `result`.
count_comments(result, tweets, ys)

In [None]:
# Build the (word, label) -> count table from the whole training split,
# starting from a fresh dict so re-running the cell does not double-count.
freqs = count_comments({}, train_x, train_y)

In [None]:
def train_naive_bayes(freqs, train_x, train_y):
    """Fit a Naive Bayes sentiment model from (word, label) counts.

    Args:
        freqs: dict mapping ``(word, label)`` tuples to occurrence counts.
            A label greater than 0 is tallied as positive, any other label
            as negative.
        train_x: training comments. Not read by the computation; kept so the
            signature stays compatible with existing callers.
        train_y: sequence of labels, one per training comment. The positive
            share is taken as ``sum(train_y) / len(train_y)``, so labels are
            expected to be 0/1.

    Returns:
        Tuple ``(logprior, loglikelihood)``:
        ``logprior`` is ``log(P(pos)) - log(P(neg))`` over the documents;
        ``loglikelihood`` maps each vocabulary word to
        ``log(P(word | pos)) - log(P(word | neg))`` with add-one smoothing.

    Raises:
        ZeroDivisionError: if ``train_y`` is empty.
    """
    loglikelihood = {}

    # A word seen under both labels appears twice among the keys of `freqs`.
    # dict.fromkeys removes those duplicates while keeping first-seen order,
    # so each word is scored exactly once below.
    vocab = list(dict.fromkeys(pair[0] for pair in freqs))
    V = len(vocab)

    # Total number of word occurrences per class.
    N_pos = N_neg = 0
    for pair, count in freqs.items():
        if pair[1] > 0:
            N_pos += count
        else:
            N_neg += count

    # Fraction of positive / negative training documents.
    D = len(train_y)
    D_pos = sum(train_y) / D
    D_neg = 1 - D_pos

    logprior = np.log(D_pos) - np.log(D_neg)

    for word in vocab:
        # lookup comes from utils; presumably it returns the count stored for
        # (word, label) and 0 when the pair is absent -- verify against utils.
        freq_pos = lookup(freqs, word, 1)
        freq_neg = lookup(freqs, word, 0)

        # Add-one smoothing keeps both probabilities strictly positive, so
        # the logarithms below are always finite.
        p_w_pos = (freq_pos + 1) / (N_pos + V)
        p_w_neg = (freq_neg + 1) / (N_neg + V)

        loglikelihood[word] = np.log(p_w_pos) - np.log(p_w_neg)

    return logprior, loglikelihood

In [None]:
# Train on the training split. logprior is log P(pos) - log P(neg) over the
# training documents, so it prints as 0 when both classes are equally frequent.
logprior, loglikelihood = train_naive_bayes(freqs, train_x, train_y)
print(logprior)

In [None]:
def naive_bayes_predict(tweet, logprior, loglikelihood):
    """Score one comment with the trained Naive Bayes model.

    Args:
        tweet: raw comment text; it is tokenised with ``process_comment``.
        logprior: log prior ratio returned by ``train_naive_bayes``.
        loglikelihood: dict mapping a word to its log-likelihood ratio.

    Returns:
        ``logprior`` plus the log-likelihood ratio of every word occurrence in
        the comment. Words missing from ``loglikelihood`` contribute nothing.
    """
    # Start from the prior, then add the per-word evidence in token order.
    score = 0 + logprior
    for token in process_comment(tweet):
        score += loglikelihood.get(token, 0)
    return score

In [None]:
# Score a hand-written complaint. The printed value is the model's score for
# this comment; test_naive_bayes treats a score above 0 as positive.
my_comment = 'ürün kırık geldi hiç beğenmedim'
p = naive_bayes_predict(my_comment, logprior, loglikelihood)
print('The expected output is', p)

In [None]:
# Score a hand-written recommendation. The printed value is the model's
# score for this comment; a score above 0 is classified as positive.
my_comment= 'herkese tavsiye ederim'
p = naive_bayes_predict(my_comment, logprior, loglikelihood)
print('The expected output is', p)

In [None]:
# Score a single-word comment. If the word is not in loglikelihood the
# printed score is just logprior, because unknown words add nothing.
my_comment= 'ortalama'
p = naive_bayes_predict(my_comment, logprior, loglikelihood)
print('The expected output is', p)

In [None]:
def test_naive_bayes(test_x, test_y, logprior, loglikelihood, naive_bayes_predict=naive_bayes_predict):
    """Measure classification accuracy on a labelled test set.

    Args:
        test_x: sequence of raw comments to classify.
        test_y: sequence of true labels paired positionally with ``test_x``
            (expected to be 0/1 for the result to read as an accuracy).
        logprior: log prior ratio of the trained model.
        loglikelihood: dict of per-word log-likelihood ratios.
        naive_bayes_predict: scoring function called as
            ``naive_bayes_predict(comment, logprior, loglikelihood)``.

    Returns:
        ``1 - mean(|prediction - label|)``, where the prediction is 1 when
        the score is above 0 and 0 otherwise.

    Raises:
        ZeroDivisionError: if ``test_x`` is empty.
    """
    # A strictly positive score is classified as 1; zero or below as 0.
    predictions = [
        1 if naive_bayes_predict(sample, logprior, loglikelihood) > 0 else 0
        for sample in test_x
    ]

    # With 0/1 labels, |prediction - label| is 1 exactly on a misclassification.
    total_miss = sum(abs(guess - truth) for guess, truth in zip(predictions, test_y))
    error_rate = total_miss / len(predictions)

    return 1 - error_rate

In [None]:
# Evaluate on the held-out split and report the accuracy with 4 decimals.
print("Naive Bayes accuracy = %0.4f" %
      (test_naive_bayes(test_x, test_y, logprior, loglikelihood)))

In [None]:
# Score a handful of hand-written comments and print the score next to the
# resulting class (score > 0 -> "Pozitif", otherwise "Negatif").
# The last three inputs repeat one word 2, 3 and 4 times: each occurrence adds
# its log-likelihood again, so the score moves linearly with the repetitions.
for comment in ['babam çok sevdi',
              'gerçekten harika', 
              'tam istediğim gibi', 
              'küçük geldi iade', 
              'çok pahalı', 
              'güzel güzel', 
              'güzel güzel güzel', 
              'güzel güzel güzel güzel']:
    p = naive_bayes_predict(comment, logprior, loglikelihood)
    print(f'{comment} -> {p:.2f} -> {"Pozitif" if p > 0 else "Negatif"}')