In [None]:
import numpy as np
import pandas as pd

from sklearn.naive_bayes import MultinomialNB

def calculatewordfreq(words):
    """Count how often each item occurs in *words*.

    Args:
        words: An iterable of hashable items (e.g. tokens or n-gram tuples).

    Returns:
        A plain ``dict`` mapping every distinct item to its number of
        occurrences, with keys in first-seen order.
    """
    tally = {}
    for token in words:
        if token in tally:
            tally[token] += 1
        else:
            tally[token] = 1
    return tally

def calculate_ngrams(docs, n):
    """Split each document on whitespace and return its word n-grams.

    Args:
        docs: An iterable of strings, one per document.
        n: Number of consecutive words per n-gram; must be at least 1.

    Returns:
        A list with one entry per document, each entry being the list of
        n-grams (tuples of ``n`` words) in order of appearance. A document
        with fewer than ``n`` words yields an empty list.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    # Without this guard n == 0 silently produces len(words) + 1 empty tuples
    # per document, and negative n produces truncated slices; neither is a
    # meaningful n-gram.
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    ngram_list = []
    for doc in docs:
        words = doc.split()
        # The last valid window starts at len(words) - n.
        ngram_list.append(
            [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]
        )
    return ngram_list

def ngrams_to_vector(ngrams, vocabulary):
    """Turn per-document n-gram lists into count vectors over *vocabulary*.

    Args:
        ngrams: An iterable of documents, each an iterable of n-grams.
        vocabulary: The terms to count. It is iterated once per document,
            and the column order of every row is its iteration order.

    Returns:
        A list of rows (one per document); each row is a list holding the
        number of times each vocabulary term occurs in that document, with
        0 for terms the document does not contain.
    """
    return [
        [counts.get(term, 0) for term in vocabulary]
        for counts in map(calculatewordfreq, ngrams)
    ]

# Load the reviews. Columns are picked by position: column 4 supplies the
# documents and column 5 the numeric rating.
# NOTE(review): rows whose column-4 value is missing would be read as NaN and
# break the later `doc.split()` call -- confirm the CSV has none, or drop them.
data = pd.read_csv('Musical_instruments_reviews.csv')
x = data.iloc[:, 4].values
ratings = data.iloc[:, 5].values

# Preprocessing y values: ratings 4 and 5 -> 1, rating 3 -> 0, anything else
# -> -1 (presumably positive / neutral / negative sentiment).
# The labels are built as a new array instead of being assigned element by
# element into `.values`: with pandas Copy-on-Write that array is read-only
# (the assignment raises), and without it the writes leak back into `data`.
# The rating dtype is kept so the labels print exactly as before.
y = np.select(
    [(ratings == 4) | (ratings == 5), ratings == 3],
    [1, 0],
    default=-1,
).astype(ratings.dtype)

# Splitting data for balanced classes
from collections import Counter

# Keep at most as many rows per class as the rarest class has, taking rows in
# their original order. The cap is derived from the data rather than
# hard-coded: the previous `<= 467` test admitted up to 468 rows per class,
# which leaves the classes unequal whenever one has fewer than 468 rows.
per_class_limit = min(Counter(y).values(), default=0)

x2 = []
y2 = []
c = {-1: 0, 0: 0, 1: 0}  # rows kept so far, per label
for text, label in zip(x, y):
    if c[label] < per_class_limit:
        x2.append(text)
        y2.append(label)
        c[label] += 1

x = x2
y = y2

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the split is stratified on the
# labels and seeded so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

# The n-gram size is chosen interactively.
n = int(input("Enter the value of n for n-grams: "))
xtrain_ngrams = calculate_ngrams(x_train, n)
xtest_ngrams = calculate_ngrams(x_test, n)

# The vocabulary comes from the training documents only; n-grams that appear
# solely in the test documents are ignored when vectorizing.
vocabulary = {gram for doc in xtrain_ngrams for gram in doc}

# Both splits are vectorized against the same vocabulary object so their
# columns line up.
xtrain_vector = ngrams_to_vector(xtrain_ngrams, vocabulary)
xtest_vector = ngrams_to_vector(xtest_ngrams, vocabulary)

# Training the classifier
multinomial_nb = MultinomialNB().fit(xtrain_vector, y_train)

# Making predictions on the test set
y_pred = multinomial_nb.predict(xtest_vector)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

# Spot-check the trained classifier on a few hand-written reviews, printing
# the predicted label for each one in turn.
for review in (
    "average movie. very neutral",
    "best product highly recommended",
    "worst product highly not recommended",
):
    review_ngrams = calculate_ngrams([review], n)
    review_vector = ngrams_to_vector(review_ngrams, vocabulary)
    res = multinomial_nb.predict(review_vector)
    print(res[0])  # Prediction for the review

Enter the value of n for n-grams: 1
0.6370106761565836
0.0
1.0
-1.0
