In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

df = pd.read_csv('Musical_instruments_reviews.csv')
df['sentiment'] = df['overall'].map({5: 1, 4: 1, 3: 0, 2: -1, 1: -1})

positive_df = df[df['sentiment'] == 1]
neutral_df = df[df['sentiment'] == 0]
negative_df = df[df['sentiment'] == -1]

sample_size = min(len(positive_df), len(neutral_df), len(negative_df))

positive_sample = positive_df.sample(sample_size, random_state=42)
neutral_sample = neutral_df.sample(sample_size, random_state=42)
negative_sample = negative_df.sample(sample_size, random_state=42)

balanced_df = pd.concat([positive_sample, neutral_sample, negative_sample])

train_df, test_df = train_test_split(balanced_df, test_size=0.2, random_state=42)

def preprocess_text(text):
    words = text.lower().split()
    return words

def create_vocabulary(corpus):
    vocab = set()
    for text in corpus:
        words = preprocess_text(text)
        vocab.update(words)
    return vocab

vocab = create_vocabulary(train_df['summary'])

def text_to_bow(text, vocab):
    words = preprocess_text(text)
    vector = np.zeros(len(vocab))
    for word in words:
        if word in vocab:
            vector[list(vocab).index(word)] += 1
    return vector

X_train = np.array([text_to_bow(text, vocab) for text in train_df['summary']])
y_train = train_df['sentiment'].values

nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

def predict_sentiment(review, vocab):
    review_vector = text_to_bow(review, vocab).reshape(1, -1)
    sentiment = nb_classifier.predict(review_vector)[0]
    return sentiment

new_review = "This product is excellent"
senti_map = {1:"Positive", 0:"Neutral", -1:"Negative"}
predicted_sentiment = predict_sentiment(new_review, vocab)
print(f"Predicted Sentiment: {senti_map[predicted_sentiment]}")

Predicted Sentiment: Positive


In [26]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [29]:
df = pd.read_csv('Musical_instruments_reviews.csv')
df['sentiment'] = df['overall'].map({5:1 , 4:1 , 3:0 , 2:-1,1:-1})

pos_df = df[df['sentiment'] == 1]
nw_df = df[df['sentiment'] == 0]
neg_df = df[df['sentiment'] == -1]

sample_size = min(len(pos_df), len(nw_df) , len(neg_df))

pos_sample = pos_df.sample(sample_size, random_state = 42)
nw_sample = nw_df.sample(sample_size, random_state = 42)
neg_sample = neg_df.sample(sample_size, random_state = 42)

balanced_df = pd.concat([pos_sample, nw_sample, neg_sample])

train_df , test_df = train_test_split(balanced_df , test_size = 0.2, random_state = 42)

def preprocess(text):
    words = text.lower().split()
    return words

def create_vocab(corpus):
    vocab = set()
    for text in corpus:
        words = preprocess(text)
        vocab.update(words)
    return vocab

vocab = create_vocab(train_df['summary'])

def text_to_bow(text , vocab):
    words = preprocess(text)
    vector = np.zeros(len(vocab))
    for word in words:
        if word in vocab:
            vector[list(vocab).index(word)] += 1
    return vector



X_train = np.array([text_to_bow(text , vocab) for text in train_df['summary']])
y_train = train_df['sentiment'].values

nb_classifier = GaussianNB()
nb_classifier.fit(X_train , y_train)

def predict_sentiment(review, vocab):
    review_vector = text_to_bow(review, vocab).reshape(1, -1)
    sentiment = nb_classifier.predict(review_vector)[0]
    y_test = test_df['sentiment']
    y_pred = nb_classifier.predict(review_vector)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    return sentiment

new_review = "This product is excellent"
senti_map = {1:"Positive", 0:"Neutral", -1:"Negative"}
predicted_sentiment = predict_sentiment(new_review, vocab)
print(predicted_sentiment)
print(f"Predicted Sentiment: {senti_map[predicted_sentiment]}")



ValueError: Found input variables with inconsistent numbers of samples: [281, 1]