In [2]:
import numpy as np
import pandas as pd
from nltk import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [16]:
# Load the review data and derive a three-class sentiment label from the star
# rating in `overall`: 4-5 -> 1 (positive), 3 -> 0 (neutral), 1-2 -> -1 (negative).
# Series.map leaves NaN for any rating that is not a key of the mapping.
df = pd.read_csv('Musical_instruments_reviews.csv')
df['sentiment'] = df['overall'].map({5 : 1 , 4 : 1 , 3 : 0 , 2: -1 , 1: -1})

# Split the frame by sentiment class.
pos_df = df[df['sentiment'] == 1]
neu_df = df[df['sentiment'] == 0]
neg_df = df[df['sentiment'] == -1]

# Balance the classes by down-sampling each one to the size of the smallest.
sample_size = min(len(pos_df) , len(neu_df) , len(neg_df))

# Fixed random_state keeps the sampled rows reproducible across runs.
pos_sample = pos_df.sample(sample_size , random_state = 42)
neu_sample = neu_df.sample(sample_size , random_state = 42)
neg_sample = neg_df.sample(sample_size , random_state = 42)

# Recombine the balanced classes and hold out 20% of the rows as a test set.
# NOTE(review): the split is not stratified, so the per-class counts inside
# train_df / test_df are not guaranteed to stay exactly equal.
balanced_df = pd.concat([pos_sample , neu_sample,neg_sample])
train_df , test_df = train_test_split(balanced_df , test_size = 0.2, random_state = 42)

def generate_Ngram(text , n):
    """Return the word-level n-grams of ``text`` as space-joined strings.

    ``text`` is split on whitespace and every run of ``n`` consecutive words
    becomes one entry, in order of appearance.

    Args:
        text: String to break into n-grams.
        n: Number of words per n-gram.

    Returns:
        A list of n-gram strings; empty when ``text`` has fewer than ``n`` words.
    """
    tokens = text.split()
    # Number of full windows of n words that fit into the token list
    # (zero or negative when the text is too short, giving an empty range).
    window_count = len(tokens) - n + 1
    return [' '.join(tokens[start:start + n]) for start in range(window_count)]

def create_vocab(corpus , n):
    """Collect the distinct n-grams that occur anywhere in ``corpus``.

    Args:
        corpus: Iterable of text strings.
        n: Number of words per n-gram, forwarded to ``generate_Ngram``.

    Returns:
        A set holding every n-gram produced from any text in ``corpus``.
    """
    return {
        gram
        for document in corpus
        for gram in generate_Ngram(document, n)
    }

def text_to_vector(text,vocab,n):
    """Encode ``text`` as a vector of n-gram counts over ``vocab``.

    Args:
        text: String to encode.
        vocab: Collection of known n-grams (a set in this notebook). Column
            ``i`` of the result corresponds to the ``i``-th element yielded
            when iterating over ``vocab``, so the same ``vocab`` object must
            be used for every vector that is fed to the same model.
        n: Number of words per n-gram, forwarded to ``generate_Ngram``.

    Returns:
        A 1-D float ``numpy`` array of length ``len(vocab)`` where each entry
        is the number of times the corresponding n-gram occurs in ``text``.
        N-grams that are not in ``vocab`` are ignored.
    """
    # Build the n-gram -> column lookup once per call. Previously every
    # matching n-gram triggered ``list(vocab).index(word)``, i.e. a full copy
    # of the vocabulary plus a linear scan. ``enumerate(vocab)`` walks the
    # collection in the same order as ``list(vocab)``, so column positions are
    # unchanged; ``setdefault`` keeps the first position if ``vocab`` is a
    # sequence with repeats, matching what ``list.index`` returned.
    column_of = {}
    for position, token in enumerate(vocab):
        column_of.setdefault(token, position)

    vector = np.zeros(len(vocab))
    for word in generate_Ngram(text , n):
        position = column_of.get(word)
        if position is not None:
            vector[position] += 1
    return vector

# n-gram size used for the vocabulary and for every feature vector (2 = bigrams).
n = 2
# The vocabulary is built from the training summaries only.
vocab = create_vocab(train_df['summary'], n)

# Dense count matrix: one row per training summary, one column per vocab n-gram.
X_train = np.array([text_to_vector(text , vocab, n) for text in train_df['summary']])
y_train = train_df['sentiment'].values

# Logistic regression with scikit-learn's default hyperparameters.
# NOTE(review): test_df is not used in the cells shown here, so the model is
# fitted but not evaluated on the held-out split.
model = LogisticRegression()
model.fit(X_train , y_train)

def predicted(review , vocab):
    """Predict the sentiment label of a single review string.

    Relies on the notebook-level ``model`` (the fitted classifier) and ``n``
    (the n-gram size); both must be defined before this is called.

    Args:
        review: Review text to classify.
        vocab: Vocabulary passed through to ``text_to_vector``.

    Returns:
        The first (and only) label returned by ``model.predict``.
    """
    features = text_to_vector(review, vocab, n)
    # predict() expects a 2-D array, so present the vector as a single row.
    labels = model.predict(features.reshape(1, -1))
    return labels[0]

# Classify one example review and print a human-readable label.
# NOTE(review): with n = 2 (set above) a one-word review such as "good"
# produces no bigrams, so its feature vector is all zeros and the prediction
# does not depend on the word itself. Use a review of at least n words for a
# meaningful check.
new_review = "good"
# Maps the numeric sentiment labels back to display names.
senti_map = {1:"Positive" , 0:"Neutral" , -1:"Negative"}
predicted_val = predicted(new_review, vocab)
print(f"Prediction: {senti_map[predicted_val]}")

Prediction: Positive


In [13]:
# Preview the first rows of the review frame (head() defaults to five rows).
df.head()

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,sentiment
0,A2IBPI20UZIR0U,1384719342,"cassandra tu ""Yeah, well, that's just like, u...","[0, 0]","Not much to write about here, but it does exac...",5.0,good,1393545600,"02 28, 2014",1
1,A14VAT5EAX3D9S,1384719342,Jake,"[13, 14]",The product does exactly as it should and is q...,5.0,Jake,1363392000,"03 16, 2013",1
2,A195EZSQDW3E21,1384719342,"Rick Bennette ""Rick Bennette""","[1, 1]",The primary job of this device is to block the...,5.0,It Does The Job Well,1377648000,"08 28, 2013",1
3,A2C00NNG1ZQQG2,1384719342,"RustyBill ""Sunday Rocker""","[0, 0]",Nice windscreen protects my MXL mic and preven...,5.0,GOOD WINDSCREEN FOR THE MONEY,1392336000,"02 14, 2014",1
4,A94QU4C90B1AX,1384719342,SEAN MASLANKA,"[0, 0]",This pop filter is great. It looks and perform...,5.0,No more pops when I record my vocals.,1392940800,"02 21, 2014",1
