# Product Review System Using Feedback Analysis

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import math
import warnings

warnings.filterwarnings('ignore') 
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)

df = pd.read_csv("Reviews_Data.csv")
del df["reviews.userCity"], df["reviews.userProvince"], df["reviews.username"], df["reviews.id"], df["reviews.didPurchase"]
data = df.copy()

dataAfter = data.dropna(subset=["reviews.rating"]) # Removes all NAN in reviews.rating
dataAfter["reviews.rating"] = dataAfter["reviews.rating"].astype(int) # typecasting reviews.ratings string -> int
 
from sklearn.model_selection import StratifiedShuffleSplit
split = StratifiedShuffleSplit(n_splits=5, test_size=0.2) # Train/Test Split 4:1
for train_index, test_index in split.split(dataAfter, dataAfter["reviews.rating"]): 
    strat_train = dataAfter.reindex(train_index)
    strat_test = dataAfter.reindex(test_index)

def sentiments(rating): # Sentiments to applied / Target Variable
    if (rating == 5) or (rating == 4):
        return "Positive"
    elif rating == 3:
        return "Neutral"
    elif (rating == 2) or (rating == 1):
        return "Negative"
    
# Applying sentiments to the reviews.rating
strat_train["Sentiment"] = strat_train["reviews.rating"].apply(sentiments)
strat_test["Sentiment"] = strat_test["reviews.rating"].apply(sentiments)

X_train = strat_train["reviews.text"]
X_train_targetSentiment = strat_train["Sentiment"]
X_test = strat_test["reviews.text"]
X_test_targetSentiment = strat_test["Sentiment"]

# Replace "nan" with space
X_train = X_train.fillna(' ')
X_test = X_test.fillna(' ')
X_train_targetSentiment = X_train_targetSentiment.fillna(' ')
X_test_targetSentiment = X_test_targetSentiment.fillna(' ')

# Text preprocessing and occurance counting
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train) 

# Applying TF-Idf 
tfidf_transformer = TfidfTransformer(use_idf=False)
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

### Multinominal Naive Bayes Classifier

In [5]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
clf_multiNB_pipe = Pipeline([("vect", CountVectorizer()), 
                             ("tfidf", TfidfTransformer()),
                             ("clf_nominalNB", MultinomialNB())])
clf_multiNB_pipe.fit(X_train, X_train_targetSentiment)

predictedMultiNB = clf_multiNB_pipe.predict(X_test)
np.mean(predictedMultiNB == X_test_targetSentiment)*100

93.43055154490327

### Support Vector Machine Classifier

In [6]:
from sklearn.svm import LinearSVC
clf_linearSVC_pipe = Pipeline([("vector", CountVectorizer()), 
                               ("tfidf", TfidfTransformer()),
                               ("linearSVC", LinearSVC())])
clf_linearSVC_pipe.fit(X_train, X_train_targetSentiment)

predictedLinearSVC = clf_linearSVC_pipe.predict(X_test)
np.mean(predictedLinearSVC == X_test_targetSentiment)*100

93.83482529598614

### Logistic Regression Classifier

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
clf_logReg_pipe = Pipeline([("vect", CountVectorizer()), 
                            ("tfidf", TfidfTransformer()), 
                            ("clf_logReg", LogisticRegression())])
clf_logReg_pipe.fit(X_train, X_train_targetSentiment)

import numpy as np
predictedLogReg = clf_logReg_pipe.predict(X_test)
np.mean(predictedLogReg == X_test_targetSentiment)*100

93.80594859948022

### Decision Tree Classifier

In [8]:
from sklearn.tree import DecisionTreeClassifier
clf_decisionTree_pipe = Pipeline([("vect", CountVectorizer()), 
                                  ("tfidf", TfidfTransformer()), 
                                  ("clf_decisionTree", DecisionTreeClassifier())
                                 ])
clf_decisionTree_pipe.fit(X_train, X_train_targetSentiment)

predictedDecisionTree = clf_decisionTree_pipe.predict(X_test)
np.mean(predictedDecisionTree == X_test_targetSentiment)*100

89.8931562229281

### Random Forest Classifier

In [9]:
from sklearn.ensemble import RandomForestClassifier
clf_randomForest_pipe = Pipeline([("vect", CountVectorizer()), 
                                  ("tfidf", TfidfTransformer()), 
                                  ("clf_randomForest", RandomForestClassifier())
                                 ])
clf_randomForest_pipe.fit(X_train, X_train_targetSentiment)

predictedRandomForest = clf_randomForest_pipe.predict(X_test)
np.mean(predictedRandomForest == X_test_targetSentiment)*100

93.4883049379151

In [11]:
testing_data = ["The tablet is good, really liked it.",
            "The tablet is ok, and it works fine.", 
            "The tablet is not good and does not work."]

pipe_list = [clf_multiNB_pipe,clf_linearSVC_pipe,clf_logReg_pipe,clf_decisionTree_pipe,clf_randomForest_pipe]
pipe_names = ["MNB Classifier","Linear SVC", "Logistic Regression", "Decesion Tree", "Random Forest"]
for name, model in zip(pipe_names,pipe_list):
    print(name,"\n--------------",pd.DataFrame(model.predict(testing_data)))
    print("\n")

MNB Classifier 
--------------           0
0  Positive
1  Positive
2  Positive


Linear SVC 
--------------           0
0  Positive
1   Neutral
2   Neutral


Logistic Regression 
--------------           0
0  Positive
1   Neutral
2   Neutral


Decesion Tree 
--------------           0
0  Negative
1  Positive
2   Neutral


Random Forest 
--------------           0
0  Positive
1  Positive
2  Positive


