In [2]:
import numpy as np
import pandas as pd
from nltk.corpus import stopwords, brown
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
import sys
from bs4 import BeautifulSoup
import re
from sklearn import naive_bayes
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.cross_validation import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
%matplotlib inline



In [3]:
# Load the labeled reviews from a tab-separated file.
# quoting=3 is csv.QUOTE_NONE: quote characters inside a review are treated as
# ordinary text instead of field delimiters.
data = pd.read_csv(
    'labeledTrainData.tsv',
    delimiter='\t',
    quoting=3,
)

In [4]:
# Drop the 'id' column; only the remaining columns are used below.
# errors='ignore' keeps this cell safe to re-run: without it a second execution
# raises because 'id' has already been removed from `data`.
data = data.drop(['id'], axis = 1, errors = 'ignore')

In [5]:
# Clean and tokenize every review: strip HTML markup, replace each non-letter
# character with a space, lowercase, then split on whitespace.
def _review_to_tokens(raw_review):
    """Return the list of lowercase alphabetic tokens for one raw review."""
    plain_text = BeautifulSoup(raw_review, 'html.parser').get_text()
    letters_only = re.sub("[^A-Za-z]", " ", plain_text)
    return letters_only.lower().split()


data["review"] = [_review_to_tokens(raw_review) for raw_review in data["review"]]

In [6]:
# Lemmatize every token and drop the ones whose lemma is an English stopword.
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()


def _lemmatize_and_filter(tokens):
    """Lemmatize each token once and keep the lemmas that are not stopwords."""
    # The lemma is needed both for the stopword test and for the output, so
    # compute it a single time per token instead of twice.
    lemmas = (lemmatizer.lemmatize(token) for token in tokens)
    return [lemma for lemma in lemmas if lemma not in stop_words]


data["review"] = [_lemmatize_and_filter(tokens) for tokens in data["review"]]

In [7]:
# Re-join each token list into a single space-separated string, the form the
# vectorizer in the pipeline consumes.
reviews = [" ".join(tokens) for tokens in data["review"]]

# Hold out 20% of the reviews for testing. A fixed random_state makes the split
# (and therefore every score reported below) reproducible across kernel restarts.
# NOTE(review): train_test_split is imported from sklearn.cross_validation at the
# top of this notebook; that module was removed in scikit-learn 0.20 in favour of
# sklearn.model_selection.
trainX, testX, trainY, testY = train_test_split(
    reviews, data["sentiment"], test_size = 0.2, random_state = 42
)

In [8]:
# Text classification pipeline: bag-of-words counts -> TF-IDF weighting ->
# logistic regression.
# Alternative final step kept for reference: naive_bayes.MultinomialNB().
pipeline = Pipeline([
    ('bow', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('classifier', LogisticRegression()),
])

In [9]:
# Fit all pipeline steps on the training split only.
# Kept as the cell's last expression so the fitted pipeline's repr is displayed.
pipeline.fit(trainX, trainY)

Pipeline(steps=[('bow', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip_...ty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])

In [20]:
# Predicted labels for the training split (ans) and the held-out split (ans2),
# so the two sets of scores can be compared below.
ans, ans2 = pipeline.predict(trainX), pipeline.predict(testX)

In [12]:
# Accuracy in percent: first on the training split, then on the held-out split.
# Arguments follow accuracy_score's (y_true, y_pred) order, matching the
# classification_report calls in the next cell.
print(accuracy_score(trainY, ans) * 100)
print(accuracy_score(testY, ans2) * 100)

93.595
89.42


In [13]:
# Per-class precision / recall / F1: training split first, then held-out split.
for y_true, y_pred in ((trainY, ans), (testY, ans2)):
    print(classification_report(y_true, y_pred))

             precision    recall  f1-score   support

          0       0.94      0.93      0.94      9991
          1       0.93      0.95      0.94     10009

avg / total       0.94      0.94      0.94     20000

             precision    recall  f1-score   support

          0       0.90      0.89      0.89      2509
          1       0.89      0.90      0.89      2491

avg / total       0.89      0.89      0.89      5000



In [57]:
# A single free-text review used as an ad-hoc input for the fitted pipeline.
# It is wrapped in a one-element list because it is passed to pipeline.predict
# right below, and the first element of the result is read.
cX = ['''The vibe k5 note comes with a 5.5 inch full hd display along with a Helio P10 mediatek octa core chip and couples with a 4gb ram with the vibe user interface. It has Dolby Atmos sound system inbuilt. It has a 13mp camera at the rear and a 8mp front camera.Now coming to the review

Design: it has a full metal unibody which doesn't feel premium and the finger print sensor is situated just below the camera .To be honest the design seems pretty boring and seems to look like just a budget phone which doesn't feel premium.

Performance: The p10 chip from mediatek was never for heavy gaming although it does give a decent gaming performance for games like Candy Crush, Fruit Ninja and others but it does take some time to load. But for daily use it should be just fine.

Battery: Battery is more on towards the lower side it has a 3000mah one which lasts for about 5hours on heavy usage and on normal usage 7-8 hours which is ok for people who don't use the phone for gaming, browsing for long times, etc. But for general use its pretty decent.

Camera: the 13mp rear camera was giving some great photos when under adequate amount of light but it did show us a lot of pixels here and there when under low lighting conditions.

Sound output: with the Dolby Atmos speakers it gives crisp and loud volumes which makes it great for music lovers.

Overall: if not for the bugs in vibe user interface and the poor low light performance I do not recommend this phone as the user interface hangs alot.
''']
def _clean_for_model(raw_text):
    """Apply the training-time cleaning to one raw string and return it re-joined.

    Steps mirror the preprocessing cells above: strip HTML, replace non-letters
    with spaces, lowercase, split, lemmatize, drop stopword lemmas.
    """
    words = re.sub("[^A-Za-z]", " ", BeautifulSoup(raw_text, 'html.parser').get_text()).lower().split()
    lemmas = (lemmatizer.lemmatize(word) for word in words)
    return " ".join(lemma for lemma in lemmas if lemma not in stop_words)


# The pipeline was fitted on cleaned, lemmatized, stopword-filtered text. Feeding
# it the raw review would leave inflected forms (e.g. plurals) unmatched against
# the learned vocabulary, so run the same cleaning before predicting.
cX_clean = [_clean_for_model(text) for text in cX]

# A truthy predicted label is reported as positive, anything else as negative.
if pipeline.predict(cX_clean)[0]:
    print("Positive")
else:
    print("Negative")

Negative
