In [4]:
import pandas as pd
import time
import pickle
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import TfidfVectorizer

In [5]:
# Fetch the train and test splits of the review dataset straight from GitHub.
# Each CSV is parsed by pd.read_csv in the order listed (train first, test second).
trainData, testData = map(
    pd.read_csv,
    (
        "https://raw.githubusercontent.com/Vasistareddy/sentiment_analysis/master/data/train.csv",
        "https://raw.githubusercontent.com/Vasistareddy/sentiment_analysis/master/data/test.csv",
    ),
)

# Preview five rows of a shuffled copy of the training frame.
# No random_state is passed, so the rows shown differ from run to run.
trainData.sample(frac=1).head(n=5)

Unnamed: 0,Content,Label
162,plot : odin is a great high school basketball ...,pos
600,bob the happy bastard's quickie review : \nthe...,pos
659,good films are hard to find these days . \ngre...,pos
1099,"so ask yourself what "" 8mm "" ( "" eight millime...",neg
337,will hunting ( matt damon ) is a natural geniu...,pos


In [6]:
# Turn the raw review text into TF-IDF feature vectors.
vectorizer = TfidfVectorizer(
    use_idf=True,        # weight terms by inverse document frequency
    sublinear_tf=True,   # dampen raw term counts (1 + log(tf))
    min_df=5,            # integer threshold: minimum document count for a term
    max_df=0.8,          # float threshold: maximum document proportion for a term
)

# The vocabulary and IDF weights are learned from the training text only;
# the test text is merely projected onto that already-fitted vocabulary.
train_vectors = vectorizer.fit_transform(trainData['Content'])
test_vectors = vectorizer.transform(testData['Content'])

In [8]:
# Train a linear-kernel SVM on the TF-IDF training vectors, predict labels for
# the test vectors, and report how long each phase took.
classifier_linear = svm.SVC(kernel='linear')

# time.perf_counter() is the clock intended for measuring elapsed intervals:
# it is monotonic and high-resolution, whereas time.time() follows the system
# wall clock and can be adjusted while the cell is running.
t0 = time.perf_counter()
classifier_linear.fit(train_vectors, trainData['Label'])
t1 = time.perf_counter()
prediction_linear = classifier_linear.predict(test_vectors)
t2 = time.perf_counter()

# t1 - t0 covers fit(); t2 - t1 covers predict().
print("Training time: %fs; Prediction time: %fs" % (t1 - t0, t2 - t1))

Training time: 7.166035s; Prediction time: 0.596265s


In [9]:
# Score the test-set predictions; output_dict=True returns the metrics as a
# nested dict keyed by class label instead of a preformatted string.
report = classification_report(
    testData['Label'],
    prediction_linear,
    output_dict=True,
)

# Print the metrics entry for each of the two sentiment classes.
for heading, label in (('positive: ', 'pos'), ('negative: ', 'neg')):
    print(heading, report[label])

positive:  {'precision': 0.9191919191919192, 'recall': 0.91, 'f1-score': 0.914572864321608, 'support': 100.0}
negative:  {'precision': 0.9108910891089109, 'recall': 0.92, 'f1-score': 0.9154228855721394, 'support': 100.0}


In [10]:
# Hand-written product reviews used as a quick sanity check of the trained model.
reviews = [
    "SUPERB, I AM IN LOVE IN THIS PHONE",
    "Do not purchase this product. My cell phone blast when I switched the charger",
    "I received defective piece display is not working properly",
    "It's not even 5 days since i purchased this product.\nI would say this a specially blended worst Phone in all formats. ..."
]

# Vectorize and classify all reviews in a single batch rather than calling
# transform()/predict() once per review on one-element inputs. Each row of the
# transformed matrix corresponds to the review at the same position, so the
# predictions line up with `reviews` by index.
review_vectors = vectorizer.transform(reviews)
review_predictions = classifier_linear.predict(review_vectors)

for review, prediction in zip(reviews, review_predictions):
    print(f"Review: {review}\nPrediction: {prediction}\n")

Review: SUPERB, I AM IN LOVE IN THIS PHONE
Prediction: pos

Review: Do not purchase this product. My cell phone blast when I switched the charger
Prediction: neg

Review: I received defective piece display is not working properly
Prediction: neg

Review: It's not even 5 days since i purchased this product.
I would say this a specially blended worst Phone in all formats. ...
Prediction: neg



In [11]:
# Persist the fitted vectorizer and the trained classifier to disk.
# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes, even if pickling raises, instead of being left
# open for the garbage collector to clean up.
# NOTE: pickle files can execute arbitrary code when loaded; only unpickle
# these artifacts from a trusted location.
with open('vectorizer.sav', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

with open('classifier.sav', 'wb') as classifier_file:
    pickle.dump(classifier_linear, classifier_file)