In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the fresh reviews from a tab-separated file. quoting=3 is
# csv.QUOTE_NONE, so quote characters inside a review are kept as literal
# text instead of being treated as field delimiters.
dataset = pd.read_csv(
    'RestaurantReviews_Fresh.tsv',
    sep='\t',
    quoting=3,
)

In [3]:
# Sanity check: show the loaded frame's dimensions as (rows, columns).
dataset.shape

(100, 1)

In [4]:
# Preview the first rows of the loaded reviews (head() defaults to 5 rows).
dataset.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


In [5]:
# Text-preprocessing setup: regex support, the NLTK stop-word list and a
# Porter stemmer instance.
import re
import nltk

# Fetch the NLTK stop-word corpus (reports "already up-to-date" when cached).
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Stemmer instance used to stem each token during cleaning.
ps = PorterStemmer()

# English stop words, minus 'not', so that 'not' is NOT filtered out during
# cleaning. Presumably kept because negation carries sentiment — confirm this
# matches the preprocessing used when the model was trained.
all_stopwords = stopwords.words('english')
all_stopwords.remove('not')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bharg\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
# Build the cleaned corpus: one normalized, stemmed string per review.
#
# The stop-word set is built once up front. The previous code rebuilt
# set(all_stopwords) for every single word, which was pure wasted work.
stopword_set = set(all_stopwords)
corpus = []

# Iterate over the column's values directly so the loop does not depend on
# the DataFrame having a default 0..n-1 integer index (label-based
# dataset['Review'][i] raises KeyError after any filtering/re-indexing).
for raw_text in dataset['Review']:
    # Replace every character that is not an ASCII letter with a space,
    # then lowercase and split on whitespace.
    letters_only = re.sub('[^a-zA-Z]', ' ', raw_text)
    tokens = letters_only.lower().split()
    # Drop stop words, stem what remains, and re-join into a single string.
    review = ' '.join(ps.stem(word) for word in tokens if word not in stopword_set)
    corpus.append(review)

corpus

['spend money elsewher',
 'regular toast bread equal satisfi occasion pat butter mmmm',
 'buffet bellagio far anticip',
 'drink weak peopl',
 'order not correct',
 'also feel like chip bought not made hous',
 'disappoint dinner went elsewher dessert',
 'chip sal amaz',
 'return',
 'new fav vega buffet spot',
 'serious cannot believ owner mani unexperienc employe run around like chicken head cut',
 'sad',
 'felt insult disrespect could talk judg anoth human like',
 'call steakhous properli cook steak understand',
 'not impress concept food',
 'thing crazi guacamol like pur ed',
 'realli noth postino hope experi better',
 'got food poison buffet',
 'brought fresh batch fri think yay someth warm',
 'hilari yummi christma eve dinner rememb biggest fail entir trip us',
 'needless say go back anytim soon',
 'place disgust',
 'everi time eat see care teamwork profession degre',
 'ri style calamari joke',
 'howev much garlic fondu bare edibl',
 'could bare stomach meal complain busi lunch',
 '

In [7]:
from sklearn.feature_extraction.text import CountVectorizer
import pickle

# Load the pickled bag-of-words vectorizer (presumably a fitted
# CountVectorizer — confirm against the training notebook).
# SECURITY NOTE: pickle.load can execute arbitrary code while loading; only
# open files that come from a trusted source.
# The context manager guarantees the file handle is closed, which the bare
# pickle.load(open(...)) form did not.
with open('bow_sentiment_model.pkl', 'rb') as bow_file:
    cv = pickle.load(bow_file)

In [8]:
# Vectorize the cleaned reviews with the loaded vectorizer and convert the
# result to a dense array via .toarray().
X_fresh = cv.transform(corpus).toarray()
# Show the feature matrix dimensions as a sanity check.
X_fresh.shape

(100, 1420)

In [9]:
import joblib
# Load the persisted classifier from disk.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
# only load model files from a trusted source.
classifier = joblib.load('Classifier_sentiment_model')

In [10]:
# Predict one label per row of the feature matrix and print the raw result.
# NOTE(review): labels look binary (0/1); presumably 0 = negative and
# 1 = positive — confirm against the training notebook.
y_pred = classifier.predict(X_fresh)
print(y_pred)

[0 1 1 0 0 1 0 0 0 1 1 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 1 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 1 1 0
 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0]


In [11]:
# Attach the predictions to the reviews as a new 'predicted_label' column.
# This adds the column to `dataset` itself; the export cell relies on that.
dataset['predicted_label'] = y_pred.tolist()
# Preview the first rows with the new column.
dataset.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,0


In [12]:
# Write the reviews plus their predicted labels to a tab-separated file,
# omitting the DataFrame index column.
dataset.to_csv(
    'Predicted_Sentiments.tsv',
    sep='\t',
    encoding='UTF-8',
    index=False,
)