### Importing the libraries

In [1]:

import numpy as np
import pandas as pd


### Importing the dataset

In [2]:
# The file is tab-separated. quoting=3 is csv.QUOTE_NONE, so quote characters
# inside a review are kept as literal text instead of being parsed as field quotes.
dataset = pd.read_csv(
    'Restaurant_Reviews.tsv',
    sep='\t',
    quoting=3,
)

### Checking out the data

In [3]:
# Preview the first five rows (five is head()'s default row count).
dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [4]:
# (rows, columns) of the loaded frame.
dataset.shape

(1000, 2)

In [5]:
# Cleaning the texts
import re
import nltk

In [6]:
# Fetch NLTK's stop-word lists; the cleaning cell reads them through
# stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to C:\Users\mayank
[nltk_data]     singh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Cleaning the text

In [7]:
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [8]:
# Build the cleaned corpus: one normalised, stemmed string per review.
#
# The stemmer and the stop-word set do not depend on the review being
# processed, so they are created once up front. (Previously the stop-word set
# was rebuilt for every single word and the stemmer for every review.)
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))

corpus = []
# Iterate over the column values directly instead of looking rows up by label,
# so the loop does not depend on the frame having a default 0..n-1 index.
for raw_review in dataset['Review']:
    # Replace everything that is not an ASCII letter with a space, then
    # lowercase and split on whitespace.
    words = re.sub('[^a-zA-Z]', ' ', raw_review).lower().split()
    # Drop stop words and reduce the remaining words to their Porter stems.
    stems = [ps.stem(word) for word in words if word not in english_stopwords]
    review = ' '.join(stems)
    corpus.append(review)

### Creating the Bag of Words model

In [9]:
from sklearn.feature_extraction.text import CountVectorizer

In [10]:
# Bag-of-words features: keep at most 1500 vocabulary terms.
cv = CountVectorizer(max_features=1500)
term_counts = cv.fit_transform(corpus)
# Convert the vectorizer output to a plain array, one row per review.
X = term_counts.toarray()
# Target vector: the frame's second column.
y = dataset.iloc[:, 1].values

In [11]:
X.shape

(1000, 1500)

### Splitting the dataset into the Training set and Test set

In [12]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Hold out 30% of the reviews for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 0)



### Fitting Naive Bayes to the Training set

In [13]:
# Fit a Gaussian Naive Bayes classifier on the training split.
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
# Kept as the cell's last expression so the notebook echoes the fitted estimator.
classifier.fit(X_train, y_train)

GaussianNB(priors=None)

### Predicting the Test set results

In [14]:
# Predict a label for every row of the held-out test features.
y_pred = classifier.predict(X_test)

### Making the Confusion Matrix

In [15]:
from sklearn.metrics import confusion_matrix

# Rows correspond to the true labels, columns to the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [16]:
# Print the heading and the matrix on separate lines.
print("Confusion Matrix", cm, sep="\n")

Confusion Matrix
[[ 83  60]
 [ 26 131]]


In [17]:
from sklearn.metrics import accuracy_score

# accuracy_score returns a fraction; scale it to a percentage for display.
# (Message typo fixed: "Accuray" -> "Accuracy".)
print("Accuracy of the Result is {x} % ".format(x=accuracy_score(y_test,y_pred)*100))

Accuray of the Result is 71.33333333333334 % 
