# Natural Language Processing

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, cross_val_score,KFold
from sklearn.metrics import accuracy_score,confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

## Importing the dataset

In [2]:
# Load the tab-separated review file. quoting=3 is csv.QUOTE_NONE, so quote
# characters inside a review are kept as ordinary text instead of being parsed.
df = pd.read_csv(
    'Restaurant_Reviews.tsv',
    sep='\t',
    quoting=3,
)

## Cleaning the texts

In [4]:
import re
import nltk
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Build the stop-word collection and the stemmer once, before the loop: they do
# not depend on the review being processed, and rebuilding the set for every
# token made the cleaning pass needlessly slow.
allstopwords = stopwords.words('english')
allstopwords.remove('not')  # keep 'not' so negations stay in the cleaned text
stop_words = set(allstopwords)
ps = PorterStemmer()

corpus = []
for raw_review in df['Review']:
    # Text normalisation: replace every non-letter character with a space.
    letters_only = re.sub('[^A-Za-z]', ' ', raw_review)

    # Text tokenisation: lower-case, then split on whitespace.
    tokens = letters_only.lower().split()

    # Stop-word removal and stemming, re-joined into one space-separated string.
    corpus.append(' '.join(ps.stem(word) for word in tokens if word not in stop_words))


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\TEST\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Creating the Bag of Words model

In [5]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words features, limited to 1500 vocabulary terms.
# Note: the vocabulary is fitted on the whole corpus, i.e. before the
# train/test split performed in the next cell.
cv = CountVectorizer(max_features=1500)
bag_of_words = cv.fit_transform(corpus)
x = bag_of_words.toarray()

# Target vector: the second column of the dataframe.
labels = df.iloc[:, 1]
y = labels.values

## Splitting the dataset into the Training set and Test set

In [6]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=0,
)

## Training and evaluating candidate models on the Training set

In [7]:
# Candidate classifiers as (label, estimator) pairs.
# The tree-based models draw random numbers while fitting, so they are seeded
# (same seed as the train/test split) to make their scores repeatable between runs.
models = [
    ('KNN', KNeighborsClassifier(n_neighbors=9)),
    ('Lreg', LogisticRegression()),
    ('SVM', SVC(kernel='linear')),
    ('KVM', SVC(kernel='rbf')),
    ('GNB', GaussianNB()),
    ('DTC', DecisionTreeClassifier(criterion='entropy', random_state=0)),
    ('RFC', RandomForestClassifier(n_estimators=11, criterion='entropy', random_state=0)),
]

# Fit each candidate on the training split and report its test-split metrics.
for name,model in models:
    model.fit(xtrain,ytrain)
    ypred = model.predict(xtest)
    print(f'{name}\n Accuracy: {accuracy_score(ytest,ypred)}\n\n Confusion Matrix:-\n{confusion_matrix(ytest,ypred)}\nClassification Report:-\n{classification_report(ytest,ypred)}\n')
    

KNN
 Accuracy: 0.64

 Confusion Matrix:-
[[73 24]
 [48 55]]
Classification Report:-
              precision    recall  f1-score   support

           0       0.60      0.75      0.67        97
           1       0.70      0.53      0.60       103

    accuracy                           0.64       200
   macro avg       0.65      0.64      0.64       200
weighted avg       0.65      0.64      0.64       200


Lreg
 Accuracy: 0.78

 Confusion Matrix:-
[[80 17]
 [27 76]]
Classification Report:-
              precision    recall  f1-score   support

           0       0.75      0.82      0.78        97
           1       0.82      0.74      0.78       103

    accuracy                           0.78       200
   macro avg       0.78      0.78      0.78       200
weighted avg       0.78      0.78      0.78       200


SVM
 Accuracy: 0.79

 Confusion Matrix:-
[[79 18]
 [24 79]]
Classification Report:-
              precision    recall  f1-score   support

           0       0.77      0.81   

In [8]:
# Fit a standalone linear-kernel SVM on the training split; this is the
# model that `sentiment_analysis` uses to make its predictions.
classifier = SVC(kernel='linear')
# Left as the last expression so the fitted estimator is shown as the cell output.
classifier.fit(xtrain,ytrain)

SVC(kernel='linear')

In [9]:
def sentiment_analysis(sentence):
    """Classify the sentiment of a single sentence.

    The sentence goes through the same cleaning steps as the training corpus
    (letters only, lower-casing, stop-word removal keeping 'not', Porter
    stemming). It is then vectorised with the notebook-level `cv` and scored
    with the notebook-level `classifier`.

    Parameters
    ----------
    sentence : str
        Raw text to classify.

    Returns
    -------
    str
        'Positive' when the predicted label is 1, 'Negative' when it is 0,
        and 'Neutral' for any other label.
    """
    # Replace every non-letter with a space, lower-case, and split on whitespace.
    tokens = re.sub('[^a-zA-Z]', ' ', sentence).lower().split()

    # Build the stop-word set once per call instead of once per token.
    allstopwords = stopwords.words('english')
    allstopwords.remove('not')  # keep 'not' so negations stay in the text
    stop_words = set(allstopwords)

    ps = PorterStemmer()
    cleaned = ' '.join(ps.stem(word) for word in tokens if word not in stop_words)

    # A one-sentence corpus gives a one-row feature matrix and a one-element
    # prediction array; take the scalar so the comparisons below are explicit.
    features = cv.transform([cleaned]).toarray()
    prediction = classifier.predict(features)[0]

    if prediction == 1:
        return 'Positive'
    if prediction == 0:
        return 'Negative'
    # Fallback for any label other than 0 or 1.
    return 'Neutral'

In [10]:
# Quick smoke test of the helper on a short sentence.
sentiment_analysis('i love you')

'Positive'

In [11]:
# Draw 30 reviews for a spot check; the seed makes the sample repeatable.
# NOTE: the rows come from the full dataset, so they can overlap the training
# split. Treat the result as a sanity check, not a held-out evaluation.
valid = df.sample(30, random_state=0)
valid

Unnamed: 0,Review,Liked
431,We loved the biscuits!!!,1
556,AVOID THIS ESTABLISHMENT!,0
546,Last night was my second time dining here and ...,1
346,"This place is like Chipotle, but BETTER.",1
614,An absolute must visit!,1
80,Coming here is like experiencing an underwhelm...,0
582,"If you look for authentic Thai food, go else w...",0
317,"Back to good BBQ, lighter fare, reasonable pri...",1
92,"Never been to Hard Rock Casino before, WILL NE...",0
358,Best fish I've ever had in my life!,1


In [12]:
# Replace the sampled row labels with a fresh 0..n-1 index, discarding the old one.
valid = valid.reset_index(drop=True)
valid

Unnamed: 0,Review,Liked
0,We loved the biscuits!!!,1
1,AVOID THIS ESTABLISHMENT!,0
2,Last night was my second time dining here and ...,1
3,"This place is like Chipotle, but BETTER.",1
4,An absolute must visit!,1
5,Coming here is like experiencing an underwhelm...,0
6,"If you look for authentic Thai food, go else w...",0
7,"Back to good BBQ, lighter fare, reasonable pri...",1
8,"Never been to Hard Rock Casino before, WILL NE...",0
9,Best fish I've ever had in my life!,1


In [13]:
# One [text, true label, predicted sentiment] row per sampled review.
# The two columns are walked positionally, so this works whatever index
# `valid` carries and does not rely on the labels being 0..n-1.
data = [
    [sentence, label, sentiment_analysis(sentence)]
    for sentence, label in zip(valid['Review'], valid['Liked'])
]

validation = pd.DataFrame(data,columns=['Text','Label','Predicted Label'])

In [14]:
# Display the true labels next to the predicted sentiments.
validation

Unnamed: 0,Text,Label,Predicted Label
0,We loved the biscuits!!!,1,Positive
1,AVOID THIS ESTABLISHMENT!,0,Negative
2,Last night was my second time dining here and ...,1,Positive
3,"This place is like Chipotle, but BETTER.",1,Negative
4,An absolute must visit!,1,Negative
5,Coming here is like experiencing an underwhelm...,0,Negative
6,"If you look for authentic Thai food, go else w...",0,Negative
7,"Back to good BBQ, lighter fare, reasonable pri...",1,Positive
8,"Never been to Hard Rock Casino before, WILL NE...",0,Negative
9,Best fish I've ever had in my life!,1,Positive
