# **Sentiment Analysis Using NLP**

In [25]:
import numpy as np
import pandas as pd

## **Importing Data Set**

In [26]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
# Make the project folder the working directory; every file name used by the
# later cells is relative to it.
%cd /content/drive/MyDrive/Colab Notebooks/Sem_5/AI/Sentiment_Analysis
# List the folder contents to check which data and model files are present.
!ls

/content/drive/MyDrive/Colab Notebooks/Sem_5/AI/Sentiment_Analysis
a1_RestaurantReviews_HistoricDump.tsv
a2_RestaurantReviews_FreshDump.tsv
Assignment4_NLP_Sentiment_analysis_model.ipynb
c1_BoW_Sentiment_Model.pkl
c2_Classifier_Sentiment_Model
c3_Predicted_Sentiments_Fresh_Dump.tsv


In [28]:
# Read the tab-separated dump of fresh reviews. quoting=3 is csv.QUOTE_NONE,
# so quote characters inside a review are kept as literal text.
freshDumpFile = 'a2_RestaurantReviews_FreshDump.tsv'
dataSet = pd.read_csv(freshDumpFile, sep='\t', quoting=3)
dataSet.head()

Unnamed: 0,Review
0,Spend your money elsewhere.
1,Their regular toasted bread was equally satisf...
2,The Buffet at Bellagio was far from what I ant...
3,"And the drinks are WEAK, people!"
4,-My order was not correct.


## Data Cleaning

In [29]:
import re
import nltk

# Fetch the NLTK stop-word corpus (a no-op when it is already up to date).
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Stemmer used by the cleaning loop to reduce each kept word to its stem.
ps = PorterStemmer()

# English stop words that the cleaning loop filters out.
all_stopwords = stopwords.words('english')
# Keep 'not' in the reviews by removing it from the stop-word list --
# presumably because negation changes the sentiment of a review.
all_stopwords.remove('not')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [30]:
# Build the cleaned corpus: one normalised string per review.
#
# Every row of dataSet['Review'] is processed (the row count is no longer
# hard-coded), so `corpus` always has exactly one entry per DataFrame row and
# the predictions derived from it line up with `dataSet`.
#
# Per review:
#   1. replace every character that is not an ASCII letter with a space,
#   2. lower-case and split on whitespace,
#   3. drop stop words ('not' was removed from the list, so it is kept),
#   4. stem the remaining words and re-join them with single spaces.

# Build the lookup set once instead of once per word.
stopword_set = set(all_stopwords)

corpus = []
for raw_review in dataSet['Review']:
  letters_only = re.sub('[^a-zA-Z]', ' ', raw_review)
  tokens = letters_only.lower().split()
  stems = [ps.stem(word) for word in tokens if word not in stopword_set]
  corpus.append(' '.join(stems))

## Data transformation

In [31]:
# Loading BoW dictionary
from sklearn.feature_extraction.text import CountVectorizer
import pickle
cvFile='c1_BoW_Sentiment_Model.pkl'
cv = pickle.load(open(cvFile, "rb"))

In [32]:
# Encode the cleaned reviews with the loaded vectorizer, then convert the
# result to a dense array for the classifier.
bow_matrix = cv.transform(corpus)
X_fresh = bow_matrix.toarray()
X_fresh.shape

(100, 1420)

## Predictions (via sentiment classifier)

In [33]:
import joblib

# Load the persisted sentiment classifier from the working directory.
classifierFile = 'c2_Classifier_Sentiment_Model'
classifier = joblib.load(classifierFile)

In [34]:
# Predict one label per row of X_fresh; the summary cell at the end of the
# notebook counts label 1 as positive.
y_pred = classifier.predict(X_fresh)
# Show the raw label array as a quick sanity check.
print(y_pred)

[0 1 1 0 1 1 1 1 0 1 0 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 0 0 1 1 1 0 0 1 0 0 1
 1 1 1 1 0 0 0 1 0 1 1 0 1 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 1 1 0 1 1 1
 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 1 0]


In [35]:
# Store the predictions next to the reviews they belong to.
predicted_labels = y_pred.tolist()
dataSet['predicted_label'] = predicted_labels
dataSet.head()

Unnamed: 0,Review,predicted_label
0,Spend your money elsewhere.,0
1,Their regular toasted bread was equally satisf...,1
2,The Buffet at Bellagio was far from what I ant...,1
3,"And the drinks are WEAK, people!",0
4,-My order was not correct.,1


In [36]:
# Write the reviews and their predicted labels as a tab-separated file,
# leaving out the DataFrame index.
dataSet.to_csv(
    "c3_Predicted_Sentiments_Fresh_Dump.tsv",
    sep='\t',
    encoding='UTF-8',
    index=False,
)

In [38]:
# Percentage of reviews predicted as label 1 (positive); the remainder is
# reported as negative.
positive_count = sum(1 for label in y_pred if label == 1)
positive = (positive_count / len(y_pred)) * 100

print("Positive:", positive, "%")
print("Negative:", 100 - positive, "%")

Positive: 44.0 %
Negative: 56.0 %
