## NLP Model



## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset file stored there can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the tab-separated Alexa reviews file from the mounted Drive folder.
dataset = pd.read_csv(
    '/content/drive/MyDrive/amazon_alexa.tsv',
    delimiter='\t',
    quoting=3,  # 3 is csv.QUOTE_NONE: quote characters are read as plain text
)

In [None]:
# Summary statistics (count, mean, std, quartiles) for the numeric columns.
dataset.describe()

Unnamed: 0,rating,feedback
count,3150.0,3150.0
mean,4.463175,0.918413
std,1.068506,0.273778
min,1.0,0.0
25%,4.0,1.0
50%,5.0,1.0
75%,5.0,1.0
max,5.0,1.0


In [None]:
# Report, column by column, whether any value is missing.

dataset.isna().any()

rating              False
date                False
variation           False
verified_reviews    False
feedback            False
dtype: bool

## Cleaning the texts

In [None]:
import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Build the stemmer and the stop-word set once, outside the loop: they do not
# depend on the review being processed.
p_s = PorterStemmer()
all_stopwords = stopwords.words('english')
# Keep 'not' in the text: dropping it would erase negation ("not good"),
# which matters for sentiment.
all_stopwords.remove('not')
stopword_set = set(all_stopwords)

# Clean every review in the dataset (no hard-coded row count).
corpus = []
for raw_review in dataset['verified_reviews']:
    # Replace everything that is not a letter (punctuation, digits) with a space.
    letters_only = re.sub('[^a-zA-Z]', ' ', raw_review)
    words = letters_only.lower().split()
    # Drop stop words and reduce the remaining words to their stems.
    stemmed_words = [p_s.stem(word) for word in words if word not in stopword_set]
    corpus.append(' '.join(stemmed_words))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Show only the first few cleaned reviews; printing the whole corpus floods
# the notebook output.
print(corpus[:5])

['love echo', 'love', 'sometim play game answer question correctli alexa say got wrong answer like abl turn light away home', 'lot fun thing yr old learn dinosaur control light play game like categori nice sound play music well', 'music', 'receiv echo gift need anoth bluetooth someth play music easili access found smart speaker wait see els', 'without cellphon cannot use mani featur ipad not see use great alarm u r almost deaf hear alarm bedroom live room reason enough keep fun ask random question hear respons not seem smartbon polit yet', 'think th one purchas work get one everi room hous realli like featur offer specifili play music echo control light throughout hous', 'look great', 'love listen song heard sinc childhood get news weather inform great', 'sent year old dad talk constantli', 'love learn knew thing eveyday still figur everyth work far easi use understand make laugh time', 'purchas mother knee problem give someth tri come not get around fast like enjoy littl big thing ale

## Creating the Bag of Words model

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words features: one column per token, vocabulary capped at 2500 entries.
# NOTE(review): the vectorizer is fitted on the full corpus before the
# train/test split, so the vocabulary also reflects test reviews — consider
# fitting it on the training reviews only.
cv = CountVectorizer(max_features = 2500)
X = cv.fit_transform(corpus).toarray()
# Select the label by name instead of by position so that a change in column
# order cannot silently swap the target.
y = dataset['feedback'].values

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

## Training the Deep learning model on the Training set

In [None]:
import tensorflow as tf

In [None]:
# Seed TensorFlow's global random generator so weight initialisation is
# repeatable on a fresh kernel (some GPU ops may still be non-deterministic).
tf.random.set_seed(0)

# Feed-forward binary classifier: two ReLU hidden layers and one sigmoid output.
ann = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=10, activation='relu'),    # hidden layer 1
    tf.keras.layers.Dense(units=6, activation='relu'),     # hidden layer 2
    tf.keras.layers.Dense(units=1, activation='sigmoid'),  # output layer
])

In [None]:
from tensorflow.keras.optimizers import Adam

# Pass the imported optimizer explicitly (default settings, same as the
# "adam" string) so the import above is actually used and the learning rate
# can be tuned here later.
ann.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Train on the training split: 15 passes over the data in mini-batches of 32.
ann.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=15,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f4da3766e90>

## Predicting the Test set results

In [None]:
# The sigmoid output is a score in [0, 1]; scores above 0.5 count as positive.
y_pred = ann.predict(X_test) > 0.5
# Print only a preview — the full array stays available in y_pred.
print(y_pred[:10])

[[ True]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [False]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 [ True]
 

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Compare the thresholded predictions with the true test labels.
cm = confusion_matrix(y_test, y_pred)
print(cm)

# Left as the last expression so the notebook displays the accuracy.
accuracy_score(y_test, y_pred)

[[ 23  31]
 [  8 568]]


0.9380952380952381

## Single Prediction

In [None]:
 # 1
input_text1 = "Amazon Alexa is the best product. It gives me all the updates on news and new songs. I can also control my devices from Alexa. Really Happy."
# Clean the text exactly like the training corpus (letters only, lower-case,
# stop words removed except 'not', Porter stemming). The vectorizer's
# vocabulary consists of stemmed tokens, so raw words such as "updates" or
# "happy" would otherwise not match any feature.
stemmer1 = PorterStemmer()
stop_words1 = set(stopwords.words('english')) - {'not'}
tokens1 = re.sub('[^a-zA-Z]', ' ', input_text1).lower().split()
cleaned_text1 = ' '.join(stemmer1.stem(tok) for tok in tokens1 if tok not in stop_words1)
input_array1 = cv.transform([cleaned_text1]).toarray()
y_pred1 = ann.predict(input_array1)
y_pred1 = (y_pred1 > 0.5)  # True = positive feedback
print(y_pred1)

[[ True]]


In [None]:
# 2
input_text2 = "I tried Alexa . But it did not work for me. the speaker voice is too low and it does not identify Indian accent. I think Amazon can do better. Buying alexa is waste of money."
# Clean the text exactly like the training corpus (letters only, lower-case,
# stop words removed except 'not', Porter stemming). The vectorizer's
# vocabulary consists of stemmed tokens, so raw words such as "tried" or
# "money" would otherwise not match any feature.
stemmer2 = PorterStemmer()
stop_words2 = set(stopwords.words('english')) - {'not'}
tokens2 = re.sub('[^a-zA-Z]', ' ', input_text2).lower().split()
cleaned_text2 = ' '.join(stemmer2.stem(tok) for tok in tokens2 if tok not in stop_words2)
input_array2 = cv.transform([cleaned_text2]).toarray()
y_pred2 = ann.predict(input_array2)
y_pred2 = (y_pred2 > 0.5)  # True = positive feedback
print(y_pred2)


[[False]]
