In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import plotly.express as px
# import plotly.graph_objects as go
# import plotly.figure_factory as ff
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
# from nltk.tokenize import word_tokenize as wt 
# from collections import Counter
import gensim
# import textblob
import spacy
import tensorflow as tf
import pickle
import string
import re
from tensorflow.keras.preprocessing.text import one_hot, Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dropout, Dense
from tensorflow.keras.layers import Flatten, GlobalMaxPooling1D, Embedding, Conv1D, LSTM, Bidirectional

In [2]:
reviews ={"reviews":[ " I can't believe I wasted my money on this keyboard and mouse",
            "I really love this device",
            "It's quite good, but not so great. They can do better",
            "I hate this device, terrible!!!",
            "Where the hell is my refund for this damn product (mouse)",
            "I believe Ebay is so much better than the crap that is HERE!!!",
            "worst keyboard ever",
            "distasteful to the fullest!!!",
            "spoilt on the first day",
            "never shopping here again, waste of money"]}

In [3]:
# https://stackoverflow.com/questions/12851791/removing-numbers-from-string
def preprocess(sent):
    '''Cleans text data up, leaving only 2 or
        more char long non-stepwords composed of A-Z & a-z only
        in lowercase'''
    # lowercase
    sentence = sent.lower()

    # Remove RT
    sentence = re.sub('RT @\w+: '," ",sentence)

    # Remove special characters
    sentence = re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)"," ", sentence)

    # Removing digits
    sentence = sentence.translate(str.maketrans('', '', string.digits))

    # Removing puntuactions
    # sentence = sentence.translate(str.maketrans('', '', string.punctuation))

    # Single character removal
    sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

    sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)  
    # When we remove apostrophe from the word "Mark's", 
    # the apostrophe is replaced by an empty space. 
    # Hence, we are left with single character "s" that we are removing here.

    # Remove multiple spaces
    sentence = re.sub(r'\s+', ' ', sentence)  
    # Next, we remove all the single characters and replace it by a space 
    # which creates multiple spaces in our text. 
    # Finally, we remove the multiple spaces from our text as well.

    return sentence


In [4]:
reviews = pd.DataFrame(reviews)

In [5]:
reviews

Unnamed: 0,reviews
0,I can't believe I wasted my money on this key...
1,I really love this device
2,"It's quite good, but not so great. They can do..."
3,"I hate this device, terrible!!!"
4,Where the hell is my refund for this damn prod...
5,I believe Ebay is so much better than the crap...
6,worst keyboard ever
7,distasteful to the fullest!!!
8,spoilt on the first day
9,"never shopping here again, waste of money"


In [6]:
from spacy.lang.en.stop_words import STOP_WORDS

# print(STOP_WORDS,'\n') # <- set of Spacy's default stop words
all_stopwords = STOP_WORDS
# Adding several stopwords
# all_stopwords |= {'not',"no", "n't", 'n’t','n‘t','cannot','none','without','against'}

# Removing several stop words
all_stopwords-= {'not',"no", "n't", 'n’t','n‘t','cannot','none','without','against','off','against','too'}
my_stop_words = set(all_stopwords) # My own stop words

In [7]:
reviews["cleaned"] = reviews["reviews"].apply(preprocess)

In [8]:
reviews

Unnamed: 0,reviews,cleaned
0,I can't believe I wasted my money on this key...,can believe wasted my money on this keyboard ...
1,I really love this device,i really love this device
2,"It's quite good, but not so great. They can do...",it quite good but not so great they can do better
3,"I hate this device, terrible!!!",i hate this device terrible
4,Where the hell is my refund for this damn prod...,where the hell is my refund for this damn prod...
5,I believe Ebay is so much better than the crap...,i believe ebay is so much better than the crap...
6,worst keyboard ever,worst keyboard ever
7,distasteful to the fullest!!!,distasteful to the fullest
8,spoilt on the first day,spoilt on the first day
9,"never shopping here again, waste of money",never shopping here again waste of money


In [9]:
nlp = spacy.load("en_core_web_sm")
def spacy_tokeniser(sent):
    sent = sent.strip().lower()
    doc = nlp(sent)
    mytokens = [token.lemma_ for token in doc if token.text not in my_stop_words]
    return mytokens

In [10]:
reviews['tokens']= reviews['cleaned'].apply(spacy_tokeniser)

In [11]:
reviews

Unnamed: 0,reviews,cleaned,tokens
0,I can't believe I wasted my money on this key...,can believe wasted my money on this keyboard ...,"[believe, waste, money, keyboard, mouse]"
1,I really love this device,i really love this device,"[love, device]"
2,"It's quite good, but not so great. They can do...",it quite good but not so great they can do better,"[good, not, great, well]"
3,"I hate this device, terrible!!!",i hate this device terrible,"[hate, device, terrible]"
4,Where the hell is my refund for this damn prod...,where the hell is my refund for this damn prod...,"[hell, refund, damn, product, mouse]"
5,I believe Ebay is so much better than the crap...,i believe ebay is so much better than the crap...,"[believe, ebay, well, crap]"
6,worst keyboard ever,worst keyboard ever,"[bad, keyboard]"
7,distasteful to the fullest!!!,distasteful to the fullest,"[distasteful, full]"
8,spoilt on the first day,spoilt on the first day,"[spoilt, day]"
9,"never shopping here again, waste of money",never shopping here again waste of money,"[shop, waste, money]"


In [13]:
vectorizer = pickle.load(open('../../../../../../word2vec_files/glove_twitter_200.pkl', "rb"))
model = tf.keras.models.load_model("../saved_model/saved_bi_lstm_model")
loaded_tokenizer = pickle.load(open("../saved_model/tokeniser.pkl",'rb'))

In [14]:
X = reviews["tokens"]

In [15]:
X[0]

['believe', 'waste', 'money', 'keyboard', 'mouse']

In [16]:
X = loaded_tokenizer.texts_to_sequences(X)
X[0]

[482, 129, 54, 16, 2]

In [17]:
max_len = 300
X = pad_sequences(X,padding="post",maxlen=max_len)

In [18]:
X[1].shape # shape (300,)
# X[1].ndim # dimension

(300,)

In [20]:
X[5]

array([482,  56, 492,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   

In [21]:
y_pred = model.predict(X)



In [22]:
y_pred

array([[1.2046933e-01, 8.7769550e-01, 1.8351497e-03],
       [4.7083075e-05, 3.1707799e-03, 9.9678206e-01],
       [1.4427634e-03, 2.3255816e-02, 9.7530138e-01],
       [9.2879611e-01, 7.0344016e-02, 8.5986435e-04],
       [5.4354697e-01, 4.5634258e-01, 1.1048846e-04],
       [4.2612899e-02, 9.3917608e-01, 1.8211039e-02],
       [7.5812197e-01, 2.3495351e-01, 6.9245468e-03],
       [1.4287298e-02, 9.8175645e-01, 3.9562308e-03],
       [2.9308384e-02, 8.9438653e-01, 7.6305129e-02],
       [1.9438477e-02, 9.7913235e-01, 1.4291094e-03]], dtype=float32)

In [23]:
reviews["predictions"] = np.argmax(y_pred, axis = 1)
reviews

Unnamed: 0,reviews,cleaned,tokens,predictions
0,I can't believe I wasted my money on this key...,can believe wasted my money on this keyboard ...,"[believe, waste, money, keyboard, mouse]",1
1,I really love this device,i really love this device,"[love, device]",2
2,"It's quite good, but not so great. They can do...",it quite good but not so great they can do better,"[good, not, great, well]",2
3,"I hate this device, terrible!!!",i hate this device terrible,"[hate, device, terrible]",0
4,Where the hell is my refund for this damn prod...,where the hell is my refund for this damn prod...,"[hell, refund, damn, product, mouse]",0
5,I believe Ebay is so much better than the crap...,i believe ebay is so much better than the crap...,"[believe, ebay, well, crap]",1
6,worst keyboard ever,worst keyboard ever,"[bad, keyboard]",0
7,distasteful to the fullest!!!,distasteful to the fullest,"[distasteful, full]",1
8,spoilt on the first day,spoilt on the first day,"[spoilt, day]",1
9,"never shopping here again, waste of money",never shopping here again waste of money,"[shop, waste, money]",1
