In [67]:
import numpy as np
import tensorflow as tf
import pickle
from keras.layers.experimental.preprocessing import TextVectorization
import re
import string

import nltk
from nltk import pos_tag
from nltk.corpus import stopwords
# Fetch the NLTK resources that the text helpers in this notebook rely on.
for _nltk_resource in ('wordnet', 'punkt', 'averaged_perceptron_tagger', 'stopwords'):
    nltk.download(_nltk_resource)

# Translation table that deletes every character in string.punctuation.
translator = str.maketrans("", "", string.punctuation)
# Single WordNet lemmatizer instance shared by the helpers.
lemmatizer = nltk.stem.wordnet.WordNetLemmatizer()
# English stop-word list from the NLTK stopwords corpus.
STOP_WORDS = stopwords.words('english')


# strip newline characters
def removeNewline(content):
    """Return ``content`` with every newline character removed."""
    newline_free = content.replace("\n", "")
    return newline_free

# lowercase the text
def toLower(content):
    """Return a lowercase copy of ``content``."""
    lowered = content.lower()
    return lowered

# strip punctuation characters
def removePunctuation(content):
    """Return ``content`` translated through the module-level ``translator`` table.

    ``translator`` is built from ``string.punctuation``, so every character
    in that set is deleted from the text.
    """
    without_punctuation = content.translate(translator)
    return without_punctuation

# lemmatize verbs
def wordLemmatization(content):
    """Lemmatize the verb tokens of ``content`` and re-join all tokens.

    The text is tokenized and POS-tagged with NLTK. Tokens whose tag is one
    of the verb tags listed below are passed to the lemmatizer with part of
    speech "v"; every other token is kept unchanged. The tokens are joined
    back together with single spaces.
    """
    verb_tags = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']
    tokens = nltk.word_tokenize(content)
    lemmatized = []
    for token, tag in pos_tag(tokens):
        if tag in verb_tags:
            token = lemmatizer.lemmatize(token, "v")
        lemmatized.append(token)
    return " ".join(lemmatized)

# drop standalone single characters
def removeSingleChar(content):
    """Delete every single word character that stands alone in ``content``.

    Only the character itself is removed; the whitespace around it stays.
    """
    lone_char = re.compile(r'\b\w{1}\b')
    return lone_char.sub('', content)

# remove pronouns
def removePronouns(content):
    """Drop the tokens that NLTK tags as pronouns and re-join the rest.

    A token is discarded when its POS tag is one of 'PRP', 'PRP$', 'WP' or
    'WP$'. The remaining tokens are joined with single spaces.
    """
    pronoun_tags = ['PRP', 'PRP$', 'WP', 'WP$']
    kept_tokens = []
    for token, tag in pos_tag(nltk.word_tokenize(content)):
        if tag in pronoun_tags:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)

# remove stopwords
def removeStopwords(content, stop_words=None):
    """Remove stop words from ``content`` while keeping the word order.

    Args:
        content: Text whose words are separated by single spaces.
        stop_words: Optional iterable of words to drop. When omitted, the
            module-level ``STOP_WORDS`` list is used.

    Returns:
        The remaining words joined by single spaces, in their original
        order and with repeated words preserved.
    """
    excluded = set(STOP_WORDS if stop_words is None else stop_words)
    # Filter the words in sequence instead of using set difference, so that
    # duplicates survive and the output order is deterministic.
    return " ".join(word for word in content.split(" ") if word not in excluded)

# remove common words
def removeCommon(content, common_words_path="../data/common_word.txt"):
    """Remove the words listed in the common-words file from ``content``.

    Args:
        content: Text whose words are separated by single spaces.
        common_words_path: Path of a text file with one word per line.
            Defaults to the project's common-word list.

    Returns:
        The words of ``content`` that are not listed in the file, in their
        original order, joined by single spaces.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(common_words_path, "r") as f:
        # A set gives constant-time membership tests in the filter below.
        common_words = {line.replace("\n", "") for line in f}
    return " ".join(word for word in content.split(" ") if word not in common_words)

# count positive words
def pos_count(content, positive_words_path="../data/positive-reviews.txt"):
    """Count the words of ``content`` found in the positive-word list.

    Args:
        content: Text that is split on whitespace into words.
        positive_words_path: Path of a text file with one word per line.
            Defaults to the project's positive-word list.

    Returns:
        The number of words (repeats included) that appear in the file.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(positive_words_path) as f:
        # A set gives constant-time membership tests while counting.
        positive_words = {line.replace("\n", "") for line in f}
    return sum(1 for word in content.split() if word in positive_words)

# count negative words
def neg_count(content, negative_words_path="../data/negative-reviews.txt"):
    """Count the words of ``content`` found in the negative-word list.

    Args:
        content: Text that is split on whitespace into words.
        negative_words_path: Path of a text file with one word per line.
            Defaults to the project's negative-word list.

    Returns:
        The number of words (repeats included) that appear in the file.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(negative_words_path) as f:
        # A set gives constant-time membership tests while counting.
        negative_words = {line.replace("\n", "") for line in f}
    return sum(1 for word in content.split() if word in negative_words)

# flag texts containing the word 'no'
def isContainNo(content):
    """Return 1 when 'no' is one of the whitespace-separated words, else 0."""
    tokens = content.split()
    if 'no' in tokens:
        return 1
    return 0

# flag texts containing '!'
def isContainExclamation(content):
    """Return 1 when ``content`` contains an exclamation mark, else 0."""
    if '!' in content:
        return 1
    return 0

# flag texts containing the word 'not'
def isContainNot(content):
    """Return 1 when 'not' is one of the whitespace-separated words, else 0."""
    tokens = content.split()
    if 'not' in tokens:
        return 1
    return 0

# flag texts containing the word 'but'
def isContainBut(content):
    """Return 1 when 'but' is one of the whitespace-separated words, else 0."""
    tokens = content.split()
    if 'but' in tokens:
        return 1
    return 0

# count first- and second-person pronouns
def pron_count(content):
    """Count the whitespace-separated words that are 'i', 'me', 'my', 'you' or 'your'."""
    tracked_pronouns = ['i', 'me', 'my', 'you', 'your']
    total = 0
    for token in content.split():
        if token in tracked_pronouns:
            total += 1
    return total

# log-scaled word count
def getLength(content):
    """Return the natural log of (number of whitespace-separated words + 1)."""
    word_total = len(content.split())
    return np.log(word_total + 1)

# text cleaning pipeline
def preprocessing_text(content):
    """Clean one text by running it through the cleaning steps in order.

    Steps: lowercase, strip punctuation, lemmatize verbs, remove pronouns,
    remove common words. Returns the cleaned string.
    """
    pipeline = (
        toLower,
        removePunctuation,
        wordLemmatization,
        removePronouns,
        removeCommon,
    )
    for step in pipeline:
        content = step(content)
    return content

# hand-crafted numeric features
def feature_engineering(data):
    """Build one numeric feature array per text in ``data``.

    Each text is lowercased and stripped of common words, then described by
    eight values in this order: positive-word count, negative-word count,
    'no' flag, 'not' flag, 'but' flag, pronoun count, '!' flag and the
    log-scaled word count.

    Returns:
        A list holding one NumPy array per input text.
    """
    # Order matters: it fixes the position of every feature in the array.
    extractors = (
        pos_count,
        neg_count,
        isContainNo,
        isContainNot,
        isContainBut,
        pron_count,
        isContainExclamation,
        getLength,
    )
    feature_rows = []
    for text in data:
        cleaned = removeCommon(toLower(text))
        feature_rows.append(np.array([extract(cleaned) for extract in extractors]))
    return feature_rows

def text_vectorize(data):
    """Clean every text in ``data`` and run it through the saved vectorizer.

    Args:
        data: Iterable of raw text strings.

    Returns:
        The output of the restored ``TextVectorization`` layer applied to
        the cleaned texts.
    """
    cleaned_text = [preprocessing_text(text) for text in data]
    # NOTE(review): pickle can execute arbitrary code while loading, so this
    # file must come from a trusted source.
    # The ``with`` block makes sure the file handle is closed after loading.
    with open("../vectorizers/vectorizer.pkl", "rb") as f:
        from_file = pickle.load(f)
    # The pickled object is indexed by 'config' and 'weights' to rebuild the layer.
    vectorizer = TextVectorization.from_config(from_file['config'])
    vectorizer.set_weights(from_file['weights'])
    return vectorizer(cleaned_text)

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/raychannudam/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /home/raychannudam/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/raychannudam/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/raychannudam/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [75]:
# Sample review texts used to try out the pipeline end to end.
data = [
    "The product exceeded my expectations! It's incredibly durable and efficient. I highly recommend it to anyone in need of such a tool.",
    "I was disappointed with the quality of the product. It broke within the first week of use, and the customer service was unhelpful in resolving the issue.",
    "This restaurant never fails to impress me. The food is always delicious, and the staff is friendly and attentive. Definitely worth a visit!",
    "I regret purchasing this item. It doesn't work as advertised, and I feel like I wasted my money.",
    "The movie was a masterpiece! The acting was superb, the storyline was captivating, and the cinematography was stunning. I can't wait to watch it again.",
    "I had a terrible experience at this hotel. The room was dirty, the staff was rude, and there were constant disturbances throughout the night.",
    "The book was a page-turner from start to finish. The characters were well-developed, and the plot kept me hooked until the very end.",
    "I absolutely love this app! It's user-friendly, and it has all the features I need to stay organized and productive.",
    "The concert was a disaster. The sound quality was terrible, and the performers seemed unenthusiastic. I left feeling disappointed and frustrated.",
    "I'm impressed with the efficiency of this product. It's made my daily tasks so much easier to manage.",
]

# Build the two model inputs from the same texts: the vectorized text and
# the hand-crafted numeric features (one row per text).
X_text = text_vectorize(data)
X_num = np.array(feature_engineering(data))

In [76]:
# Load the saved Keras model and run prediction on the samples; the model is
# given the vectorized text and the numeric features as a two-element list.
model = tf.keras.models.load_model("../models/sentimental.h5")
model.predict([X_text, X_num])



array([[0.9999901 ],
       [0.02369062],
       [0.9999273 ],
       [0.29117242],
       [0.99189454],
       [0.01319849],
       [0.05512051],
       [0.9999974 ],
       [0.01917393],
       [0.9999448 ]], dtype=float32)