### Import Libraries

In [1]:
import numpy as np
import pandas as pd

import re
import string

import nltk

from nltk.stem import PorterStemmer
# Shared stemmer instance; preprocessing() calls ps.stem() on every token.
ps = PorterStemmer()

### Load the Stop Words

In [2]:
# Read the stop-word file; each line of the file becomes one entry of `sw`.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default.
with open('../static/model/corpora/stopwords/english', 'r', encoding='utf-8') as file:
    sw = file.read().splitlines()

### Import Vocabulary

In [3]:
# Read the vocabulary file; column 0 holds the tokens, in file order.
# dtype=str together with keep_default_na=False keeps every entry as the
# literal text from the file. With pandas' default NA parsing, words such as
# "nan" or "null" would be turned into float NaN, which can never match a
# token when the vocabulary is compared against a sentence.
vocab = pd.read_csv('../static/model/vocab.txt', header=None, dtype=str, keep_default_na=False)
tokens = vocab[0].tolist()

### Import Model

In [4]:
import pickle

# Deserialize the model object that get_prediction() calls .predict() on.
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# file must only ever come from a trusted source.
with open('../static/model/model.pickle', 'rb') as file:
    model = pickle.load(file)

### Text Preprocessing

In [5]:
def preprocessing(text):
    """Clean a single raw tweet and return it as a one-element Series.

    Steps, in order: lowercase every whitespace-separated token, drop tokens
    that start with an ``http://`` or ``https://`` scheme, strip ASCII
    punctuation, strip digits, drop stop words, and stem what is left.

    Relies on two notebook-level names: ``sw`` (collection of stop words) and
    ``ps`` (object exposing ``stem(word)``).

    Parameters
    ----------
    text : str
        The raw tweet text.

    Returns
    -------
    pandas.Series
        A Series of length 1 named ``"tweet"`` whose only value is the
        cleaned text, with tokens separated by single spaces.
    """
    # uppercase to lowercase
    words = [word.lower() for word in text.split()]

    # remove links: a token that begins with an http(s) scheme is dropped whole
    words = [word for word in words if not re.match(r'https?://', word)]
    cleaned = " ".join(words)

    # remove punctuations
    cleaned = cleaned.translate(str.maketrans('', '', string.punctuation))

    # remove numbers
    # The previous pattern was '/d+' (forward slash), which never matched a
    # digit, so numbers were silently kept.
    # NOTE(review): confirm the training-time preprocessing strips digits the
    # same way, otherwise the vocabulary may contain digit-bearing tokens.
    cleaned = re.sub(r'\d+', '', cleaned)

    # remove stopwords (set gives constant-time membership checks)
    stop_words = set(sw)
    kept = [word for word in cleaned.split() if word not in stop_words]

    # stemming
    stemmed = " ".join(ps.stem(word) for word in kept)

    return pd.Series([stemmed], name="tweet")

### Vectorization

In [6]:
def vectorizer(dataset, vocabulary):
    """Encode sentences as binary bag-of-words rows over ``vocabulary``.

    Entry ``[r, c]`` of the result is 1 when ``vocabulary[c]`` appears as a
    whole whitespace-separated token in the r-th sentence, and 0 otherwise.

    Parameters
    ----------
    dataset : iterable of str
        The sentences to encode; one output row per sentence.
    vocabulary : sequence of str
        The ordered vocabulary; one output column per entry.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(number of sentences, len(vocabulary))``.
        An empty ``dataset`` yields shape ``(0, len(vocabulary))``.
    """
    sentences = list(dataset)
    matrix = np.zeros((len(sentences), len(vocabulary)), dtype=np.float32)

    for row, sentence in enumerate(sentences):
        # Split once per sentence and use a set, instead of re-splitting the
        # sentence for every vocabulary entry.
        present = set(sentence.split())

        for col, word in enumerate(vocabulary):
            if word in present:
                matrix[row, col] = 1

    return matrix

### Get Prediction

In [12]:
def get_prediction(vectorized_text):
    """Classify one vectorized text and return its sentiment label.

    Calls the notebook-level ``model.predict`` and maps its single output:
    a label equal to 1 becomes ``'Negative'``, anything else ``'Positive'``.

    Parameters
    ----------
    vectorized_text : array-like
        Feature matrix holding exactly one sample (one row).

    Returns
    -------
    str
        ``'Negative'`` or ``'Positive'``.

    Raises
    ------
    ValueError
        If the model does not return exactly one label. Comparing a whole
        array inside ``if`` is ambiguous for several labels and unreliable
        for zero labels, so the single label is extracted explicitly.
    """
    labels = np.asarray(model.predict(vectorized_text)).ravel()
    if labels.size != 1:
        raise ValueError(
            "get_prediction expects exactly one sample, got %d predictions" % labels.size
        )

    return 'Negative' if labels[0] == 1 else 'Positive'

In [21]:
# End-to-end check on one sample: clean -> vectorize -> classify.
txt = 'Worst product I ever buy. Hate this'
preprocessed_txt = preprocessing(txt)
vectorized_txt = vectorizer(preprocessed_txt, tokens)
prediction = get_prediction(vectorized_txt)
# Bare last expression so the notebook displays the predicted label.
prediction

'Negative'