In [34]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer #stemmer

import re
import string
from bs4 import BeautifulSoup

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [0]:
import warnings
warnings.filterwarnings("ignore")  

import sys
import time

from sklearn.feature_extraction.text import CountVectorizer #For Bag of words
from sklearn.feature_extraction.text import TfidfVectorizer #For TF-IDF
from gensim.models import Word2Vec                          #For Word2Vec

from sklearn.model_selection import train_test_split
from keras.utils import np_utils

###

import itertools
import os

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score

from keras.models import Sequential, Model, load_model

from keras.callbacks import ModelCheckpoint, EarlyStopping


from keras.layers import Input, Dense, Activation, Dropout, LSTM, Flatten
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.preprocessing import text, sequence
from keras import utils

In [0]:
def load_model(file_path):
  model = keras.models.load_model(file_path)
  return model

def predict_class(input_x, model):
  y_probs = model.predict(input_x) 
  y_classes = y_probs.argmax(axis=-1)
  return y_probs

uri_re = r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'

def stripTagsAndUris(x):
    if x:
        # BeautifulSoup on content
        soup = BeautifulSoup(x, "html.parser")
        # Stripping all <code> tags with their content if any
        if soup.code:
            soup.code.decompose()
        # Get all the text out of the html
        text =  soup.get_text()
        # Returning text stripping out all uris
        return re.sub(uri_re, "", text)
    else:
        return ""

def removePunctuation(x):
    # Lowercasing all words
    x = x.lower()
    # Removing non ASCII chars
    x = re.sub(r'[^\x00-\x7f]',r' ',x)
    # Removing (replacing with empty spaces actually) all the punctuations
    return re.sub("["+string.punctuation+"]", " ", x)

snow = nltk.stem.SnowballStemmer('english')
stops = set(stopwords.words("english"))
def stemAndRemoveStopwords(x):
    # Removing all the stopwords
    filtered_words = [snow.stem(word) for word in x.split() if word not in stops]
    return " ".join(filtered_words)
    
def remove_pattern(input_txt, pattern):
    r = re.findall(pattern, input_txt)
    for i in r:
        input_txt = re.sub(i, '', input_txt)    
    return input_txt
  
def preprocess(df):
    df["content"] = np.vectorize(remove_pattern)(df["content"], "@[\w]*")
    df["content"] = df["content"].map(stripTagsAndUris)
    df["content"] = df["content"].map(removePunctuation)
    df["content"] = df["content"].map(stemAndRemoveStopwords)
    return df
  
def insert_text(input_text, dataframe):
    dataframe = dataframe.append({'content' : input_text}, ignore_index=True)
    return dataframe

In [45]:
df = pd.DataFrame(columns=['content', 'sentiment'])
print(df)
df = insert_text("I had great lunch today I feel good", df)
print(df)
df = preprocess(df)
print(df)
model = load_model("../content/drive/My Drive/Colab Notebooks/models/model-simple.h5")
print(df['content'][0])

tokenize = text.Tokenizer(num_words=3000, char_level=False)
x_text = tokenize.texts_to_matrix(df['content'][0])
# angry, happy, neutral, sad
# 1. 0. 0. 0. 
# 0. 1. 0. 0.
# 0. 0. 1. 0.
# 0. 0. 0. 1.
print(predict_class(x_text, model))

Empty DataFrame
Columns: [content, sentiment]
Index: []
                               content sentiment
0  I had great lunch today I feel good       NaN
                       content sentiment
0  great lunch today feel good       NaN
great lunch today feel good
[[0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.4894602  0.15393667]
 [0.21192788 0.14467524 0.48946