# In [8]:  (Jupyter cell marker left over from the notebook export)
import wx
import re
import pickle
import pandas as pd
import sklearn
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import nltk
from nltk.stem import WordNetLemmatizer
import numpy as np
nltk.download('wordnet')
nltk.download('omw-1.4')
import neattext.functions as nfx
import matplotlib.pyplot as plt
import plotly.express as plx
import keras
from tqdm import tqdm
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

## If the app crashes or doesn't start, drop the stale `app` object left in the
## namespace by a previous run of this cell.  A bare `del app` raises NameError
## when no `app` exists yet (fresh kernel, or running this file as a script),
## so remove the name only if it is present.
globals().pop("app", None)

# Recommendation table, read once when the cell/module runs.  MyFrame.on_press
# filters it on the 'category' and 'sentiment' columns and displays 'info'.
recom = pd.read_csv(filepath_or_buffer="./data/rec.csv")


## Establishing Dictionary

## Emoticon -> meaning lookup used by preprocess().
## Insertion order is significant: preprocess() substitutes the keys one after
## another, in this order.
emojis = {
    ":)": "smile",
    ":-)": "smile",
    ";d": "wink",
    ":-E": "vampire",
    ":(": "sad",
    ":-(": "sad",
    ":-<": "sad",
    ":P": "raspberry",
    ":O": "surprised",
    ":-@": "shocked",
    ":@": "shocked",
    ":-$": "confused",
    ":\\": "annoyed",
    ":#": "mute",
    ":X": "mute",
    ":^)": "smile",
    ":-&": "confused",
    "$_$": "greedy",
    "@@": "eyeroll",
    ":-!": "confused",
    ":-D": "smile",
    ":-0": "yell",
    "O.o": "confused",
    "<(-_-)>": "robot",
    "d[-_-]b": "dj",
    ":'-)": "sadsmile",
    ";)": "wink",
    ";-)": "wink",
    "O:-)": "angel",
    "O*-)": "angel",
    "(:-D": "gossip",
    "=^.^=": "cat",
}

## English stopwords, kept as an ordered list (not a set).
## The words are stored apostrophe-free ("shes", "youll", ...).
stopwordlist = """
    a about above after again ain all am an
    and any are as at be because been before
    being below between both by can d did do
    does doing down during each few for from
    further had has have having he her here
    hers herself him himself his how i if in
    into is it its itself just ll m ma
    me more most my myself now o of on once
    only or other our ours ourselves out own re
    s same she shes should shouldve so some such
    t than that thatll the their theirs them
    through to too under until up ve very was
    we were what when where which while who whom
    why will with won y you youd youll youre
    youve your yours yourself yourselves
""".split()


#load the pickled vectorisers and models
def _load_pickle(path):
    """Unpickle and return the single object stored in the file at ``path``."""
    # SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
    # file.  Only point this at pickle files you created or fully trust.
    # The ``with`` block closes the handle even if unpickling raises.
    with open(path, 'rb') as file:
        return pickle.load(file)


def load_models():
    """Load the four pickled artefacts from the ``./pickle`` directory.

    Paths are relative to the current working directory.

    Returns:
        A 4-tuple ``(vectoriser, BNBmodel, suicidemodel, vectorisersui)``:
        the vectoriser and BNB model from ``vectoriser.pickle`` / ``BNB.pickle``,
        followed by the category model and its vectoriser from
        ``Suicide-BNB.pickle`` / ``vectoriser-ngram12-suicide.pickle``.

    Raises:
        OSError: if any of the files cannot be opened.
        pickle.UnpicklingError (or whatever the unpickled classes raise):
            if a file's content cannot be unpickled.
    """
    # Load the vectoriser.
    vectoriser = _load_pickle('./pickle/vectoriser.pickle')
    # Load the BNB Model.
    BNBmodel = _load_pickle('./pickle/BNB.pickle')

    # Load the BNB model for category.
    suicidemodel = _load_pickle('./pickle/Suicide-BNB.pickle')
    # Load the vectoriser for category.
    vectorisersui = _load_pickle('./pickle/vectoriser-ngram12-suicide.pickle')

    return vectoriser, BNBmodel, suicidemodel, vectorisersui

#sentiment prediction
def predict(vectoriser, model, text):
    """Run the sentiment model over ``text`` and tabulate the results.

    Args:
        vectoriser: object whose ``transform`` turns the preprocessed strings
            into model features.
        model: object whose ``predict`` returns one label per input row.
        text: iterable of raw strings.

    Returns:
        A DataFrame with columns ``text`` (the raw input) and ``sentiment``,
        in which the values 0 and 1 have been replaced by "Negative" and
        "Positive" respectively.
    """
    features = vectoriser.transform(preprocess(text))
    labels = model.predict(features)

    # Pair every raw input with its predicted label.
    rows = list(zip(text, labels))

    frame = pd.DataFrame(rows, columns = ['text','sentiment'])
    return frame.replace([0,1], ["Negative","Positive"])

#mood / category prediction
def predict2(vectoriser, model, text):
    """Run the category model over ``text`` and tabulate the results.

    Args:
        vectoriser: object whose ``transform`` turns the cleaned strings into
            model features.
        model: object whose ``predict`` returns one label per input row.
        text: iterable of raw strings.

    Returns:
        A DataFrame with columns ``text`` (the raw input) and ``category``
        (the label exactly as returned by ``model.predict``).
    """
    features = vectoriser.transform(clean_text(text))
    labels = model.predict(features)

    # Pair every raw input with its predicted label; labels are left as-is.
    rows = list(zip(text, labels))

    return pd.DataFrame(rows, columns = ['text','category'])

# text preprocessing for mood model -- earlier version, kept commented out
# (it returned a (cleaned_text, text_length) tuple)
##def clean_text(text):
##    text_length=[]
##    cleaned_text=[]
##    for sent in tqdm(text):
##        sent=sent.lower()
##        sent=nfx.remove_special_characters(sent)
##        sent=nfx.remove_stopwords(sent)
#         sent=nfx.remove_shortwords(sent)
##        text_length.append(len(sent.split()))
##        cleaned_text.append(sent)
##    return cleaned_text,text_length
    
#preprocess text input
def preprocess(textdata):
    """Normalise raw posts into space-separated, lemmatized tokens.

    For each input string, in this order:
      1. lower-case it;
      2. replace URLs (http://, https://, " www.") with " URL";
      3. replace every key of the module-level ``emojis`` dict with
         "EMOJI" + its meaning;
      4. replace @mentions with " USER";
      5. replace every non-alphanumeric character with a space;
      6. collapse any character repeated three or more times to two;
      7. drop one-character words and lemmatize the rest.

    Args:
        textdata: iterable of strings.

    Returns:
        A list with one processed string per input.  Each kept word is
        followed by a single space, so non-empty results end with a space.

    NOTE(review): the text is lower-cased before the emoji substitution, so
    emoji keys that contain upper-case letters (e.g. ':P', ':-D') can never
    match.  This is left unchanged on purpose: the pickled vectoriser was
    presumably fitted on text produced by this exact pipeline -- confirm
    before altering the order.
    """
    # Only a lemmatizer is used; no stemming takes place.
    wordLemm = WordNetLemmatizer()

    # Patterns are compiled once per call rather than once per tweet.
    # All are raw strings: the previous '@[^\s]+' relied on an invalid escape
    # sequence, which newer Python versions warn about.
    urlPattern        = re.compile(r"((http://)[^ ]*|(https://)[^ ]*|( www\.)[^ ]*)")
    userPattern       = re.compile(r"@[^\s]+")
    alphaPattern      = re.compile(r"[^a-zA-Z0-9]")
    sequencePattern   = re.compile(r"(.)\1\1+")
    seqReplacePattern = r"\1\1"

    processedText = []
    for tweet in textdata:
        tweet = tweet.lower()

        # Replace all URLs with 'URL'.
        tweet = urlPattern.sub(' URL', tweet)
        # Replace all emojis (sequentially, in dict order).
        for emoji, meaning in emojis.items():
            tweet = tweet.replace(emoji, "EMOJI" + meaning)
        # Replace @USERNAME with 'USER'.
        tweet = userPattern.sub(' USER', tweet)
        # Replace everything that is not a letter or digit with a space.
        tweet = alphaPattern.sub(" ", tweet)
        # Replace 3 or more consecutive identical characters by 2.
        tweet = sequencePattern.sub(seqReplacePattern, tweet)

        # Stopword filtering is intentionally not applied here; only
        # single-character words are dropped before lemmatizing.
        tweetwords = ''.join(
            wordLemm.lemmatize(word) + ' '
            for word in tweet.split()
            if len(word) > 1
        )
        processedText.append(tweetwords)

    return processedText

# text preprocessing for mood model
def clean_text(text):
    """Clean each string in ``text`` for the category model.

    Every string is lower-cased, then passed through neattext's
    ``remove_special_characters`` and ``remove_stopwords``.  Iteration is
    wrapped in ``tqdm``, so a progress bar is emitted for every call.

    Args:
        text: iterable of strings.

    Returns:
        A list of cleaned strings, one per input, in the same order.
    """
    cleaned_text = []
    for sent in tqdm(text):
        sent = nfx.remove_special_characters(sent.lower())
        sent = nfx.remove_stopwords(sent)
        # Short words are kept: the nfx.remove_shortwords step is disabled.
        cleaned_text.append(sent)
    return cleaned_text

#defining frame
class MyFrame(wx.Frame):
    """Main window: an input box, a submit button and an output box.

    Pressing the button classifies the entered text with the sentiment model
    and, when the result is "Negative", with the category model, then shows a
    matching entry from the module-level ``recom`` table in the output box.
    """

    def __init__(self):
        """Build the widgets, lay them out vertically and show the frame."""
        super().__init__(parent=None, title='Sentiment Analysis')
        panel = wx.Panel(self)
        my_sizer = wx.BoxSizer(wx.VERTICAL)

        # Input box for the user's post.
        self.text_ctrl = wx.TextCtrl(panel)
        my_sizer.Add(self.text_ctrl, 0, wx.ALL | wx.EXPAND, 5)

        my_btn = wx.Button(panel, label='Submit the Post')
        my_btn.Bind(wx.EVT_BUTTON, self.on_press)
        my_sizer.Add(my_btn, 0, wx.ALL | wx.CENTER, 5)

        # Output box for the recommendation / message.
        self.text_ctrl1 = wx.TextCtrl(panel)
        my_sizer.Add(self.text_ctrl1, 0, wx.ALL | wx.EXPAND, 5)

        panel.SetSizer(my_sizer)

        # Filled by the first successful load_models() call in on_press, so
        # the pickles are read once instead of on every button press.
        self._models = None

        self.Show()

    def _show_recommendation(self, category, label):
        """Show the ``info`` entries of ``recom`` for ``category``; print ``label``."""
        matches = recom.loc[recom['category'] == category]
        # NOTE(review): Series.to_string() includes the row index in the
        # displayed text; kept as in the original UI.
        self.text_ctrl1.SetValue(matches['info'].to_string())
        print(label)

    def on_press(self, event):
        """Handle the submit button: classify the input and show a recommendation.

        Args:
            event: the wx button event (unused).
        """
        value = self.text_ctrl.GetValue()
        # Treat whitespace-only input like empty input.
        if not value.strip():
            message = "Try harder by entering something!"
            self.text_ctrl1.SetValue(message)
            print(message)
            return

        # Lazy, one-time load: a failure still surfaces on the button press,
        # but later presses reuse the already loaded models.
        if self._models is None:
            self._models = load_models()
        vectoriser, BNBmodel, suicidemodel, vectorisersui = self._models

        text = [value]
        df = predict(vectoriser, BNBmodel, text)

        # Exactly one row was submitted, so inspect that row's label directly
        # instead of comparing against the Series' printed representation.
        if df['sentiment'].iloc[0] != 'Negative':
            positive = recom.loc[recom['sentiment'] == 'positive']
            positive = positive['info'].sample(n=1)
            self.text_ctrl1.SetValue(positive.to_string())
            print(df.sentiment)
            return

        df1 = predict2(vectorisersui, suicidemodel, text)
        if df1['category'].iloc[0] != 'non-suicide':
            self._show_recommendation('sui', 'suicide')
            return

        # Keyword routing, case-insensitive so "Breakup" matches as well.
        lowered = value.lower()
        if 'breakup' in lowered:
            self._show_recommendation('breakup', 'breakup')
        elif 'stress' in lowered:
            self._show_recommendation('stress', 'stress')
        else:
            self._show_recommendation('sad', 'sad')


# Entry point: runs when this file is executed as a script or as a notebook cell.
if __name__ == '__main__':
    # The wx application object is created before any window.
    # `app` is a module-level name; the cleanup near the top of the file
    # refers to it when the cell is re-run.
    app = wx.App()
    # MyFrame.__init__ builds the widgets and calls Show() itself.
    frame = MyFrame()
    # Hand control to the wx event loop.
    app.MainLoop()


# --- Captured notebook output (not code) ---
# [nltk_data] Downloading package wordnet to
# [nltk_data]     C:\Users\clickratio\AppData\Roaming\nltk_data...
# [nltk_data]   Package wordnet is already up-to-date!
# [nltk_data] Downloading package omw-1.4 to
# [nltk_data]     C:\Users\clickratio\AppData\Roaming\nltk_data...
# [nltk_data]   Package omw-1.4 is already up-to-date!
# 100%|██████████| 1/1 [00:00<?, ?it/s]
# 100%|██████████| 1/1 [00:00<?, ?it/s]
# 100%|██████████| 1/1 [00:00<?, ?it/s]
# 100%|██████████| 1/1 [00:00<?, ?it/s]
