In [1]:
import keras
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from gensim.models import Word2Vec
import pickle

In [7]:
# Trained gensim Word2Vec embeddings. All paths here are relative, so they resolve
# against the notebook's current working directory.
word2vec_model = Word2Vec.load('./sentimentAnalysis/models/word2vec.model')
# Keras model stored in HDF5 format. NOTE(review): judging by the variable and file
# names this is the (LSTM) sentiment classifier - confirm.
s_analysis_model = load_model('./sentimentAnalysis/models/lstm_model.h5')


# SECURITY: pickle.load can execute arbitrary code while deserializing; only load
# tokenizer files that come from a trusted source.
with open('./sentimentAnalysis/models/word_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)



In [3]:
# Configuration shared by the sentiment helpers.
SEQUENCE_LENGTH = 300  # maxlen passed to pad_sequences for every input
POSITIVE = "POSITIVE"
NEGATIVE = "NEGATIVE"
NEUTRAL = "NEUTRAL"
# (highest score still labelled NEGATIVE, lowest score already labelled POSITIVE);
# anything strictly in between is NEUTRAL.
SENTIMENT_THRESHOLDS = (0.4, 0.7)


def decode_sentiment(score, include_neutral=True):
    """Translate a numeric sentiment score into a label string.

    Args:
        score: Numeric score to classify.
        include_neutral: When true, use the three-way scheme driven by
            SENTIMENT_THRESHOLDS; otherwise split two ways at 0.5.

    Returns:
        NEGATIVE, NEUTRAL or POSITIVE (NEUTRAL only in the three-way scheme).
    """
    if not include_neutral:
        # Binary scheme: everything below the midpoint is negative.
        if score < 0.5:
            return NEGATIVE
        return POSITIVE

    # Three-way scheme: both threshold values are inclusive.
    if score <= SENTIMENT_THRESHOLDS[0]:
        return NEGATIVE
    if score >= SENTIMENT_THRESHOLDS[1]:
        return POSITIVE
    return NEUTRAL
    
    
def predict_sentiment(text, include_neutral=True):
    """Score a piece of text with the sentiment classifier and label the result.

    Args:
        text: The raw text to analyse.
        include_neutral: Forwarded to decode_sentiment; selects the three-way
            (True) or two-way (False) labelling scheme.

    Returns:
        dict with "label" (the decoded sentiment label) and "score" (the
        model output converted to a Python float).
    """
    # Turn the text into a sequence of token ids, padded to SEQUENCE_LENGTH.
    x_test = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=SEQUENCE_LENGTH)
    # Score with the Keras classifier. The gensim Word2Vec model only provides
    # embeddings and has no predict() method, so it cannot be used here.
    # NOTE(review): float(score) below assumes the model emits a single value per
    # input - confirm against the model's output layer.
    score = s_analysis_model.predict([x_test])[0]
    # Map the numeric score onto a label.
    label = decode_sentiment(score, include_neutral=include_neutral)

    return {"label": label, "score": float(score)}

In [4]:
# Display names of the therapy categories, in the same order as the keyword
# lists collected in therapy_category_keywords below.
CATEGORIES = [
    "Behaviorial",
    "Body Image",
    "Grief",
    "Relationship",
    "Depression",
    "Physical",
]

# Seed keywords per category; input words are compared against these.
behaviorial_keywords = [
    "sleep", "ocd", "anxiety", "control", "mood",
    "behavior", "uncontrollable", "anger", "compulsory",
]
body_image_keywords = [
    "fat", "skinny", "obese", "ugly", "acne",
    "unpopular", "body", "eating", "drinking", "weight",
]
grief_keywords = [
    "grief", "loss", "sadness", "death", "regret",
    "shock", "denial", "disbelief", "overwhelmed",
]
relationship_keywords = [
    "cheat", "feelings", "love", "boyfriend", "girlfriend", "wife",
    "husband", "relations", "friend", "trauma", "abuse", "trust",
]
depression_keywords = [
    "depression", "suicide", "kill", "death", "misery",
    "motivation", "stress", "lonely", "illness", "drugs",
]
physical_keywords = [
    "burn", "pain", "hurt", "physical", "surgery", "broken", "tear",
    "sprain", "blood", "bone", "ache", "walk", "rest",
]

# Order matters: position i corresponds to CATEGORIES[i].
therapy_category_keywords = [
    behaviorial_keywords,
    body_image_keywords,
    grief_keywords,
    relationship_keywords,
    depression_keywords,
    physical_keywords,
]


In [5]:
import re
import nltk
from nltk.corpus import stopwords
from  nltk.stem import SnowballStemmer
# Make sure the NLTK stop-word corpus is available locally before it is read.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to C:\Users\Max
[nltk_data]     Xiao\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
# English stop words from NLTK; preprocess() drops any token found in this list.
stop_words = stopwords.words("english")
# Snowball stemmer for English; preprocess() applies it only when stem=True.
stemmer = SnowballStemmer("english")

# Spans that preprocess() replaces with a single space. Alternatives are tried
# left to right at each position:
#   @\S+            a user mention
#   https?:\S+      an http/https link
#   http?:\S        "htt:" or "http:" plus one character (kept from the original pattern)
#   [^A-Za-z0-9]+   any run of non-alphanumeric characters
# NOTE: the last alternative also consumes "@", so a mention that follows
# whitespace only loses its "@" ("hi @bob" -> "hi bob").
# Written as a raw string: "\S" inside a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
TEXT_CLEANING_RE = r"@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"

def preprocess(text, stem=False):
    """Clean a piece of text and split it into tokens.

    The text is converted to a lower-cased string, every span matching
    TEXT_CLEANING_RE is replaced by a space, and the result is split on
    whitespace. Tokens present in stop_words are discarded.

    Args:
        text: Value to clean; it is passed through str() first.
        stem: When true, each kept token is stemmed with the module-level stemmer.

    Returns:
        list of str: the remaining tokens, in their original order.
    """
    cleaned = re.sub(TEXT_CLEANING_RE, ' ', str(text).lower()).strip()
    kept = [word for word in cleaned.split() if word not in stop_words]
    if stem:
        return [stemmer.stem(word) for word in kept]
    return kept

In [14]:
test_input = "I like children"

# Categorization approach: compute the word2vec similarity between every input word and every keyword of each category.
# For each input word, its two highest keyword similarities are added to that category's total; the category with the highest total is chosen for the sentence.

# Keys of the returned score dictionary, in the same order as the keyword lists
# stored in therapy_category_keywords.
_CATEGORY_SCORE_KEYS = ('behaviorial', 'body_image', 'grief', 'relationship', 'depression', 'physical')


def categorize_problem(input):
    """Pick the therapy category whose keywords are most similar to the input text.

    The text is tokenized with preprocess(). For every category and every
    remaining input word, the word2vec similarity to each category keyword is
    computed and the two highest similarities are added to the category total.
    The category with the largest total wins (the first one on ties).

    Args:
        input: The text to categorize.

    Returns:
        Either the string "Spell Check Your Words Please" when at least one
        input word is missing from the word2vec vocabulary, or a two-element
        list ``[category_key, scores]`` where ``scores`` maps every category
        key to its total. When no tokens survive preprocessing, all totals
        stay 0 and the first category key is returned.

    Raises:
        KeyError: if a category keyword is missing from the word2vec vocabulary.
        IndexError: if a category has fewer than two keywords.
    """
    input_tokens = preprocess(input)
    vectors = word2vec_model.wv

    # Check vocabulary membership once, up front, instead of probing with a
    # throwaway similarity() call for every word of every category.
    if any(word not in vectors for word in input_tokens):
        return "Spell Check Your Words Please"

    category_similarity_scores = {key: 0 for key in _CATEGORY_SCORE_KEYS}

    # zip pairs each key with its keyword list by position, which avoids
    # list.index() (wrong if two keyword lists ever compare equal).
    for key, keyword_list in zip(_CATEGORY_SCORE_KEYS, therapy_category_keywords):
        for word in input_tokens:
            ranked = sorted(
                (vectors.similarity(word, keyword) for keyword in keyword_list),
                reverse=True,
            )
            # Only the two best-matching keywords contribute for each word.
            category_similarity_scores[key] += ranked[0] + ranked[1]

    category = max(category_similarity_scores, key=category_similarity_scores.get)

    return [category, category_similarity_scores]


relationship
{'behaviorial': 0.5422804057598114, 'body_image': 0.704825222492218, 'grief': 0.572041928768158, 'relationship': 0.9723409116268158, 'depression': 0.6548178493976593, 'physical': 0.583156019449234}


In [15]:
test_input = 'hi i like children'

output = categorize_problem(test_input)
if isinstance(output, str):
    # categorize_problem returns a plain message when a word is not in the
    # vocabulary; print it whole instead of iterating over its characters.
    print(output)
else:
    # Normal case: the winning category followed by the per-category totals.
    for item in output:
        print(item)
    

relationship
{'behaviorial': 0.4313633143901825, 'body_image': 0.7539129592478275, 'grief': 0.5382664278149605, 'relationship': 1.2951907217502594, 'depression': 0.5709313675761223, 'physical': 0.51666110008955}
