### Flu, Drug/Medicine, Mental Health Classifier

This is the same keyword-based NLP model used for flu shots, but with better labels.

In [4]:
import pickle 
import re
import nltk
from nltk.stem.snowball import SnowballStemmer

# Shared English Snowball stemmer; used below to stem both the keywords and
# the tokens of each incoming message.
snowBallStemmer = SnowballStemmer("english")

In [11]:
# Keyword vocabularies for the three classes (flu, drug, mental).
# Each set is named after the class(es) whose weight function counts it.

# Counted by flu_weight only.
flu_only = {
    "flu", "shot", "influenza",
    "nasal", "fluenza", "congestion",
}

# Counted by both flu_weight and drug_weight.
flu_drug = {
    "fever", "headache", "cough", "sneezing",
    "hospital", "sore", "mucus", "vaccine",
    "cold", "migraine", "runny-nose",
}

# Counted by mental_weight only.
mental_only = {
    "depression", "depressed", "stress",
    "helpless", "hopeless", "anxious",
    "kms", "die", "cry",
}

# Counted by both mental_weight and drug_weight.
mental_drug = {
    "feel", "blood", "kill",
    "hallucinate", "hurts",
}

# Counted by drug_weight only.
drug_only = {
    "throat", "vomit", "puke", "diarrhea",
    "stool", "cramps", "abdominal", "stomache",
    "bleeding", "nausea", "bloating", "injury",
}

# Counted by all three weight functions.
all_three = {
    "fatigue", "pain", "sick",
}

In [12]:
# Reduce every keyword to its Snowball stem, one list per category.
# Message tokens are stemmed the same way before lookup, so matching
# happens on stems rather than on surface forms.
stem_flu_only = list(map(snowBallStemmer.stem, flu_only))
stem_flu_drug = list(map(snowBallStemmer.stem, flu_drug))
stem_mental_only = list(map(snowBallStemmer.stem, mental_only))
stem_mental_drug = list(map(snowBallStemmer.stem, mental_drug))
stem_drug_only = list(map(snowBallStemmer.stem, drug_only))
stem_all_three = list(map(snowBallStemmer.stem, all_three))

In [13]:
# Consider typos in the text message.
def edits1(word):
    """Return the set of strings one simple edit away from `word`.

    An edit is a single-character deletion, a swap of two adjacent
    characters, a replacement by a lowercase ASCII letter, or an insertion
    of a lowercase ASCII letter. Because a character may be "replaced" by
    itself, a non-empty `word` is always part of the result.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    candidates = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]
        # Insertions are possible at every cut point, including the end.
        for letter in alphabet:
            candidates.add(head + letter + tail)
        if not tail:
            continue
        # Deletion of the character right after the cut.
        candidates.add(head + tail[1:])
        # Replacement of that same character.
        for letter in alphabet:
            candidates.add(head + letter + tail[1:])
        # Transposition needs at least two characters after the cut.
        if len(tail) > 1:
            candidates.add(head + tail[1] + tail[0] + tail[2:])
    return candidates
def edits2(word):
    """Lazily yield every string reachable from `word` by two edits.

    The result is a generator and may contain duplicates; the first round
    of edits is computed immediately, the second round on demand.
    """
    first_hop = edits1(word)
    return (twice for once in first_hop for twice in edits1(once))

In [14]:
# Build the lookup vocabulary for each category and persist it to disk.
#
# For every category:
#   1. expand each keyword stem into its one-edit variants (typo tolerance);
#      for a non-empty stem, edits1 already contains the stem itself,
#   2. merge the variants into a single set,
#   3. add the original, unstemmed keywords,
#   4. pickle the set. The file is opened in a `with` block so the handle is
#      flushed and closed deterministically instead of being leaked.

flu_only_list = [edits1(stem) for stem in stem_flu_only]
flu_only_set = set().union(*flu_only_list)
flu_only_set.update(flu_only)
with open("../data/nlp3_fluonlywords.pkl", "wb") as pickle_file:
    pickle.dump(flu_only_set, pickle_file)

flu_drug_list = [edits1(stem) for stem in stem_flu_drug]
flu_drug_set = set().union(*flu_drug_list)
flu_drug_set.update(flu_drug)
with open("../data/nlp3_fludrugwords.pkl", "wb") as pickle_file:
    pickle.dump(flu_drug_set, pickle_file)

mental_only_list = [edits1(stem) for stem in stem_mental_only]
mental_only_set = set().union(*mental_only_list)
mental_only_set.update(mental_only)
with open("../data/nlp3_mentalonlywords.pkl", "wb") as pickle_file:
    pickle.dump(mental_only_set, pickle_file)

mental_drug_list = [edits1(stem) for stem in stem_mental_drug]
mental_drug_set = set().union(*mental_drug_list)
mental_drug_set.update(mental_drug)
with open("../data/nlp3_mentaldrugwords.pkl", "wb") as pickle_file:
    pickle.dump(mental_drug_set, pickle_file)

drug_only_list = [edits1(stem) for stem in stem_drug_only]
drug_only_set = set().union(*drug_only_list)
drug_only_set.update(drug_only)
with open("../data/nlp3_drugonlywords.pkl", "wb") as pickle_file:
    pickle.dump(drug_only_set, pickle_file)

all_three_list = [edits1(stem) for stem in stem_all_three]
all_three_set = set().union(*all_three_list)
all_three_set.update(all_three)
with open("../data/nlp3_allthreewords.pkl", "wb") as pickle_file:
    pickle.dump(all_three_set, pickle_file)

In [102]:
# Membership probe on a raw, unstemmed word; the recorded result is False.
# Note that "kill" is listed under mental_drug, not mental_only.
"killing" in mental_only_set

False

In [7]:
def flu_weight(text):
    """Score how strongly `text` points at the flu class.

    The text is tokenized with NLTK and every token is stemmed. Each stem
    contributes the weight of the first vocabulary it is found in:
    flu/drug keywords 1.5, flu-only keywords 2, all-class keywords 1.
    Stems found in none of them contribute nothing.

    Returns the summed weight (0 when nothing matches).
    """
    def score(stem):
        # Order matters: the vocabularies are checked in this priority.
        if stem in flu_drug_set:
            return 1.5
        if stem in flu_only_set:
            return 2
        if stem in all_three_set:
            return 1
        return 0

    stems = [snowBallStemmer.stem(token) for token in nltk.word_tokenize(text)]
    return sum(score(stem) for stem in stems)

def drug_weight(text):
    """Score how strongly `text` points at the drug/medicine class.

    The text is tokenized with NLTK and every token is stemmed. Each stem
    contributes the weight of the first vocabulary it is found in:
    flu/drug keywords 1.5, drug-only or mental/drug keywords 2,
    all-class keywords 1.

    Returns the summed weight (0 when nothing matches).
    """
    # (vocabulary, weight) pairs in lookup priority; the first hit wins.
    tiers = (
        (flu_drug_set, 1.5),
        (drug_only_set, 2),
        (mental_drug_set, 2),
        (all_three_set, 1),
    )
    stems = [snowBallStemmer.stem(token) for token in nltk.word_tokenize(text)]
    total = 0
    for stem in stems:
        for vocabulary, points in tiers:
            if stem in vocabulary:
                total += points
                break
    return total

def mental_weight(text):
    """Score how strongly `text` points at the mental-health class.

    The text is tokenized with NLTK and every token is stemmed. Each stem
    contributes the weight of the first vocabulary it is found in:
    mental-only keywords 2, mental/drug keywords 1.5, all-class keywords 1.

    Side effect: every stem is printed to stdout before it is scored.

    Returns the summed weight (0 when nothing matches).
    """
    stems = [snowBallStemmer.stem(token) for token in nltk.word_tokenize(text)]
    total = 0
    for stem in stems:
        # NOTE(review): debug trace of each stem; flu_weight and drug_weight
        # have no equivalent output.
        print(stem)
        if stem in mental_only_set:
            total += 2
            continue
        if stem in mental_drug_set:
            total += 1.5
            continue
        if stem in all_three_set:
            total += 1
    return total

In [104]:
# Score one sample sentence with all three weight functions (flu, drug, mental).
flu_weight("I have a runny nose"), drug_weight("I have a runny nose"), mental_weight("I have a runny nose")

i
have
a
runni
nose


(1.5, 1.5, 0)

In [105]:
# NOTE(review): flu_weight is scored on a different literal ("I feel hurts")
# than drug_weight and mental_weight, which both use test_sentence — confirm
# whether that is intentional.
test_sentence = "The pain is killing me"
flu_weight("I feel hurts"), drug_weight(test_sentence), mental_weight(test_sentence)

the
pain
is
kill
me


(0, 3, 2.5)

In [6]:
def nlp_classify(text):
    """Classify `text` as "flu", "drug", "mental" or "neither".

    The three class weights are computed and the highest one wins. Ties are
    broken in favour of "drug", then "flu", then "mental". When all three
    weights are zero the result is "neither".
    """
    flu_score = flu_weight(text)
    drug_score = drug_weight(text)
    mental_score = mental_weight(text)

    # No keyword matched in any class.
    if not (drug_score or flu_score or mental_score):
        return "neither"

    best = max(flu_score, drug_score, mental_score)
    if drug_score == best:
        return "drug"
    if flu_score == best:
        return "flu"
    return "mental"

In [107]:
# Run the full classifier on a sample sentence.
nlp_classify("My pain is killing me")

my
pain
is
kill
me


'drug'

### Test for classifier

In [5]:
def _load_keyword_set(path):
    """Unpickle and return the keyword set stored at `path`."""
    # These pickles are the ones written earlier in this notebook; pickle
    # must not be used to load files from untrusted sources.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Restore the six vocabularies from disk so the classifier can run without
# rebuilding them.
flu_only_set = _load_keyword_set("../data/nlp3_fluonlywords.pkl")
mental_only_set = _load_keyword_set("../data/nlp3_mentalonlywords.pkl")
mental_drug_set = _load_keyword_set("../data/nlp3_mentaldrugwords.pkl")
flu_drug_set = _load_keyword_set("../data/nlp3_fludrugwords.pkl")
drug_only_set = _load_keyword_set("../data/nlp3_drugonlywords.pkl")
all_three_set = _load_keyword_set("../data/nlp3_allthreewords.pkl")

In [15]:
# Classify a message with no keyword hits; the recorded result is 'neither'.
nlp_classify("none")

none


'neither'