Emotion Detection Testing

In [12]:
import pandas as pd
import numpy as np

In [14]:
import matplotlib.pyplot as plt
import seaborn as sns

In [15]:
import neattext.functions as nfx

In [16]:
df = pd.read_csv("/Users/Akshana/Desktop/MoodScan/emotion_dataset_2.csv")

In [17]:
df.head()

Unnamed: 0.1,Unnamed: 0,Emotion,Text,Clean_Text
0,0,neutral,Why ?,
1,1,joy,Sage Act upgrade on my to do list for tommorow.,Sage Act upgrade list tommorow
2,2,sadness,ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...,WAY HOMEGIRL BABY FUNERAL MAN HATE FUNERALS SH...
3,3,joy,Such an eye ! The true hazel eye-and so brill...,eye true hazel eyeand brilliant Regular feat...
4,4,joy,@Iluvmiasantos ugh babe.. hugggzzz for u .! b...,ugh babe hugggzzz u babe naamazed nga ako e...


In [None]:
from textblob import TextBlob

In [None]:
def get_sentiment(text):
    blob = TextBlob(text)
    sentiment = blob.sentiment.polarity
    if sentiment < 0:
        result = 'negative'
    elif sentiment > 0:
        result = 'positive'
    elif sentiment == 0:
        result = 'neutral'
    return result
    

In [64]:
get_sentiment("i am sad")


'negative'

In [66]:
# clean text

df['Clean_Text'] = df['Text'].apply(nfx.remove_stopwords)
df['Clean_Text'] = df['Text'].apply(nfx.remove_punctuations)
df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)


In [67]:
#keyword extraction
#extract common words for each emotion

from collections import Counter

def extract_keywords(text, num=50):
    tokens = [tok for tok in text.split()]
    most_common_tokens = Counter(tokens).most_common(num)
    return dict(most_common_tokens)



In [70]:
emotion_list = df['Emotion'].unique().tolist()

emotion_list

['neutral', 'joy', 'sadness', 'fear', 'surprise', 'anger', 'shame', 'disgust']

In [72]:
neutral_texts = df.loc[df['Emotion'] == 'neutral', 'Text']


neutral_texts.head()

0                                                 Why ? 
61      My mother-in-law used to do the same thing to...
68      Well , I didn't think so at first . But as we...
76                                    Once in a while . 
114                                                So ? 
Name: Text, dtype: object

#machine learning portion

In [75]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report,ConfusionMatrixDisplay

from sklearn.model_selection import train_test_split


In [77]:
xtext = df["Clean_Text"]
ylabels = df["Emotion"]


In [79]:
#creates an instance of countvectorizer, this vectorizer tokenizes words and counts each instance of the word
cv = CountVectorizer()

x_values = cv.fit_transform(xtext)

In [81]:

x_train, x_test, y_train, y_test = train_test_split(x_values, ylabels, test_size=0.3, random_state=42)


In [84]:
nv_model = MultinomialNB()
nv_model.fit(x_train,y_train)

In [86]:
nv_model.score(x_test,y_test)

0.5643801494539183

In [88]:
y_pred_for_nv = nv_model.predict(x_test)
y_pred_for_nv

array(['fear', 'sadness', 'sadness', ..., 'sadness', 'joy', 'sadness'],
      dtype='<U8')

In [90]:
sample_text = ["I love codeine"]

In [92]:
vect = cv.transform(sample_text).toarray()


In [94]:
nv_model.predict(vect)

array(['joy'], dtype='<U8')

In [96]:
nv_model.predict_proba(vect)

array([[2.85821242e-02, 3.69369954e-03, 2.71107247e-01, 4.84972317e-01,
        8.80344859e-03, 1.15480793e-01, 6.64738250e-05, 8.72938969e-02]])

In [98]:
nv_model.classes_

array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'shame',
       'surprise'], dtype='<U8')

In [100]:
np.max(nv_model.predict_proba(vect))

0.48497231679844177

In [102]:
def predict_emotion(sample_text,model):
    myvect = cv.transform(sample_text).toarray()
    prediction = model.predict(myvect)
    pred_proba = model.predict_proba(myvect)
    pred_percentage_for_all = dict(zip(model.classes_,pred_proba[0]))
    print(prediction[0], pred_percentage_for_all[prediction[0]])
    return pred_percentage_for_all
    

In [104]:
predict_emotion(sample_text,nv_model)

joy 0.48497231679844177


{'anger': 0.02858212423779649,
 'disgust': 0.003693699544124144,
 'fear': 0.2711072470551628,
 'joy': 0.48497231679844177,
 'neutral': 0.00880344859298532,
 'sadness': 0.1154807930258327,
 'shame': 6.647382500172809e-05,
 'surprise': 0.08729389692065496}

In [106]:
example_journal = ["Yesterday was more sad.I went to get ice cream and was enjoying the day."]
predict_emotion(example_journal,nv_model)

sadness 0.6826276174312786


{'anger': 0.0008083445427132129,
 'disgust': 1.6096568922233444e-10,
 'fear': 0.015220807611397158,
 'joy': 0.30132137471175113,
 'neutral': 1.5486767960503278e-11,
 'sadness': 0.6826276174312786,
 'shame': 1.4464850069571366e-15,
 'surprise': 2.185552641035754e-05}

In [108]:
example_sad = ["Yesterday was more sad"]
predict_emotion(example_sad,nv_model)

sadness 0.834795430411922


{'anger': 0.007781538169118904,
 'disgust': 3.871488031357159e-05,
 'fear': 0.1421859957750181,
 'joy': 0.013727528074255493,
 'neutral': 2.593308123880373e-05,
 'sadness': 0.834795430411922,
 'shame': 3.591117475728731e-07,
 'surprise': 0.001444500496386037}

In [110]:
example_sad_scramble = ["was sad Yesterday more"]
predict_emotion(example_sad,nv_model)

sadness 0.834795430411922


{'anger': 0.007781538169118904,
 'disgust': 3.871488031357159e-05,
 'fear': 0.1421859957750181,
 'joy': 0.013727528074255493,
 'neutral': 2.593308123880373e-05,
 'sadness': 0.834795430411922,
 'shame': 3.591117475728731e-07,
 'surprise': 0.001444500496386037}

In [116]:
example_sad = ["dang"]
predict_emotion(example_happy,nv_model)

NameError: name 'example_happy' is not defined

#what im thinking now is that its a bit inaccurate when having multuple emotions in a long paragrah, but it is accurate when its per sentence, I want to figure out the accuracy of this current model and see if tokenizing per sentence and applying the model that way to then add up the score and identify changes in mood throughout a paragrah to be a lil bit more accurate/ identify what the mood actually is.
    Questions I have though is how to properly see what the accuracy of the model is how do I know im not acc making it worse?

-so when I have multiple moods in one paragraph?

-put many examples through the model and give the actual and predicted and see what percentage it got correct

things I need to do to create a model vs a algorithm? vs fitting an algorithm?



###linear regression model

In [120]:
lr_model = LogisticRegression()
lr_model.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [121]:
lr_model.score(x_test,y_test)

0.6323050392795555

In [122]:
nv_model.score(x_test,y_test)

0.5643801494539183

In [123]:
predict_emotion(example_sad,lr_model)

neutral 0.35642598352994603


{'anger': 0.0381266001461116,
 'disgust': 0.015448217182025029,
 'fear': 0.025874777597042264,
 'joy': 0.2623073729497478,
 'neutral': 0.35642598352994603,
 'sadness': 0.26965264914789916,
 'shame': 0.001151229509751553,
 'surprise': 0.031013169937476524}

In [124]:
predict_emotion(example_sad,nv_model)

sadness 0.4291520170788849


{'anger': 0.08306749911046445,
 'disgust': 0.03220471449670426,
 'fear': 0.04545646624381881,
 'joy': 0.3151894759747558,
 'neutral': 0.045355649615802414,
 'sadness': 0.4291520170788849,
 'shame': 0.0037672280719435233,
 'surprise': 0.04580694940762557}

In [125]:
# now its the brainstorming, I 

In [126]:
journal1 = ["Today I went on a walk, it was a great walk. Yesterday was a bad day but today was much better"]

predict_emotion(journal1,nv_model)

fear 0.6015825929280336


{'anger': 0.0001078539395638351,
 'disgust': 8.685621508779635e-13,
 'fear': 0.6015825929280336,
 'joy': 0.30546381412100315,
 'neutral': 1.7383735412253236e-12,
 'sadness': 0.09277864752136444,
 'shame': 1.4523749928975985e-22,
 'surprise': 6.709148742611177e-05}

In [127]:
def mod_predict_emotion_number(sample_text,model):
    myvect = cv.transform(sample_text).toarray()
    prediction = model.predict(myvect)
    pred_proba = model.predict_proba(myvect)
    pred_percentage_for_all = dict(zip(model.classes_,pred_proba[0]))
    
    return [prediction[0],pred_percentage_for_all[prediction[0]]]
    

In [128]:
journal2 = ['I went to the mall. I was very happy']

mod_predict_emotion(journal2,nv_model)



NameError: name 'mod_predict_emotion' is not defined

In [143]:
import nltk

from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize

In [147]:
sentences = sent_tokenize(((journal2)[0]))
sentences

['I went to the mall.', 'I was very happy']

->found a discrepency in how the function works, longer text is less acurate
i want to make a funciton that takes in a large amount of text not model specific that is 
1. breaks into sentences
2. for each sentence apply the nv model to extract the emotion and the number associated
3. get a list of the emotion and the number in order
4. group all the emotions that are the same and add the numbers together, do I have to make a dict
5. which ever one is the largest, make that the output

In [150]:

def large_text_emotion(text):   
    sentences = sent_tokenize(text)
    emotions_contained = []
    calculation = {}
    
    for sentence in sentences:
        result = mod_predict_emotion_number([sentence],nv_model)
        emotions_contained.append(result)

        if result[0] not in calculation:
            calculation[result[0]] = [result[1],1]
        else:
            calculation[result[0]][0] += result[1]
            calculation[result[0]][1] += 1
            
    largest = max(calculation[stat][0] for stat in calculation)
    
    emotion_largest = [each for each in calculation if calculation[each][0] == largest][0]

    #extract the list of the emotion from the statistic of the emotion

    avg_of_max = calculation[emotion_largest][0]/calculation[emotion_largest][1]

    return emotion_largest,avg_of_max, calculation



In [152]:
journal1 = 'Today I went on a walk, it was a great walk. Yesterday was a bad day but today was much better'

journal2 = 'Today was a good day. I went to the mall. I was very happy. There was a scary earthquake though.'

journal3 ='I went to the library and I was nervous and anxious. When I got my work done I was relieved and happy'

journal4 ='It was a windy day today, i didnt know how to feel. It reminded me of my childhood and how i always hated the wind.'

In [154]:
large_text_emotion(journal4)

('joy', 0.7777064878186617, {'joy': [1.5554129756373234, 2]})

In [156]:
predict_emotion([journal1],nv_model)

fear 0.6015825929280336


{'anger': 0.0001078539395638351,
 'disgust': 8.685621508779635e-13,
 'fear': 0.6015825929280336,
 'joy': 0.30546381412100315,
 'neutral': 1.7383735412253236e-12,
 'sadness': 0.09277864752136444,
 'shame': 1.4523749928975985e-22,
 'surprise': 6.709148742611177e-05}

I have to find the number of instances of each thing that there was, this will be useful data and I should add it to the calculation dict

things to fix with this model->

if joy or sadness scores arent glaringly high, auto register it as neutral.

maybe finding the issue w this:
    the scores for each are dependant on the number of words in general so low amount of words in the sentence means higher score overall which is not what we want when it comes to adding up scores.



In [173]:
dec7='So what is my plan exactly when it comes to guys, just wait it out until I go to mnpls and magically il end up w someone? Im putting up a brick wall rn that says no guys jus bc I dont understand anything. I dont understand how im supposed to find someone im compatible w and then date like 10 of those mfs LMAO. I really don’t understand ts. But what I do know is that I dont want to think abt it rn. the few problems I think are that going out =! Successful bc im not around my type AT ALL. Bar or party im not rlly friends w those mf so for it to work I need to be around a lot of those mfs, this way I can network? Or jus find someone who works w me ig. Although I will say the likelihood of that is a lil slim. The other things I could do is go in a community type space like sh did and how a lot of ppl do it. This plan I can always find diff places to go to duhhh. Yeah. So why am I not doing that rn? Cus rn atm im jus focused what can I say. Yeah yeah whatever il jus try next sem or something idk. Money over hoes, maximize my vibe, good connections and friends, internal heart healing.'

In [175]:
dec6 ='''I feel that I am dwelling on the past and my failures too much. It sucks bc It feels like a failure, that I failed somehow, not that im not good enough, that I failed. I think that I need to focus on my goals in the fire if I want to feel better and get my confidence back. I must focus on my goals and it will only be a distant memory. Not dwelling on the past has gotten you this far Akshana you are closer than ever to figuring out your goals. The only reason it hurts is because it was your first ever. This is the only reason pookie bear. Trust in God to guide you though this and in the mean time focus on your goals. I was thinking yest. 
	Praying is such a benefit to your life. It keeps you gratuitous, helps your sleep, helps you be happy for the day, remembers the people you love, everything. Keep praying akshana. It’s awful that being w the wrong person made me unhappy w my own religion. It made me feel guilty and made me sad that being devout weakened the connection that I had w a random guy. I did love him at some point and im happy I had that experience but I must separate isles from my failures. Failure is a part of life. Continue being genuine to yourself akshana and genuinety will come to you.
	in this interm period I need to figure out what I can do to heal my bpd so my next romantic relationship goes smoother. And what behaviors caused those actions and what are healthy behaviors to exhibit. Also, my life revolved mourned him, I need to make sure I am confident in my life first before anything else.
	I just want to tell someone how significant it was to me. I literally went crazy I literally went insane it was so incredible and so painful at the same time. I want someone to understand that. It’s hard for me to admit this I think. Breaking relationships with friends or guys its like ripping a piece of paper in half. I hate that sometimes it has to happen. So why does my first experience w love have to be the worst fucking ending ever? I need to know that '''

In [177]:
large_text_emotion(dec6)

('joy',
 0.7729857647529677,
 {'joy': [10.04881494178858, 13],
  'sadness': [6.095271057073649, 9],
  'anger': [1.5224096765848032, 3],
  'fear': [1.4515951480775449, 3]})

In [179]:
predict_emotion([dec6],nv_model)

sadness 0.7649040821824455


{'anger': 4.636624436577877e-19,
 'disgust': 2.0660317359658193e-144,
 'fear': 5.774937144530393e-18,
 'joy': 0.23509591781754485,
 'neutral': 8.685978316980127e-145,
 'sadness': 0.7649040821824455,
 'shame': 2.010021880907046e-298,
 'surprise': 1.134018196284635e-65}