# Bigram Language Model

In [112]:
from collections import defaultdict
import nltk
import random
import numpy as np
class BigramLM:

    def __init__(self,corpus, alpha = 0):
        self.corpus = corpus
        self.alpha = alpha
        self.unigramFreq = self.generateUnigramCount()
        self.bigramFreq = self.generateBigramCount()
        self.probMatrix = None
        
    def generateUnigramCount(self):
        unigram_count = defaultdict(int)
    
        for sentence in self.corpus:
            for word in sentence:
                unigram_count[word]+=1
        return unigram_count
    
    def generateBigramCount(self):
        bigram_count = defaultdict(dict)
        for unigram in self.unigramFreq.keys():
            for unigram2 in self.unigramFreq.keys():
                bigram_count[unigram][unigram2] = 0
        for sentence in self.corpus:
            for i in range(len(sentence)-1):
                unigram = sentence[i]
                bigram_count[unigram][sentence[i+1]]+=1
        return bigram_count
    
    def createProbMatrix(self,alpha = 0,emotional_ratio = 0, emotional_dict = None,emotion = -1):
        probMatrix = defaultdict(dict)
        unigramFreq = self.generateUnigramCount()
        bigramFreq = self.generateBigramCount()
        
        for unigram in unigramFreq.keys():
            for word in bigramFreq[unigram].keys():
                    if (emotional_dict[unigram][word] == 0):
                        emotion_val = 0
                    else:
                        emotion_val = emotional_dict[unigram][word][emotion]["score"]

                    probMatrix[unigram][word] = (1-emotional_ratio)*(bigramFreq[unigram][word] + alpha)/(unigramFreq[unigram] + alpha*len(bigramFreq[unigram].keys())) + emotional_ratio*emotion_val
        self.probMatrix = probMatrix
        return probMatrix
    
    def knessar(self, d=0.75):

        probMatrix = self.probMatrix
        unigramFreq = self.unigramFreq
        bigramFreq = self.bigramFreq

        # Your actual data should be populated in probMatrix, unigramFreq, and bigramFreq

        continuation_probs = defaultdict(float)
        total_bigrams = sum(len(bigramFreq[u1]) for u1 in bigramFreq)
        for u2 in unigramFreq:
            continuation_probs[u2] = len(bigramFreq[u2]) / total_bigrams

        # Apply Kneser-Ney smoothing
        for u1 in probMatrix:
            total_unigrams_u1 = unigramFreq[u1]
            for u2 in probMatrix[u1]:
                count_u1u2 = bigramFreq[u1][u2]
                prob_abs_discounted = max(count_u1u2 - d, 0) / total_unigrams_u1
                prob_cont = continuation_probs[u2]
                prob_kn = prob_abs_discounted + prob_cont
                probMatrix[u1][u2] = prob_kn

        return probMatrix


    def next_word(self, cur_word):
        probList = self.probMatrix[cur_word]
        return random.choices(list(probList.keys()), weights=probList.values())[0]
    
    def first_word(self):
        probList = self.unigramFreq
        return random.choices(list(probList.keys()), weights = probList.values())[0]
    
    def generate_sentence(self):
        sentence = []
        cur_word = self.first_word()
        while(cur_word=="."):
            cur_word = self.first_word()
        while(cur_word!="."):
            sentence.append(cur_word)
            cur_word = self.next_word(cur_word)
        sentence.append(".")
        return sentence


In [3]:
def emotional_val_unigram(corpus):
    emotional_vals = defaultdict(dict)
    for sentence in corpus:
        for unigram in sentence:
            if unigram not in emotional_vals.keys():
                emotional_vals[unigram] = emotion_scores(unigram)[0]
    return emotional_vals

def emotional_val_bigram(corpus,bigramFreq):
    emotional_vals = defaultdict(dict)
    for sentence in corpus:
        for i in range(len(sentence)-1): 
            unigram = sentence[i]
            if bigramFreq[unigram][sentence[i+1]] ==0:
                emotional_vals[unigram][sentence[i+1]] = 0
            else:
                emotional_vals[unigram][sentence[i+1]] = emotion_scores(f"{unigram} {sentence[i+1]}")
        print(f"{unigram} {sentence[-1]}:",emotional_vals[sentence[-2]][sentence[-1]])
    return emotional_vals

In [1]:
from utils import emotion_scores

Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [7]:
import pandas as pd
data = pd.read_csv("./corpus.txt",header=None)
corpus = data.to_numpy()
corpus = corpus.reshape(corpus.shape[0])
corpus = [nltk.word_tokenize(sentence.strip() +".") for sentence in corpus if len(sentence) > 0]

In [9]:
emotional_dict = emotional_val_bigram(corpus,langModel.bigramFreq)

mooshilu .: [{'label': 'sadness', 'score': 0.05772329866886139}, {'label': 'joy', 'score': 0.14419090747833252}, {'label': 'love', 'score': 0.00634420569986105}, {'label': 'anger', 'score': 0.6393235325813293}, {'label': 'fear', 'score': 0.1472383737564087}, {'label': 'surprise', 'score': 0.00517965666949749}]
pathetic .: [{'label': 'sadness', 'score': 0.9815224409103394}, {'label': 'joy', 'score': 0.00062308314954862}, {'label': 'love', 'score': 0.0004284105380065739}, {'label': 'anger', 'score': 0.016495399177074432}, {'label': 'fear', 'score': 0.000515145598910749}, {'label': 'surprise', 'score': 0.0004156489158049226}]
other .: [{'label': 'sadness', 'score': 0.04563998430967331}, {'label': 'joy', 'score': 0.09064829349517822}, {'label': 'love', 'score': 0.0047338781878352165}, {'label': 'anger', 'score': 0.7104899287223816}, {'label': 'fear', 'score': 0.14380070567131042}, {'label': 'surprise', 'score': 0.004687183070927858}]
him .: [{'label': 'sadness', 'score': 0.0460939295589923

In [25]:
emotional_dict = pd.read_csv("./emotionalValuesBigram.csv",index_col =0,header = 0)

  emotional_dict = pd.read_csv("./emotionalValuesBigram.csv",index_col =0,header = 0)


In [26]:
emotional_dict.fillna(0,inplace=True)

In [13]:
df = pd.DataFrame(emotional_dict,index =langModel.bigramFreq.keys(),columns = langModel.bigramFreq.keys())

In [16]:
df

Unnamed: 0,i,stand,here,feel,empty,a,class,post,count,link,...,sandblog,achieve,spritzer,nagalene,connecting,google,stellarium,theyd,peter,robbed
i,"[{'label': 'sadness', 'score': 0.0218988917768...",0,"[{'label': 'sadness', 'score': 0.0434396117925...","[{'label': 'sadness', 'score': 0.0282275881618...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
stand,"[{'label': 'sadness', 'score': 0.0328661911189...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
here,0,"[{'label': 'sadness', 'score': 0.0170614719390...",0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
feel,"[{'label': 'sadness', 'score': 0.0082543669268...",0,0,0,0,"[{'label': 'sadness', 'score': 0.0039282166399...",0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
empty,0,0,0,"[{'label': 'sadness', 'score': 0.9989351630210...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
google,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
stellarium,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
theyd,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
peter,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
emotional_dict = df.to_dict()

In [18]:
emotional_dict["i"]["i"]

[{'label': 'sadness', 'score': 0.021898891776800156},
 {'label': 'joy', 'score': 0.044413890689611435},
 {'label': 'love', 'score': 0.0025837162975221872},
 {'label': 'anger', 'score': 0.9062756299972534},
 {'label': 'fear', 'score': 0.023186221718788147},
 {'label': 'surprise', 'score': 0.0016415688442066312}]

In [31]:
print(emotion_scores("I am standing."))

[{'label': 'sadness', 'score': 0.014017921872437}, {'label': 'joy', 'score': 0.11982432007789612}, {'label': 'love', 'score': 0.0028856282588094473}, {'label': 'anger', 'score': 0.5671917796134949}, {'label': 'fear', 'score': 0.29064297676086426}, {'label': 'surprise', 'score': 0.005437375977635384}]


In [101]:
langModel = BigramLM(corpus,0)

In [108]:
probMatrix = langModel.createProbMatrix(0,0.99,emotional_dict)

In [88]:
laplaceProbMatrix = langModel.createProbMatrix(1,0,emotional_dict)

In [103]:
knesserProbMatrix = langModel.knessar()

In [34]:
df = pd.DataFrame(probMatrix)
probMatrix = df.to_numpy()

In [89]:
df = pd.DataFrame(laplaceProbMatrix)
laplaceProbMatrix = df.to_numpy()

In [105]:
df = pd.DataFrame(knesserProbMatrix)
knesserProbMatrix = df.to_numpy()

In [57]:
probMatrix

array([[0.00026392, 0.        , 0.16216216, ..., 0.        , 0.        ,
        0.        ],
       [0.00052784, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.1       , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [71]:
probs = np.zeros((len(langModel.bigramFreq.keys()),len(langModel.bigramFreq.keys())))
i=j=0
for unigram in langModel.bigramFreq.keys():
    j = 0
    for unigram2 in langModel.bigramFreq[unigram].keys():
        probs[i][j] = langModel.bigramFreq[unigram][unigram2] 
        j+=1
    i+=1

In [82]:
flat_indices = np.argsort(-probs.flatten())[:5]
indices_2d = np.unravel_index(flat_indices, probs.shape)

# Print the result
print("Top 5 maximum values bigrams:")
for index in zip(*indices_2d):
    unigram1 = list(langModel.unigramFreq.keys())[index[0]]
    unigram2 = list(langModel.unigramFreq.keys())[index[1]]
    print( f"{unigram1}  {unigram2} {langModel.bigramFreq[unigram1][unigram2]/np.sum(list(langModel.unigramFreq.values()))}")

Top 5 maximum values bigrams:
i  feel 0.020778000245168144
i  am 0.0059861888611939685
feel  like 0.005046377640665223
i  was 0.004106566420136477
that  i 0.003350631307972051


In [90]:
# Get the indices of the top 5 maximum values
flat_indices = np.argsort(-laplaceProbMatrix.flatten())[:5]
indices_2d = np.unravel_index(flat_indices, probs.shape)

# Print the result
print("Top 5 maximum values indices:")
for index in zip(*indices_2d):
    unigram1 = list(langModel.unigramFreq.keys())[index[0]]
    unigram2 = list(langModel.unigramFreq.keys())[index[1]]
    print( f"{unigram1}  {unigram2} {laplaceProbMatrix[index[0]][index[1]]}")



Top 5 maximum values indices:
feel  i 0.11043610327619874
like  feel 0.0350976507217662
am  i 0.03189412019960946
i  that 0.02650602409638554
i  and 0.023103748910200523


In [107]:
# Get the indices of the top 5 maximum values
flat_indices = np.argsort(-knesserProbMatrix.flatten())[:5]
indices_2d = np.unravel_index(flat_indices, probs.shape)

# Print the result
print("Top 5 maximum values indices:")
for index in zip(*indices_2d):
    unigram1 = list(langModel.unigramFreq.keys())[index[0]]
    unigram2 = list(langModel.unigramFreq.keys())[index[1]]
    print( f"{unigram2}  {unigram1} {knesserProbMatrix[index[0]][index[1]]}")



Top 5 maximum values indices:
don  t 0.970523179035375
href  http 0.9701841959845275
didn  t 0.9585175293178609
sort  of 0.9560665489257041
supposed  to 0.9168508626511942


In [86]:
import numpy as np
from collections import defaultdict


# Your actual data should be populated in probMatrix and unigramFreq

# Get the unique words from both unigramFreq and probMatrix


# Populate the probability matrix
for i, u1 in enumerate(langModel.unigramFreq.keys()):
    for j, u2 in enumerate(langModel.unigramFreq.keys()):
        conditional_prob = laplaceProbMatrix[u1][u2] if u1 in laplaceProbMatrix and u2 in laplaceProbMatrix[u1] else 0.0
        unigram_prob_u1 = langModel.unigramFreq[u1] / sum(langModel.unigramFreq.values()) if langModel.unigramFreq[u1] > 0 else 0.0
        probs[i, j] = conditional_prob * unigram_prob_u1

# Print the probability matrix
print("Probability Matrix:")
print(probs)


  conditional_prob = laplaceProbMatrix[u1][u2] if u1 in laplaceProbMatrix and u2 in laplaceProbMatrix[u1] else 0.0


In [80]:
langModel.bigramFreq["i"]["was"]/np.sum(list(langModel.unigramFreq.values()))

0.004106566420136477

In [53]:
probMatrix[probMatrix!=0].shape

(25664,)

In [46]:
def select_words(unigram_dict): # Selects a tuple 2 words at random from corpus
    words = tuple(unigram_dict.keys())
    num = len(words)-1
    wi_1 = unigram_dict[words[random.randint(0,num)]]
    wi = unigram_dict[words[random.randint(0,num)]]
    return (wi,wi_1) # Later word first here imp

def compareSmoothing(unigram_dict,probMatrix,knessar_ney_probMatrix,laplaceProbMatrix):
    print("Compare Smoothing for 10 random bigrams")
    for i in range(10):
        t = select_words(unigram_dict)
        print("For the words {} and {}".format(t[1],t[0]))
        print("True probabilty is: ", probMatrix[t[0]][t[1]])
        print("Kneser Ney probabilty is: ", knessar_ney_probMatrix[t[0]][t[1]])
        print("Laplace probabilty is: ", laplaceProbMatrix[t[0]][t[1]])
        print("\n\n")

In [47]:
probMatrix.shape

(5429, 5429)

In [54]:
compareSmoothing(langModel.unigramFreq,probMatrix,knesserProbMatrix, laplaceProbMatrix)

Compare Smoothing for 10 random bigrams
For the words 9 and 1
True probabilty is:  0.0
Kneser Ney probabilty is:  0.0005260612744957472
Laplace probabilty is:  0.00018412815319462345



For the words 9 and 1
True probabilty is:  0.0
Kneser Ney probabilty is:  0.0005260612744957472
Laplace probabilty is:  0.00018412815319462345



For the words 1 and 1
True probabilty is:  0.0
Kneser Ney probabilty is:  0.004734551470461725
Laplace probabilty is:  0.00018385732671446958



For the words 1 and 3
True probabilty is:  0.0
Kneser Ney probabilty is:  4.406452303097085e-05
Laplace probabilty is:  0.00018385732671446958



For the words 3 and 19
True probabilty is:  0.0
Kneser Ney probabilty is:  2.0505934043775875
Laplace probabilty is:  0.0001415227851684121



For the words 1 and 1
True probabilty is:  0.0
Kneser Ney probabilty is:  0.004734551470461725
Laplace probabilty is:  0.00018385732671446958



For the words 2 and 1
True probabilty is:  0.0
Kneser Ney probabilty is:  0.0757528235273

## Anger Samples

In [114]:
angerProbMatrix = langModel.createProbMatrix(0,0.999,emotional_dict,3)

In [115]:
# Anger Dataset
anger_sentences = []
while(len(anger_sentences)<50):
    cur_sentence = str.join(" ",langModel.generate_sentence())
    if emotion_scores(cur_sentence)[3]['score']>0.8 and len(cur_sentence)>5:
        anger_sentences.append(cur_sentence)
        print(len(anger_sentences))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


In [116]:
anger_sentences

['greeted greeted everybody but exceptions must always want on thursday and matt .',
 'believe believe there wouldnt have stopped feeling resentful and drew focalors sigil with their souls .',
 'i i mean the old louche animals are my pen and usually even know many of course with those chapters .',
 'exact exact meaning folks who don think ur being obnoxious for real life about on about rabbits i cross the cobwebs of offending real but didn t know deke s where crying and said wendy brown mop of disbelieving feeling rebellious to curl up really inhuman i wan na see what ill screw anything i post on craisins when no determination so angry aggressive and on something petty blogging world my book im so lazy mom s discussion with lizzy brought a police officer is a person to check ups they where she said im the moment someobdy is to watch hgtv and uncles bed left with occupy wall is why perhaps instinctively because maceys has indeed beaten the dorm inspected though .',
 'of of his life we h

In [118]:
with open("./anger_sentences.txt","w") as file:
    file.write(str.join("\n",anger_sentences))

In [117]:
sum = 0
for cur_sentence in anger_sentences:
    sum+=emotion_scores(cur_sentence)[3]['score']
print(sum/50)

0.9641385495662689


## Sadness sentences

In [120]:
sadnessProbMatrix = langModel.createProbMatrix(0,0.99,emotional_dict,0)

In [121]:
# Anger Dataset
sadness_sentences = []
while(len(sadness_sentences)<50):
    cur_sentence = str.join(" ",langModel.generate_sentence()  )
    if emotion_scores(cur_sentence)[0]['score']>0.8 and len(cur_sentence)>5:
        sadness_sentences.append(cur_sentence)
        print(len(sadness_sentences))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


In [122]:
sadness_sentences

['finally finally meet you have depression cycle today s emotions there would die but after spending years but generally leaves my health going around in return .',
 'fantasy fantasy of crying and disappointed .',
 'talented talented and please do lists i must be depression cycle hits even less awful for years ive spent on depressed .',
 'feel feel listless and utter gratefulness downright from deep recess in vain today was disappointed though perhaps you without missgivings .',
 'i i end times in peace in people my memories of death in this lonely very dull intensity of pathetic feeling less crappy because after we interact with ia .',
 'just just started back aching for several times especially if we realize i blanked a numb mental activity and hurt .',
 'feel feel pity for netflix for needy puppy not alone .',
 'feels feels empty and disappointed especially emotional that was last long after effects of himself i should mention our last hours for working i link href http i almost eve

In [123]:
with open("./sadness_sentences.txt","w") as file:
    file.write(str.join("\n",sadness_sentences))

In [124]:
sum = 0
for cur_sentence in sadness_sentences:
    sum+=emotion_scores(cur_sentence)[0]['score']
print(sum/50)

0.9909518647193909


## Joy Sentences

In [125]:
joyProbMatrix = langModel.createProbMatrix(0,0.999,emotional_dict,1)

In [126]:
# Anger Dataset
joy_sentences = []
while(len(joy_sentences)<50):
    cur_sentence = str.join(" ",langModel.generate_sentence())
    if emotion_scores(cur_sentence)[1]['score']>0.8 and len(cur_sentence)>5:
        joy_sentences.append(cur_sentence)
        print(len(joy_sentences))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


In [127]:
joy_sentences

['me me wherever you feeling calm my facebook right out by feeling or special gifts .',
 'the the artistic expression became more outgoing and hear .',
 'me me refocus .',
 'just just know from committing suicide or support me in accordance to earn it allows me will persuade me religious .',
 'was was minuscule growth as that when theres going how rapidly they re emergence .',
 'hand hand feel calm focused .',
 'feel feel satisfied until theyre ready but feel from it relish it done with making new situations .',
 'to to wake up for contemplation consolidation and hope i continue this afternoon in with me get the frugal antics improv challenge and lucky like ride on and genuinely looking than truly wonderful people have periods of product is great interested and spirit .',
 'things things done by where if id talk today is looking a fabulous plans you over yash more sad about exactly felt pretty .',
 'frustrated frustrated by all on around feeling so articulate opinions to feeling and et

In [128]:
with open("./joy_sentences.txt","w") as file:
    file.write(str.join("\n",joy_sentences))

In [129]:
sum = 0
for cur_sentence in joy_sentences:
    sum+=emotion_scores(cur_sentence)[1]['score']
print(sum/50)

0.985243513584137


## Love Sentences

In [130]:
loveProbMatrix = langModel.createProbMatrix(0,0.999,emotional_dict,2)

In [131]:
# Anger Dataset
love_sentences = []
while(len(love_sentences)<50):
    cur_sentence = str.join(" ",langModel.generate_sentence())
    if emotion_scores(cur_sentence)[2]['score']>0.8 and len(cur_sentence)>5:
        love_sentences.append(cur_sentence)
        print(len(love_sentences))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


In [132]:
love_sentences

['by by adrasteius and loved .',
 'and and warm breath so horny though there if only compassionate and warm feeling loved safe road layout looks tender about supporting other despite a lover whisper ones name it above the longing he tells me love as lovely polish .',
 'mark mark of loved today im loved today in love with whom i adore leonard cohen is loyal to kiss her gentle .',
 'so so nostalgic feeling treasured i desire .',
 'causes causes as it love what s different things can sleep on loved the longing when ever love to comfort me how soft they feeling you kiss her close and always wanted tango was crazy you do favours for caring for caring for me love but adoring love you kiss and go in love music gave me love in particular seems over sympathetic and soothing and love taylor swift because then again blessed now makes life such stories about supporting characters face turn to kiss and loving .',
 'my my beloved disciples a romantic platonic and compassionate awareness .',
 'and an

In [133]:
with open("./love_sentences.txt","w") as file:
    file.write(str.join("\n",love_sentences))

In [134]:
sum = 0
for cur_sentence in love_sentences:
    sum+=emotion_scores(cur_sentence)[2]['score']
print(sum/50)

0.991647971868515


## Fear Sentences

In [135]:
fearProbMatrix = langModel.createProbMatrix(0,0.999,emotional_dict,4)

In [136]:
# Anger Dataset
fear_sentences = []
while(len(fear_sentences)<50):
    cur_sentence = str.join(" ",langModel.generate_sentence())
    if emotion_scores(cur_sentence)[4]['score']>0.8 and len(cur_sentence)>5:
        fear_sentences.append(cur_sentence)
        print(len(fear_sentences))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


In [137]:
fear_sentences

['so so indecisive and suddenly startled me cringe .',
 'past past event where going through life s presence in public and maybe i see victoria take a fearful that one time at sunset its after nearly ran away feeling contractions at sunrise .',
 'to to percieve who is looking than reluctant xdd hohoho looking at sunrise .',
 'moments moments you jerked a kiss but ryeowook doesn t think about two as in too worried .',
 'have have usually unheard of sharing your life at school these things im sounding surprisingly like if only monday of nervousness and woke up fastforwarding through lulls in such small costume .',
 'feel feel vulnerable being scared now when the seagulls lapping water tracking terribly low confidence i nearly ran miles apart .',
 'my my eyelid if class or anything in someway i empathize with one when at status quo probably the class the computer generated transformers took to approach this strange stillness and have depth understanding of his fb damn shy of everything iv

In [138]:
with open("./fear_sentences.txt","w") as file:
    file.write(str.join("\n",fear_sentences))

In [139]:
sum = 0
for cur_sentence in fear_sentences:
    sum+=emotion_scores(cur_sentence)[4]['score']
print(sum/50)

0.9775424075126647


## Surprise Sentences

In [140]:
surpriseProbMatrix = langModel.createProbMatrix(0,0.999,emotional_dict,5)

In [142]:
# Anger Dataset
surprise_sentences = []
while(len(surprise_sentences)<50):
    cur_sentence = str.join(" ",langModel.generate_sentence())
    if emotion_scores(cur_sentence)[5]['score']>0.8 and len(cur_sentence)>5:
        surprise_sentences.append(cur_sentence)
        print(len(surprise_sentences))

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50


In [143]:
with open("./surprise_sentences.txt","w") as file:
    file.write(str.join("\n",surprise_sentences))

In [144]:
surprise_sentences

['it it made in shock at that shocked .',
 'like like amazing .',
 'how how stunned and curiosity is surprised as these amazing but feeling dazed confused again blessed to la .',
 'and and stunned and curious and stunned and surprised with how old hyphen .',
 'my my funny sort of shocked by how amazing exhibit .',
 'you you enthralled by and brimming with steve irwins family is surprised performance .',
 'outrageous outrageous i were to other books now like my funny bout peace and curiosity is unusual and stunned and curious .',
 'desperately desperately curious when four people think it first few sets of coppers .',
 'am am amazed to discover the changes cause me shocked that shocked my plant may become weird turn curious why perhaps more dazed and mood in those rare .',
 'frail frail delicate at all will respect i blinded feelings amazed at work from almost weird blogging if anyone after .',
 'to to discover his head and fill it probably feeling shocked rat in any wow factor at some 

In [145]:
sum = 0
for cur_sentence in surprise_sentences:
    sum+=emotion_scores(cur_sentence)[5]['score']
print(sum/50)

0.9839601385593414


In [148]:
print(f"Emotion of Anger: {anger_sentences[1]} {emotion_scores(anger_sentences[1])}")
print(f"Emotion of Fear: {fear_sentences[0]} {emotion_scores(fear_sentences[0])}")
print(f"Emotion of Joy: {joy_sentences[0]} {emotion_scores(joy_sentences[0])}")
print(f"Emotion of Love: {love_sentences[0]} {emotion_scores(love_sentences[0])}")
print(f"Emotion of Sadness: {sadness_sentences[0]} {emotion_scores(sadness_sentences[0])}")
print(f"Emotion of Surprise: {surprise_sentences[0]} {emotion_scores(surprise_sentences[0])}")

Emotion of Anger: believe believe there wouldnt have stopped feeling resentful and drew focalors sigil with their souls . [{'label': 'sadness', 'score': 0.0004484590608626604}, {'label': 'joy', 'score': 0.00023233778483700007}, {'label': 'love', 'score': 0.00023714876442681998}, {'label': 'anger', 'score': 0.998068630695343}, {'label': 'fear', 'score': 0.0008434911142103374}, {'label': 'surprise', 'score': 0.00016998843057081103}]
Emotion of Fear: so so indecisive and suddenly startled me cringe . [{'label': 'sadness', 'score': 0.00026056956266984344}, {'label': 'joy', 'score': 0.00038022184162400663}, {'label': 'love', 'score': 0.00018943285977002233}, {'label': 'anger', 'score': 0.0023601229768246412}, {'label': 'fear', 'score': 0.9920079708099365}, {'label': 'surprise', 'score': 0.004801682662218809}]
Emotion of Joy: me me wherever you feeling calm my facebook right out by feeling or special gifts . [{'label': 'sadness', 'score': 0.0004822932824026793}, {'label': 'joy', 'score': 0.9