Dictionary Tools - WordNet
===

If you are a user jump to section 3 (where you simply have to add seed word to the function and obtain the bag of words

## Imports

In [79]:
##import the module
import nltk

# nltk.download('all')
from nltk.corpus import wordnet as wn

## Synsets
Synsets are the set possible **meanings** of the word, this is a function from WordNet (wn). First we take meanings which can be Part of Speech, Nouns, Verbs, or Adjectives

### List of available languages

In [84]:
wn.langs()

dict_keys(['eng', 'als', 'arb', 'bul', 'cmn', 'dan', 'ell', 'fin', 'fra', 'heb', 'hrv', 'isl', 'ita', 'ita_iwn', 'jpn', 'cat', 'eus', 'glg', 'spa', 'ind', 'zsm', 'nld', 'nno', 'nob', 'pol', 'por', 'ron', 'lit', 'slk', 'slv', 'swe', 'tha'])

In [85]:
word = "anger"
language = "eng"


meanings = wn.synsets(word, pos=wn.NOUN + wn.VERB + wn.ADJ, lang=language)

for meaning in meanings:
    print(word, meaning, meaning.definition())

anger Synset('anger.n.01') a strong emotion; a feeling that is oriented toward some real or supposed grievance
anger Synset('anger.n.02') the state of being angry
anger Synset('wrath.n.02') belligerence aroused by a real or supposed wrong (personified as one of the deadly sins)
anger Synset('anger.v.01') make angry
anger Synset('anger.v.02') become angry


## Synonyms obtained with lemmas()

In [86]:
## here we pick up the first synset of the list.
print(meanings[0].name())

wn.synset(meanings[0].name()).lemmas(language)

anger.n.01


[Lemma('anger.n.01.anger'),
 Lemma('anger.n.01.choler'),
 Lemma('anger.n.01.ire')]

In [87]:
## access the word of the synonym

## here we pick up the third lemma of the list
## Note that the output writes capital Lemma, but input is with lowercase lemma
wn.lemma("anger.n.01.ire").name()

'ire'

## Hyponyms
Obtaining hyponyms (words that are semantically lower in the hierarchy) for example dogs and cats are hyponyms of animals. Below are specific kinds of anger

In [88]:
wn.synset("anger.n.01").hyponyms()

[Synset('annoyance.n.02'),
 Synset('bad_temper.n.01'),
 Synset('dander.n.02'),
 Synset('fury.n.01'),
 Synset('huffiness.n.01'),
 Synset('indignation.n.01'),
 Synset('infuriation.n.01'),
 Synset('umbrage.n.01')]

### Looping down the hierarchy with `.closure()`

Like this we can navigate the hierarchy all the way to the bottom

In [90]:
meaning = wn.synset("anger.n.01")

for item in meaning.closure(lambda s: s.hyponyms()):
    print(item)

Synset('annoyance.n.02')
Synset('bad_temper.n.01')
Synset('dander.n.02')
Synset('fury.n.01')
Synset('huffiness.n.01')
Synset('indignation.n.01')
Synset('infuriation.n.01')
Synset('umbrage.n.01')
Synset('aggravation.n.01')
Synset('displeasure.n.01')
Synset('frustration.n.03')
Synset('harassment.n.01')
Synset('pique.n.02')
Synset('fit.n.01')
Synset('irascibility.n.01')
Synset('lividity.n.01')
Synset('wrath.n.01')
Synset('dudgeon.n.01')


## Hypernyms
Obtaining hypernym (words that are semantically higher in the hierarchy). For example dogs and cats are hyponyms of animals. Below are specific kinds of anger

In [36]:
wn.synset("anger.n.01").hypernyms()

[Synset('emotion.n.01')]

In [91]:
wn.synset("anger.n.01").root_hypernyms()

[Synset('entity.n.01')]

## Generate all synonyms and hyponyms of a word


In [132]:
word = "neglect"
language = "eng"

print("Generating all synonyms for:", word)

meanings = wn.synsets(word, pos=wn.NOUN + wn.VERB + wn.ADJ, lang=language)
list_of_lemmas = []

for i, meaning in enumerate(meanings):
    print("\n", "### Synset no.", i + 1, "###\n")
    print("Meaning:", meaning.name())
    print("Definition:", meaning.definition())

    print("\n- Lemmas:")

    for j, lemma in enumerate(meaning.lemmas(language)):
        ##convert lemma to synset
        print("  -", lemma.name())
        list_of_lemmas += [lemma.name()]

    print("\n- Hyponyms:")

    for hyponym in meaning.closure(lambda s: s.hyponyms()):
        print("\n  -", hyponym.name())
        print("\n     - Definition:", hyponym.definition())
        print("\n     - Lemmas:")
        for lemma in hyponym.lemmas(language):
            ##convert lemma to synset
            print("        -", lemma.name())
            list_of_lemmas += [lemma.name()]

Generating all synonyms for: neglect

 ### Synset no. 1 ###

Meaning: disregard.n.01
Definition: lack of attention and due care

- Lemmas:
  - disregard
  - neglect

- Hyponyms:

  - omission.n.04

     - Definition: neglecting to do something; leaving out or passing over something

     - Lemmas:
        - omission

  - exception.n.01

     - Definition: a deliberate act of omission

     - Lemmas:
        - exception
        - exclusion
        - elision

  - oversight.n.01

     - Definition: an unintentional omission resulting from failure to notice something

     - Lemmas:
        - oversight
        - inadvertence

  - pretermission.n.01

     - Definition: letting pass without notice

     - Lemmas:
        - pretermission

 ### Synset no. 2 ###

Meaning: neglect.n.02
Definition: the state of something that has been unused and neglected

- Lemmas:
  - neglect
  - disuse

- Hyponyms:

  - omission.n.02

     - Definition: something that has been omitted

     - Lemmas:
        -

In [141]:
##eliminate list duplations by applying the set transformation & sort alphabetically
set_of_lemmas = sorted([*set(list_of_lemmas)])

print("set_of_lemmas:\n\n", set_of_lemmas)
print("\nLength:", len(set_of_lemmas))

set_of_lemmas:

 ['aggravate', 'aggravation', 'anger', 'angriness', 'annoyance', 'bad_temper', 'bridle', 'chafe', 'choler', 'combust', 'conniption', 'dander', 'displeasure', 'dudgeon', 'enrage', 'enragement', 'exacerbate', 'exasperate', 'exasperation', 'fit', 'frustration', 'fury', 'gall', 'hackles', 'harassment', 'high_dudgeon', 'huffiness', 'ill_temper', 'incense', 'indignation', 'infuriate', 'infuriation', 'ira', 'irascibility', 'ire', 'irk', 'irritation', 'lividity', 'madden', 'madness', 'miff', 'offence', 'offend', 'offense', 'outrage', 'pique', 'quick_temper', 'rage', 'raise_the_roof', 'scene', 'see_red', 'short_temper', 'spleen', 'steam', 'tantrum', 'temper', 'torment', 'umbrage', 'vexation', 'wrath']

Length: 60


In [142]:
## re-write with simpler loops and filters

## free parameters, seed word and language
seed_word = "anger"

## we create an empty list to store the final word list
list_of_lemmas = []

## a function to add a word to a list
add_to_list = lambda list1, item1: list1.append(item1)

## a function to return the hyponyms of a synset
hypos = lambda s: s.hyponyms()

## wn.synset obtains the list of synonyms and meanings for that word, in different syntactic categories
meanings = wn.synsets(seed_word, pos=wn.NOUN + wn.VERB + wn.ADJ)

## loop over set of meanings in synset
for meaning in meanings:

    ## print the definition of that meaning
    print(meaning, meaning.definition())

    ## append all synonyms (lemmas()) of that meaning to the list_of_lemmas
    [add_to_list(list_of_lemmas, lemma.name()) for lemma in meaning.lemmas(language)]

    ## loop over the list of all possible hyponyms
    for hyponym in meaning.closure(hypos):

        ## print the definition of each hyponym
        print(hyponym, hyponym.definition())

        ## append all synonyms (lemmas()) of that hyponym to the list_of_lemmas
        [
            add_to_list(list_of_lemmas, lemma.name())
            for lemma in hyponym.lemmas(language)
        ]

##eliminate list duplations by applying the set transformation & sort alphabetically
set_of_lemmas = sorted([*set(list_of_lemmas)])

print("\nset_of_lemmas:\n\n", set_of_lemmas)
print("\nLength:", len(set_of_lemmas))

Synset('anger.n.01') a strong emotion; a feeling that is oriented toward some real or supposed grievance
Synset('annoyance.n.02') anger produced by some annoying irritation
Synset('bad_temper.n.01') a persisting angry mood
Synset('dander.n.02') a feeling of anger and animosity
Synset('fury.n.01') a feeling of intense anger
Synset('huffiness.n.01') a passing state of anger and resentment
Synset('indignation.n.01') a feeling of righteous anger
Synset('infuriation.n.01') a feeling of intense anger
Synset('umbrage.n.01') a feeling of anger caused by being offended
Synset('aggravation.n.01') an exasperated feeling of annoyance
Synset('displeasure.n.01') the feeling of being displeased or annoyed or dissatisfied with someone or something
Synset('frustration.n.03') a feeling of annoyance at being hindered or criticized
Synset('harassment.n.01') a feeling of intense annoyance caused by being tormented
Synset('pique.n.02') a sudden outburst of anger
Synset('fit.n.01') a display of bad temper
Sy

#### 2.1 Exercise. Write the previous cell as a def function, and use the function to obtain a list of words of all terms related to joy

#### call should be like generate_word_list(seed_word, language)

In [41]:
#
#
#

In order to analyze the lemmas in relation to their synset, we can re-write the function

In [42]:
## define the function


def generate_word_list(seed_word, language):

    ## we create an empty list to store the final word list
    list_of_lemmas = []

    ## a function to add a word to a list
    add_to_list = lambda list1, item1: list1.append(item1)

    ## a function to return the hyponyms of a synset
    hypos = lambda s: s.hyponyms()

    ## wn.synset obtains the list of synonyms and meanings for that word, in different syntactic categories
    meanings = wn.synsets(seed_word, pos=wn.NOUN + wn.VERB + wn.ADJ)

    ## loop over set of meanings in synset
    for meaning in meanings:

        ## print the definition of that meaning, depicts the base word .name() for each lemma in the meaning list
        print(
            meaning,
            meaning.definition(),
            [lemma.name() for lemma in meaning.lemmas(language)],
        )

        ## append all synonyms (lemmas()) of that meaning to the list_of_lemmas
        [
            add_to_list(list_of_lemmas, lemma.name())
            for lemma in meaning.lemmas(language)
        ]

        ## loop over the list of all possible hyponyms
        for hyponym in meaning.closure(hypos):

            ## print the definition of each hyponym, depicts the base word .name() for each lemma in the hyponym list
            print(
                hyponym,
                hyponym.definition(),
                [lemma.name() for lemma in hyponym.lemmas(language)],
            )

            ## append all synonyms (lemmas()) of that hyponym to the list_of_lemmas
            [
                add_to_list(list_of_lemmas, lemma.name())
                for lemma in hyponym.lemmas(language)
            ]

    ##eliminate list duplations by applying the set transformation
    set_of_lemmas = [*set(list_of_lemmas)]

    ## sort alphabetically
    set_of_lemmas.sort()

    ##length
    length = len(set_of_lemmas)

    return (list_of_lemmas, length)

In [43]:
## free parameters, seed word and language
word = "joy"
language = "eng"

##execute
list_words, length = generate_word_list(seed_word, language)
print(list_words)
print(length)

Synset('anger.n.01') a strong emotion; a feeling that is oriented toward some real or supposed grievance ['anger', 'choler', 'ire']
Synset('annoyance.n.02') anger produced by some annoying irritation ['annoyance', 'chafe', 'vexation']
Synset('bad_temper.n.01') a persisting angry mood ['bad_temper', 'ill_temper']
Synset('dander.n.02') a feeling of anger and animosity ['dander', 'hackles']
Synset('fury.n.01') a feeling of intense anger ['fury', 'rage', 'madness']
Synset('huffiness.n.01') a passing state of anger and resentment ['huffiness']
Synset('indignation.n.01') a feeling of righteous anger ['indignation', 'outrage']
Synset('infuriation.n.01') a feeling of intense anger ['infuriation', 'enragement']
Synset('umbrage.n.01') a feeling of anger caused by being offended ['umbrage', 'offense', 'offence']
Synset('aggravation.n.01') an exasperated feeling of annoyance ['aggravation', 'exasperation']
Synset('displeasure.n.01') the feeling of being displeased or annoyed or dissatisfied with s

In order to create select a list of meanings, we create a the list of synsets

In [44]:
## define the function


def generate_word_list(seed_word, language):

    ## we create an empty list to store the final word list
    list_of_lemmas = []
    list_of_meanings = []

    ## a function to add a word to a list
    add_to_list = lambda list1, item1: list1.append(item1)

    ## a function to return the hyponyms of a synset
    hypos = lambda s: s.hyponyms()

    ## wn.synset obtains the list of synonyms and meanings for that word, in different syntactic categories
    meanings = wn.synsets(seed_word, pos=wn.NOUN + wn.VERB + wn.ADJ)

    ## loop over set of meanings in synset
    for meaning in meanings:

        ## print the definition of that meaning
        print(
            meaning,
            meaning.definition(),
            [lemma.name() for lemma in meaning.lemmas(language)],
        )
        list_of_meanings += [
            [
                meaning,
                meaning.definition(),
                [lemma.name() for lemma in meaning.lemmas(language)],
            ]
        ]

        ## append all synonyms (lemmas()) of that meaning to the list_of_lemmas
        [
            add_to_list(list_of_lemmas, lemma.name())
            for lemma in meaning.lemmas(language)
        ]

        ## loop over the list of all possible hyponyms
        for hyponym in meaning.closure(hypos):

            ## print the definition of each hyponym
            print(
                hyponym,
                hyponym.definition(),
                [lemma.name() for lemma in hyponym.lemmas(language)],
            )
            list_of_meanings += [
                [
                    hyponym,
                    hyponym.definition(),
                    [lemma.name() for lemma in hyponym.lemmas(language)],
                ]
            ]

            ## append all synonyms (lemmas()) of that hyponym to the list_of_lemmas
            [
                add_to_list(list_of_lemmas, lemma.name())
                for lemma in hyponym.lemmas(language)
            ]

    ##eliminate list duplations by applying the set transformation
    set_of_lemmas = [*set(list_of_lemmas)]

    ## sort alphabetically
    set_of_lemmas.sort()

    ##length
    length = len(set_of_lemmas)

    return (list_of_lemmas, length, list_of_meanings)

In [45]:
## free parameters, seed word and language
seed_word = "joy"
language = "eng"

##execute
list_words, length, list_meanings = generate_word_list(seed_word, language)
print(list_meanings)

Synset('joy.n.01') the emotion of great happiness ['joy', 'joyousness', 'joyfulness']
Synset('elation.n.02') a feeling of joy and pride ['elation', 'high_spirits', 'lightness']
Synset('exhilaration.n.01') the feeling of lively and cheerful joy ['exhilaration', 'excitement']
Synset('exuberance.n.01') joyful enthusiasm ['exuberance']
Synset('exultation.n.01') a feeling of extreme joy ['exultation', 'jubilance', 'jubilancy', 'jubilation']
Synset('euphoria.n.01') a feeling of great (usually exaggerated) elation ['euphoria', 'euphory']
Synset('bang.n.04') the swift release of a store of affective force ['bang', 'boot', 'charge', 'rush', 'flush', 'thrill', 'kick']
Synset('intoxication.n.03') excitement and elation beyond the bounds of sobriety ['intoxication']
Synset('titillation.n.01') a tingling feeling of excitement (as from teasing or tickling) ['titillation']
Synset('triumph.n.02') the exultation of victory ['triumph']
Synset('joy.n.02') something or someone that provides a source of ha

We can use the python inbuild function input() to collect a keypress from the user a return a behavior:

x = input()

if x == '1': do something

In [46]:
## decide which meanings to keep and which to remove
## this function iterates over the set of meanings
## for each meaning we are asked to type 1 if we want to keep the meaning, otherwise is not kept
## the input is a list of meanings


def select_meanings(list_meanings):

    ## an empty list for the final selected words
    selected_word_list = []

    ## we print the length of the list for reference - how long is it
    print("the list has:", len(list_meanings), "items")

    ## we set a counter to know where in the list we arre
    count = 0

    ##loop over the list
    for item in list_meanings:

        ## update the count
        count += 1

        ## print the count and the item
        print(count, item)

        ## print an instruction
        print("type 1 if the meaning is adequate, 0 otherwise")

        ## collect the response using input
        x = input()

        ##if response is 1, add the words in the item to the final list
        if x == "1":
            selected_word_list += item[2]

        ##otherwise continue the loop
        else:
            continue

    return selected_word_list

In [47]:
## execture the function
filtered_list = select_meanings(list_meanings)

the list has: 21 items
1 [Synset('joy.n.01'), 'the emotion of great happiness', ['joy', 'joyousness', 'joyfulness']]
type 1 if the meaning is adequate, 0 otherwise


 0


2 [Synset('elation.n.02'), 'a feeling of joy and pride', ['elation', 'high_spirits', 'lightness']]
type 1 if the meaning is adequate, 0 otherwise


 0


3 [Synset('exhilaration.n.01'), 'the feeling of lively and cheerful joy', ['exhilaration', 'excitement']]
type 1 if the meaning is adequate, 0 otherwise


 0


4 [Synset('exuberance.n.01'), 'joyful enthusiasm', ['exuberance']]
type 1 if the meaning is adequate, 0 otherwise


 0


5 [Synset('exultation.n.01'), 'a feeling of extreme joy', ['exultation', 'jubilance', 'jubilancy', 'jubilation']]
type 1 if the meaning is adequate, 0 otherwise


 0


6 [Synset('euphoria.n.01'), 'a feeling of great (usually exaggerated) elation', ['euphoria', 'euphory']]
type 1 if the meaning is adequate, 0 otherwise


 0


7 [Synset('bang.n.04'), 'the swift release of a store of affective force', ['bang', 'boot', 'charge', 'rush', 'flush', 'thrill', 'kick']]
type 1 if the meaning is adequate, 0 otherwise


 0


8 [Synset('intoxication.n.03'), 'excitement and elation beyond the bounds of sobriety', ['intoxication']]
type 1 if the meaning is adequate, 0 otherwise


 0


9 [Synset('titillation.n.01'), 'a tingling feeling of excitement (as from teasing or tickling)', ['titillation']]
type 1 if the meaning is adequate, 0 otherwise


 0


10 [Synset('triumph.n.02'), 'the exultation of victory', ['triumph']]
type 1 if the meaning is adequate, 0 otherwise


 0


11 [Synset('joy.n.02'), 'something or someone that provides a source of happiness', ['joy', 'delight', 'pleasure']]
type 1 if the meaning is adequate, 0 otherwise


 00


12 [Synset('rejoice.v.01'), 'feel happiness or joy', ['rejoice', 'joy']]
type 1 if the meaning is adequate, 0 otherwise


 0


13 [Synset('cheer.v.04'), 'become cheerful', ['cheer', 'cheer_up', 'chirk_up']]
type 1 if the meaning is adequate, 0 otherwise


 0


14 [Synset('exult.v.01'), 'feel extreme happiness or elation', ['exult', 'walk_on_air', 'be_on_cloud_nine', 'jump_for_joy']]
type 1 if the meaning is adequate, 0 otherwise


 0


15 [Synset('gladden.v.02'), 'become glad or happy', ['gladden']]
type 1 if the meaning is adequate, 0 otherwise


 0


16 [Synset('exuberate.v.01'), 'to express great joy', ['exuberate', 'exult', 'rejoice', 'triumph', 'jubilate']]
type 1 if the meaning is adequate, 0 otherwise


 


17 [Synset('lighten.v.03'), 'become more cheerful', ['lighten', 'lighten_up', 'buoy_up']]
type 1 if the meaning is adequate, 0 otherwise


 0


18 [Synset('wallow.v.04'), 'be ecstatic with joy', ['wallow', 'rejoice', 'triumph']]
type 1 if the meaning is adequate, 0 otherwise


 0


19 [Synset('glory.v.01'), 'rejoice proudly', ['glory']]
type 1 if the meaning is adequate, 0 otherwise


 0


20 [Synset('gladden.v.01'), 'make glad or happy', ['gladden', 'joy']]
type 1 if the meaning is adequate, 0 otherwise


 0


21 [Synset('overjoy.v.01'), 'cause to feel extremely joyful or happy', ['overjoy']]
type 1 if the meaning is adequate, 0 otherwise


 0


In [48]:
## print the final list
print(filtered_list)

[]


## 3. Let's put all together so that we can basically input a seed word and have the pipeline run automatically
 

In [55]:
## declare the functions

## Function 1
## input is a seed word and a language, output is a list of words, length of the list, and a list of synsets
def generate_word_list(seed_word, language):

    ## we create an empty list to store the final word list
    list_of_lemmas = []
    list_of_meanings = []

    ## a function to add a word to a list
    add_to_list = lambda list1, item1: list1.append(item1)

    ## a function to return the hyponyms of a synset
    hypos = lambda s: s.hyponyms()

    ## wn.synset obtains the list of synonyms and meanings for that word, in different syntactic categories
    meanings = wn.synsets(seed_word, pos=wn.NOUN + wn.VERB + wn.ADJ)

    ## loop over set of meanings in synset
    for meaning in meanings:

        ## print the definition of that meaning
        # print(meaning, meaning.definition(), [lemma.name() for lemma in meaning.lemmas(language)])
        list_of_meanings += [
            [
                meaning,
                meaning.definition(),
                [lemma.name() for lemma in meaning.lemmas(language)],
            ]
        ]

        ## append all synonyms (lemmas()) of that meaning to the list_of_lemmas
        [
            add_to_list(list_of_lemmas, lemma.name())
            for lemma in meaning.lemmas(language)
        ]

        ## loop over the list of all possible hyponyms
        for hyponym in meaning.closure(hypos):

            ## print the definition of each hyponym
            # print(hyponym, hyponym.definition(), [lemma.name() for lemma in hyponym.lemmas(language)])
            list_of_meanings += [
                [
                    hyponym,
                    hyponym.definition(),
                    [lemma.name() for lemma in hyponym.lemmas(language)],
                ]
            ]

            ## append all synonyms (lemmas()) of that hyponym to the list_of_lemmas
            [
                add_to_list(list_of_lemmas, lemma.name())
                for lemma in hyponym.lemmas(language)
            ]

    ##eliminate list duplations by applying the set transformation
    set_of_lemmas = [*set(list_of_lemmas)]

    ## sort alphabetically
    set_of_lemmas.sort()

    ##length
    length = len(set_of_lemmas)

    return (list_of_lemmas, length, list_of_meanings)


## Function 2
## decide which meanings to keep and which to remove
## this function iterates over the set of meanings
## for each meaning we are asked to type 1 if we want to keep the meaning, otherwise is not kept
## the input is a list of meanings


def select_meanings(list_meanings):

    ## an empty list for the final selected words
    selected_word_list = []

    ## we print the length of the list for reference - how long is it
    print("the list has:", len(list_meanings), "items")

    ## we set a counter to know where in the list we arre
    count = 0

    ##loop over the list
    for item in list_meanings:

        ## update the count
        count += 1

        ## print the count and the item
        print(count, item)

        ## print an instruction
        print("type 1 if the meaning is adequate, 0 otherwise")

        ## collect the response using input
        x = input()

        ##if response is 1, add the words in the item to the final list
        if x == "1":
            try:
                selected_word_list += item[2]
            except TypeError:
                continue

        ##otherwise continue the loop
        else:
            continue

    ##eliminate list duplations by applying the set transformation
    selected_word_list = [*set(selected_word_list)]

    ## sort alphabetically
    selected_word_list.sort()

    return selected_word_list


## Function 3, we put together functions 1 and 2
def generate_and_filter(seed_word, language):
    list_of_synsets = generate_word_list(seed_word, language)[2]
    filtered_list = select_meanings(list_of_synsets)

    return filtered_list

In [None]:
# free parameters, seed word and language
seed_word = "anger"
language = "eng"

## execute
filtered_list = generate_and_filter(seed_word, language)
print("the final list is: ", filtered_list)

## 4. Bag of words approach

We want to compute words within a bag of words are common in a certain text


In [61]:
text1 = "john threw a tantrum so big that made his mother explode with fury, not only towards him but also towards the exasperating gods who angered her every day"
text2 = "at first, the bright day made him feel alive, and then dead, then alive again, ultimately all was an illusion, including the inappropriate love that he now felt for his sister"

In [62]:
## anger bag of words

bag_of_words = filtered_list

print(bag_of_words)

['aggravate', 'aggravation', 'anger', 'angriness', 'annoyance', 'bad_temper', 'bridle', 'chafe', 'choler', 'combust', 'conniption', 'dander', 'displeasure', 'dudgeon', 'enrage', 'enragement', 'exacerbate', 'exasperate', 'exasperation', 'fit', 'frustration', 'fury', 'gall', 'hackles', 'harassment', 'high_dudgeon', 'huffiness', 'ill_temper', 'incense', 'indignation', 'infuriate', 'infuriation', 'ira', 'irascibility', 'ire', 'irk', 'irritation', 'lividity', 'madden', 'madness', 'miff', 'offence', 'offend', 'offense', 'outrage', 'pique', 'quick_temper', 'rage', 'raise_the_roof', 'scene', 'see_red', 'short_temper', 'spleen', 'steam', 'tantrum', 'temper', 'torment', 'umbrage', 'vexation', 'wrath']


In [63]:
# first we tokenize the text
tokenized_text = nltk.word_tokenize(text1)

# we get the frequency distributions
freqs = nltk.FreqDist(tokenized_text)

# now we show the counts of each word in text1
counts = freqs.most_common()

print(counts)

[('towards', 2), ('john', 1), ('threw', 1), ('a', 1), ('tantrum', 1), ('so', 1), ('big', 1), ('that', 1), ('made', 1), ('his', 1), ('mother', 1), ('explode', 1), ('with', 1), ('fury', 1), (',', 1), ('not', 1), ('only', 1), ('him', 1), ('but', 1), ('also', 1), ('the', 1), ('exasperating', 1), ('gods', 1), ('who', 1), ('angered', 1), ('her', 1), ('every', 1), ('day', 1)]


In [64]:
import nltk

## create a variable to count the instances of words in the bag of words which appear in the text
bag_of_words_count = 0

## loop over the words in the frequency distribution list of the text
for word_count in counts:
    word = word_count[0]
    count = word_count[1]

    ##if the word is in bag of words, add the count of words to the counter
    if word in bag_of_words:

        print(word)
        bag_of_words_count += count
    print(bag_of_words_count)

0
0
0
0
tantrum
1
1
1
1
1
1
1
1
1
fury
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2


4.1. Exercise - write a function to calculate the frequency of a bag of words in a text

# Gold standard tool to perform bag of words approach

https://www.liwc.app/
