In [1]:
# Setup commands, kept inside a string literal so that running this cell
# installs nothing; the cell merely echoes the string back as its output.
'''
! pip install inflect
! python -m spacy download en_core_web_md
! pip install textacy
! pip install neuralcoref
'''

'\n! pip install inflect\n! python -m spacy download en_core_web_md\n! pip install textacy\n! pip install neuralcoref\n'

## Counting nouns – plural and singular nouns

* Determine whether a noun is plural or singular
* Turn plural nouns into singular nouns and vice versa

In [2]:
import sys
# Add the parent directory to the import path so the Chapter01 package imported below can be found.
sys.path.append('..')

from Chapter01.pos_tagging import pos_tag_nltk

In [3]:
import nltk
from nltk.stem import WordNetLemmatizer
import inflect

In [6]:
# Read the whole sample file into one string (decoded as UTF-8).
with open('../Chapter01/sherlock_holmes_1.txt', 'r', encoding="utf-8") as f:
    text = f.read()

In [7]:
# Replace every newline with a space so the text becomes a single continuous line.
text = text.replace('\n', ' ')

In [8]:
# Display the flattened text.
text

'To Sherlock Holmes she is always _the_ woman. I have seldom heard him mention her under any other name. In his eyes she eclipses and predominates the whole of her sex. It was not that he felt any emotion akin to love for Irene Adler. All emotions, and that one particularly, were abhorrent to his cold, precise but admirably balanced mind. He was, I take it, the most perfect reasoning and observing machine that the world has seen, but as a lover he would have placed himself in a false position. He never spoke of the softer passions, save with a gibe and a sneer. They were admirable things for the observer—excellent for drawing the veil from men’s motives and actions. But for the trained reasoner to admit such intrusions into his own delicate and finely adjusted temperament was to introduce a distracting factor which might throw a doubt upon all his mental results. Grit in a sensitive instrument, or a crack in one of his own high-power lenses, would not be more disturbing than a strong e

In [11]:
# Tag the text with the helper imported from Chapter01, then preview the
# first five (word, tag) pairs.
words_with_pos = pos_tag_nltk(text)
words_with_pos[:5]

[('To', 'TO'),
 ('Sherlock', 'NNP'),
 ('Holmes', 'NNP'),
 ('she', 'PRP'),
 ('is', 'VBZ')]

In [12]:
def get_nouns(words_with_pos):
    """Return the (word, tag) pairs whose tag is a common-noun tag.

    Args:
        words_with_pos: iterable of (word, tag) pairs.

    Returns:
        A list with the pairs tagged "NN" or "NNS", in their original order.
    """
    noun_tags = ("NN", "NNS")
    return list(filter(lambda pair: pair[1] in noun_tags, words_with_pos))

In [13]:
# Keep only the noun entries of the tagged text and print them.
nouns = get_nouns(words_with_pos)
print(nouns)

[('woman', 'NN'), ('name', 'NN'), ('eyes', 'NNS'), ('whole', 'NN'), ('sex', 'NN'), ('emotion', 'NN'), ('akin', 'NN'), ('emotions', 'NNS'), ('cold', 'NN'), ('precise', 'NN'), ('mind', 'NN'), ('reasoning', 'NN'), ('machine', 'NN'), ('world', 'NN'), ('lover', 'NN'), ('position', 'NN'), ('passions', 'NNS'), ('gibe', 'NN'), ('sneer', 'NN'), ('things', 'NNS'), ('observer—excellent', 'NN'), ('veil', 'NN'), ('men', 'NNS'), ('motives', 'NNS'), ('actions', 'NNS'), ('reasoner', 'NN'), ('intrusions', 'NNS'), ('delicate', 'NN'), ('temperament', 'NN'), ('distracting', 'NN'), ('factor', 'NN'), ('doubt', 'NN'), ('results', 'NNS'), ('instrument', 'NN'), ('crack', 'NN'), ('high-power', 'NN'), ('lenses', 'NNS'), ('emotion', 'NN'), ('nature', 'NN'), ('woman', 'NN'), ('woman', 'NN'), ('memory', 'NN')]


In [50]:
def is_plural_nltk(noun_info):
    """Tell whether a tagged noun is plural, judging only by its POS tag.

    Args:
        noun_info: a (word, tag) pair; only the tag (index 1) is inspected.

    Returns:
        True when the tag equals "NNS", False otherwise.
    """
    return noun_info[1] == "NNS"

In [51]:
# nouns[2] is ('eyes', 'NNS') in the list printed above.
is_plural_nltk(nouns[2])

True

In [40]:
def is_plural_wn(noun):
    """Tell whether ``noun`` is plural by comparing it with its WordNet lemma.

    The word is lemmatized as a noun; it is reported as plural when the
    lemma differs from the word itself.

    Args:
        noun: the word to check, as a string.

    Returns:
        True when the noun lemma is different from ``noun``, False otherwise.
    """
    wnl = WordNetLemmatizer()
    lemma = wnl.lemmatize(noun, 'n')
    # Compare by value. An identity test (`is not`) would report an equal
    # string held in a different object as "changed", i.e. as plural.
    return noun != lemma

In [42]:
# Try the lemma-based check on an irregular plural.
is_plural_wn('women')

True

In [16]:
def get_plural(singular_noun):
    """Return the plural form of ``singular_noun`` as produced by inflect.

    Args:
        singular_noun: the word to pluralize.

    Returns:
        The result of the inflect engine's ``plural`` method for the word.
    """
    engine = inflect.engine()
    plural_form = engine.plural(singular_noun)
    return plural_form

In [44]:
# Pluralize a sample word.
get_plural('run')

'runs'

In [48]:
def get_singular(plural_noun):
    """Return the singular form of ``plural_noun`` as produced by inflect.

    Args:
        plural_noun: the word to singularize.

    Returns:
        The result of the inflect engine's ``singular_noun`` method, or
        ``plural_noun`` unchanged when that result is falsy.
    """
    engine = inflect.engine()
    # A falsy result means inflect produced no singular form; keep the input.
    return engine.singular_noun(plural_noun) or plural_noun

In [49]:
# Singularize a sample plural noun.
get_singular('emotions')

'emotion'

In [52]:
def plurals_wn(words_with_pos):
    """Flip the grammatical number of every word in ``words_with_pos``.

    Each word is checked with ``is_plural_wn``: words reported as plural are
    passed to ``get_singular``, all others to ``get_plural``. The tag stored
    with each word is not consulted.

    Args:
        words_with_pos: iterable of entries whose first element is the word.

    Returns:
        A list of converted words, in input order.
    """
    flipped = []
    for entry in words_with_pos:
        token_text = entry[0]
        # Pick the conversion that goes the opposite way of the detected number.
        converter = get_singular if is_plural_wn(token_text) else get_plural
        flipped.append(converter(token_text))
    return flipped

In [54]:
# Convert every extracted noun to its other number (singular <-> plural) and show the result.
other_nouns_wn = plurals_wn(nouns)
other_nouns_wn

['women',
 'names',
 'eye',
 'wholes',
 'sexes',
 'emotions',
 'akins',
 'emotion',
 'colds',
 'precises',
 'minds',
 'reasonings',
 'machines',
 'worlds',
 'lovers',
 'positions',
 'passion',
 'gibes',
 'sneers',
 'thing',
 'observer—excellents',
 'veils',
 'mens',
 'motive',
 'action',
 'reasoners',
 'intrusion',
 'delicates',
 'temperaments',
 'distractings',
 'factors',
 'doubts',
 'result',
 'instruments',
 'cracks',
 'high-powers',
 'lens',
 'emotions',
 'natures',
 'women',
 'women',
 'memories']

## Getting the dependency parse

* A dependency parse is a structure that shows how the words in a sentence depend on one another.

* For example, in the sentence *The cat wore a hat*, the root of the sentence is the verb, *wore*, and both the subject, *the cat*, and the object, *a hat*, are its dependents.

In [55]:
import spacy

In [56]:
# Example sentence to parse; it also appears in the Sherlock Holmes text displayed earlier.
sentence = 'I have seldom heard him mention her under any other name.'

In [57]:
# Load the spaCy pipeline used for parsing.
# NOTE(review): the setup cell at the top downloads en_core_web_md, while this
# loads en_core_web_sm — confirm which model is intended and that it is installed.
nlp = spacy.load('en_core_web_sm')

In [58]:
# Run the pipeline on the example sentence.
doc = nlp(sentence)

In [60]:
# Print each token with its dependency label and spaCy's explanation of that label.
for token in doc:
    dep_label = token.dep_
    print(token.text, '\t', dep_label, '\t', spacy.explain(dep_label))

I 	 nsubj 	 nominal subject
have 	 aux 	 auxiliary
seldom 	 advmod 	 adverbial modifier
heard 	 ROOT 	 None
him 	 nsubj 	 nominal subject
mention 	 ccomp 	 clausal complement
her 	 dobj 	 direct object
under 	 prep 	 prepositional modifier
any 	 det 	 determiner
other 	 amod 	 adjectival modifier
name 	 pobj 	 object of preposition
. 	 punct 	 punctuation


* ROOT is the main word that all the other words depend on, usually the verb.


---

* To explore the dependency parse structure, we can use the attributes of the `Token` class.


* Using its `ancestors` and `children` attributes, we can get the **tokens that this token depends on** and the **tokens that depend on it**, respectively.

In [62]:
# For every token, print its text followed by the texts of its ancestors.
for token in doc:
    ancestor = [ancestor_token.text for ancestor_token in token.ancestors]
    print(token.text, ancestor, sep='\n')

I
['heard']
have
['heard']
seldom
['heard']
heard
[]
him
['mention', 'heard']
mention
['heard']
her
['mention', 'heard']
under
['mention', 'heard']
any
['name', 'under', 'mention', 'heard']
other
['name', 'under', 'mention', 'heard']
name
['under', 'mention', 'heard']
.
['heard']


In [64]:
# For every token, print its text followed by the texts of its direct children.
for token in doc:
    children = [child_token.text for child_token in token.children]
    print(token.text, children, sep='\n')

I
[]
have
[]
seldom
[]
heard
['I', 'have', 'seldom', 'mention', '.']
him
[]
mention
['him', 'her', 'under']
her
[]
under
['name']
any
[]
other
[]
name
['any', 'other']
.
[]


In [66]:
# For every token, print its text followed by the texts of the tokens in its subtree.
for token in doc:
    subtree = [subtree_token.text for subtree_token in token.subtree]
    print(token.text, subtree, sep='\n')

I
['I']
have
['have']
seldom
['seldom']
heard
['I', 'have', 'seldom', 'heard', 'him', 'mention', 'her', 'under', 'any', 'other', 'name', '.']
him
['him']
mention
['him', 'mention', 'her', 'under', 'any', 'other', 'name']
her
['her']
under
['under', 'any', 'other', 'name']
any
['any']
other
['other']
name
['any', 'other', 'name']
.
['.']


In [67]:
from spacy import displacy
# Draw the dependency parse of the sentence with displaCy, rendered for Jupyter.
displacy.render(doc,jupyter=True)

* https://www.analyticsvidhya.com/blog/2020/07/part-of-speechpos-tagging-dependency-parsing-and-constituency-parsing-in-nlp/