<a href="https://colab.research.google.com/github/chandan110791/NLP/blob/main/Dep_parsing_Commented.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import libraries
import spacy
from spacy import displacy
from spacy.matcher import Matcher
import pandas as pd
# Load spaCy's `en_core_web_sm` pipeline; later cells call `nlp(...)` to parse
# sentences and use `nlp.vocab` to build matchers.
nlp = spacy.load("en_core_web_sm")

### Let's check our rule on a larger corpus

In [None]:
# Read the sentence pairs from the CSV file and preview the first two rows.
active_passive = pd.read_csv(filepath_or_buffer='active_passive.csv')
active_passive.head(n=2)

Unnamed: 0,Active,Passive
0,He reads a novel.,A novel is read.
1,He does not cook food.,Food is not cooked by him.


In [None]:
# Show the dataframe's shape as a (rows, columns) tuple.


active_passive.shape


(40, 2)

In [None]:
# Pull the active-voice and passive-voice columns out of the dataframe
# (each is a pandas Series of sentences).
active, passive = active_passive['Active'], active_passive['Passive']



### Create the rule

In [None]:


# First attempt: a single-token pattern that fires on any token whose
# dependency label is `nsubjpass`.
passive_rule = [
    {'DEP': 'nsubjpass'},
]
matcher = Matcher(nlp.vocab)
matcher.add('Rule', [passive_rule])



In [None]:
def is_passive(doc, matcher):
    """Return True when `matcher` finds at least one match in `doc`.

    Args:
        doc: The parsed document handed to the matcher.
        matcher: A callable that takes `doc` and returns a sized collection
            of matches (in this notebook, a spaCy `Matcher`).

    Returns:
        bool: True if the matcher reports one or more matches, else False.
    """
    # The comparison already yields a bool, so no if/else branch is needed.
    return len(matcher(doc)) > 0


### Check rule on active voice sentences

In [None]:


# Count the active-voice sentences on which the rule does NOT fire.
cnt = 0
for sent in active:
    doc = nlp(sent)
    if is_passive(doc, matcher):
        continue  # rule fired on an active sentence; do not count it
    cnt += 1
print(cnt)



40


### Check rule on passive voice sentences

In [None]:
# Count the passive-voice sentences on which the rule fires.
cnt = 0
for sent in passive:
    doc = nlp(sent)
    if not is_passive(doc, matcher):
        continue  # rule did not fire; do not count it
    cnt += 1
print(cnt)

38


### Let's troubleshoot

In [None]:


# Count the passive sentences the rule detects and keep the parsed docs it
# fails to flag in `missed` for inspection.
cnt, missed = 0, []
for sent in passive:
    doc = nlp(sent)
    if not is_passive(doc, matcher):
        missed.append(doc)
        continue
    cnt += 1
print(cnt)



38


In [None]:
# Show the first document that the rule failed to flag.
missed[0]

Are books being purchased by him?

In [None]:


# Show the second document that the rule failed to flag.
missed[1]



Is a table being bought by Ritika?

### Let's visualize their dependency trees

In [None]:
# Draw the dependency tree of every document the rule missed.
# jupyter=True is passed explicitly (as the other render calls in this
# notebook do) so the markup is displayed instead of being returned and
# discarded inside the loop when spaCy's notebook auto-detection does not fire.
for doc in missed:
    displacy.render(doc, style="dep", jupyter=True)

In [None]:
# Look up the human-readable description of the `auxpass` dependency label.
spacy.explain("auxpass")


'auxiliary (passive)'

[Dependencies](https://universaldependencies.org/docs/en/dep/)

### Update our rule
[Reference](https://spacy.io/usage/rule-based-matching)

In [None]:
# Broadened rule: fire on a token labelled either `nsubjpass` or `auxpass`.
passive_rule = [
    {'DEP': {"IN": ['nsubjpass', 'auxpass']}},
]
matcher = Matcher(nlp.vocab)
matcher.add('Rule', [passive_rule])

In [None]:


# Count the active-voice sentences on which the current rule does NOT fire.
cnt = 0
for sent in active:
    doc = nlp(sent)
    if is_passive(doc, matcher):
        continue  # rule fired on an active sentence; do not count it
    cnt += 1
print(cnt)



40


In [None]:
# Count the passive sentences the current rule detects; anything it fails to
# flag is kept in `missed`.
cnt, missed = 0, []
for sent in passive:
    doc = nlp(sent)
    if not is_passive(doc, matcher):
        missed.append(doc)
        continue
    cnt += 1
print(cnt)


40


## Summary
 - Always test your rules and heuristics on a larger corpus to see how effective they are
 - One can write intricate matching rules using the `Matcher` object

In [None]:
# Try the current rule on a few hand-written sentences: matched sentences are
# printed and counted, the rest are collected in `missed`.
newSet = [
    'What is your name?',
    'Is coffee serving here?',
    'Is she being promoted as new assistant?',
    'Women are said to live longer than men.',
]
cnt, missed = 0, []
for sent in newSet:
    doc = nlp(sent)
    if not is_passive(doc, matcher):
        missed.append(doc)
        continue
    cnt += 1
    print(doc)
print(cnt)

Is she being promoted as new assistant?
Women are said to live longer than men.
2


In [None]:
# NOTE(review): these imports and the model load repeat the notebook's first
# cell; they keep this cell runnable on its own but rebind `nlp`.
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")

# Parse a single sentence so its dependency tree can be inspected.
doc = nlp('Women are said to live longer than men.')

# Draw the dependency tree; jupyter=True overrides displaCy's notebook auto-detection.
displacy.render(doc, style="dep", jupyter = True)

In [None]:
# Replace the matcher with one that fires on any token labelled `aux`.
aux_rule = [
    {'DEP': {"IN": ['aux']}},
]
matcher = Matcher(nlp.vocab)
matcher.add('Rule', [aux_rule])

In [None]:
# Run whatever rule `matcher` currently holds over a new set of sentences.
# Despite its name, `is_passive` only reports whether that matcher fired, so
# with the `aux` rule loaded it flags sentences containing an `aux` token.
newSet = [
    'Sofia is learning NLP.',
    'Eggs are laid by Hens.',
    'Mouse is eaten by a black cat.',
    'She has done her job productively.',
]
cnt, missed = 0, []
for sent in newSet:
    doc = nlp(sent)
    if not is_passive(doc, matcher):
        missed.append(doc)
        continue
    cnt += 1
    print(doc)
print(cnt)




Sofia is learning NLP.
She has done her job productively.
2


In [None]:

# NOTE(review): these imports and the model load repeat the notebook's first
# cell; they keep this cell runnable on its own but rebind `nlp`.
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")

# Parse a sentence that contains a relative clause.
doc = nlp('JetAirways cancelled the flight this morning which was already late.')

# Draw the dependency tree; jupyter=True overrides displaCy's notebook auto-detection.
displacy.render(doc, style="dep", jupyter = True)

In [None]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

# Fire on any token carrying one of the four subject dependency labels.
pattern = [
    {"DEP": {"IN": ["nsubj", "nsubjpass", "csubj", "csubjpass"]}},
]
matcher.add("subject", [pattern])

In [None]:
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab)

# NOTE(review): this cell rebuilds the "subject" matcher from the preceding
# cell with "csubjpass" left out of the label list; when both cells are run
# in order, this definition is the one later cells see. Confirm which label
# set is intended.
pattern = [
    {"DEP": {"IN": ["nsubj", "nsubjpass", "csubj"]}},
]
matcher.add("subject", [pattern])


In [None]:
# Run whatever rule `matcher` currently holds over a single sentence.
# Despite its name, `is_passive` only reports whether that matcher fired, so
# with the "subject" pattern loaded it flags sentences containing a subject token.
newSet = [
    'JetAirways cancelled the flight this morning which was already late.',
]
cnt, missed = 0, []
for sent in newSet:
    doc = nlp(sent)
    if not is_passive(doc, matcher):
        missed.append(doc)
        continue
    cnt += 1
    print(doc)
print(cnt)

JetAirways cancelled the flight this morning which was already late.
1


In [None]:
# NOTE(review): these imports and the model load repeat the notebook's first
# cell; they keep this cell runnable on its own but rebind `nlp`.
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")

# Parse a short passive sentence.
doc = nlp('Dole was defeated by Clinton')

# Draw the dependency tree; jupyter=True overrides displaCy's notebook auto-detection.
displacy.render(doc, style="dep", jupyter = True)
# Count the direct syntactic children of the token at index 2 of the parsed doc.
len(list(doc[2].children))

3