## Importing Libraries

In [1]:
import pandas as pd
import spacy

from spacy import displacy
from spacy.matcher import Matcher

# Load the `en_core_web_sm` spaCy pipeline once; the cells below reuse this
# `nlp` object both to parse sentences and to supply the vocab for matchers.
nlp = spacy.load("en_core_web_sm")

## Example Set

In [2]:
# Two tiny hand-written corpora: the same facts stated in active voice and
# in passive voice. They serve as the worked examples for the parses below.
active = [
    "Hens lay eggs.",
    "Birds build nests.",
]
passive = [
    "Eggs are laid by hens",
    "Nests are built by birds",
]

## Visualizing the First Parse

In [3]:
# Parse the first active-voice example; `doc` is reused by the next cell.
doc = nlp(active[0])

# Print each token next to the dependency label the parser assigned to it.
for token in doc:
    print(token.text, " -- ", token.dep_)

Hens  --  nsubj
lay  --  ROOT
eggs  --  dobj
.  --  punct


In [4]:
# Draw the dependency parse of `doc` (style="dep") for visual inspection.
displacy.render(doc, style="dep")

## Visualizing All Parses

In [5]:
def show_parses(title, sentences):
    """Print and render the dependency parse of every sentence in a group.

    Parameters
    ----------
    title : str
        Heading printed above the group, followed by a dashed rule.
    sentences : iterable of str
        Raw sentences; each one is parsed with the notebook-level `nlp`.

    For every sentence, one "text  --  dep" line is printed per token, then
    the parse is drawn with displaCy.
    """
    print(title)
    print("------------------------------------------------------------------")
    for sent in sentences:
        doc = nlp(sent)

        for token in doc:
            print(token.text, " -- ", token.dep_)

        displacy.render(doc, style="dep")


# Same report for both voices; the helper replaces two copy-pasted loops.
show_parses("Active Voice Sentences", active)
print("\n")
show_parses("Passive Voice Sentences", passive)

Active Voice Sentences
------------------------------------------------------------------
Hens  --  nsubj
lay  --  ROOT
eggs  --  dobj
.  --  punct


Birds  --  nsubj
build  --  ROOT
nests  --  dobj
.  --  punct




Passive Voice Sentences
------------------------------------------------------------------
Eggs  --  nsubjpass
are  --  auxpass
laid  --  ROOT
by  --  agent
hens  --  pobj


Nests  --  nsubjpass
are  --  auxpass
built  --  ROOT
by  --  agent
birds  --  pobj


## Creating a Rule for `passive voice`

In [6]:
# Matcher bound to the pipeline's vocab, holding a single one-token pattern:
# any token whose dependency label is `nsubjpass`.
matcher = Matcher(nlp.vocab)
passive_rule = [{"DEP": "nsubjpass"}]
matcher.add("Rule", [passive_rule])

# Try the rule on the first passive example; the cell displays the list of
# matches, each a 3-tuple whose last two items are token offsets.
first_passive_doc = nlp(passive[0])
matcher(first_passive_doc)

[(15740618714089435985, 0, 1)]

## `Passive Voice`: A Function

In [7]:
def is_passive(doc):
    """Report whether a parsed sentence contains passive-voice markers.

    A token counts as a marker when its dependency label is ``nsubjpass``
    or ``auxpass``.

    Parameters
    ----------
    doc : spacy.tokens.Doc
        A sentence that has already been run through a spaCy pipeline, so
        that its tokens carry dependency labels.

    Returns
    -------
    tuple or bool
        ``(True, spans)`` when at least one marker is found, where ``spans``
        holds the matched single-token spans in the order the matcher
        reported them; plain ``False`` otherwise. The mixed return shape is
        kept on purpose: callers in this notebook test the result for
        truthiness and print it as-is.
    """
    passive_rule = [{'DEP': {"IN": ['nsubjpass', 'auxpass']}}]

    # Build the matcher from the doc's own vocab rather than the global
    # `nlp`, so the function works for any parsed doc it is handed.
    matcher = Matcher(doc.vocab)
    matcher.add("Rule", [passive_rule])

    # Run the matcher once and reuse the result (it used to run twice).
    matches = matcher(doc)
    if not matches:
        return False

    # Each match is (match_id, start, end); slice the doc to get the span.
    return True, [doc[start:end] for _, start, end in matches]

## Verifying All Parses

In [8]:
# Every active-voice example should come back as False.
for sentence in active:
    print(is_passive(nlp(sentence)))

False
False


In [9]:
# Every passive-voice example should come back as (True, [matched tokens]).
for sentence in passive:
    print(is_passive(nlp(sentence)))

(True, [Eggs, are])
(True, [Nests, are])


## A Dataset

In [10]:
# Read the sentence pairs from CSV. The relative path assumes the notebook
# runs from a directory that has ../input/activepassive/ available.
active_passive = pd.read_csv("../input/activepassive/active_passive.csv")
# Preview only the first two rows.
active_passive.head(2)

Unnamed: 0,Active,Passive
0,He reads a novel.,A novel is read.
1,He does not cook food.,Food is not cooked by him.


In [11]:
# (rows, columns) of the loaded frame.
active_passive.shape

(40, 2)

In [12]:
# Pull the two sentence columns out by name.
# NOTE(review): the cells shown below read active_passive.Active / .Passive
# directly, so these two aliases appear unused — confirm before removing.
active_set = active_passive.Active
passive_set = active_passive.Passive

## Check Function on `Active Sentences`

In [13]:
# Tally the sentences in the Active column that `is_passive` rejects
# (a falsy result means no passive marker was found).
cnt = sum(1 for sent in active_passive.Active if not is_passive(nlp(sent)))

print("Active Sentences : ", cnt)

Active Sentences :  40


## Check Function on `Passive Sentences`

In [14]:
# Tally the sentences in the Passive column that `is_passive` flags
# (a truthy result means at least one passive marker was found).
cnt = sum(1 for sent in active_passive.Passive if is_passive(nlp(sent)))

print("Passive Sentences : ", cnt)

Passive Sentences :  40
