<h1>Passive-Active Translator (PAT)</h1>

Given a well-formed English sentence, convert it to the active or the passive voice.

In [1]:
import spacy, re
from spacy.matcher import Matcher
from spacy import displacy
from lemminflect import getInflection, getLemma
from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging

In [5]:
# Load spaCy's small English pipeline; the rest of the notebook uses it for
# tokens, lemmas, POS tags, dependency labels, and noun chunks.
nlp = spacy.load("en_core_web_sm")

In [6]:
# Load the semantic-role-labeling (SRL) predictor from the local model archive.
# A forward slash is used so the literal contains no backslash escape (the
# original "\s" is an invalid escape sequence) and matches the 'data/...' paths
# used for the sentence files.
srl = Predictor.from_path("data/structured-prediction-srl-bert.2020.12.15.tar.gz")

error loading _jsonnet (this is expected on Windows), treating C:\Users\antsp\AppData\Local\Temp\tmps54g_0hr\config.json as plain json


In [7]:
# Read the two sentence files, one sentence per line, stripping surrounding
# whitespace. The `with` statement closes each file, so no explicit close()
# call is needed.
with open('data/active.txt', 'r', encoding='utf8') as a:
    active_sentences = [line.strip() for line in a]

with open('data/passive.txt', 'r', encoding='utf8') as p:
    passive_sentences = [line.strip() for line in p]

In [45]:
class Sentence:
    """An English sentence together with its spaCy parse and SRL prediction.

    The constructor runs the module-level spaCy pipeline (``nlp``) and SRL
    predictor (``srl``) on the text; every other method only reads those
    results (``agent`` and ``patient`` additionally re-parse a short phrase).
    """

    # Role labels copied verbatim from the SRL description into the result.
    _ROLE_LABELS = ('ARG0', 'ARG1', 'ARG2', 'ARGM-MOD', 'ARGM-NEG', 'ARGM-TMP')
    # Seeing any of these in a verb's description stops the scan over verbs.
    _STOP_MARKERS = ('ARGM', 'ARG2', 'ARG3', 'ARG4')

    def __init__(self, text):
        """Parse *text* with spaCy and run the SRL predictor on it."""
        self.text = text
        self.spacy_object = nlp(self.text)
        self.tokens = [token.text for token in self.spacy_object]
        self.lemmas = [token.lemma_ for token in self.spacy_object]
        # Both dicts are keyed by token text, so a repeated word keeps only
        # the tag / dependency of its last occurrence.
        self.tags = {str(token): token.tag_ for token in self.spacy_object}
        self.dependencies = {str(token): token.dep_ for token in self.spacy_object}
        self.srls = srl.predict(sentence = self.text)

    @staticmethod
    def _labelled_span(label, description):
        """Return the text of the first ``[label: ...]`` bracket, or None."""
        found = re.search(r'\[' + re.escape(label) + r': (.*?)\]', description)
        return found.group(1) if found else None

    @staticmethod
    def _join_words(words):
        """Join *words* with spaces, gluing a stand-alone apostrophe to the word before it.

        Only the first stand-alone apostrophe is merged. An apostrophe in
        first position has no preceding word and is left where it is.
        """
        words = list(words)
        if "'" in words:
            i = words.index("'")
            if i > 0:
                words[i - 1] += words[i]
                del words[i]
        return ' '.join(words)

    @staticmethod
    def _first_noun_chunk(phrase):
        """Return the first spaCy noun chunk of *phrase* (apostrophes merged), or None."""
        if phrase is None:
            return None
        for chunk in nlp(phrase).noun_chunks: # Parse the phrase to find dependents.
            return Sentence._join_words(chunk.text.split())
        return None

    def voice(self): # Determines voice given a well-formed sentence.
        """Return "passive" if the parse matches the passive pattern, else "active"."""
        matcher = Matcher(nlp.vocab)
        # passive subject, optional auxiliaries, passive auxiliary, past participle
        pattern_passive = [{'DEP':'nsubjpass'},
                           {'DEP':'aux','OP':'*'},
                           {'DEP':'auxpass'},
                           {'TAG':'VBN'}]
        matcher.add('Passive', [pattern_passive])
        return "passive" if matcher(self.spacy_object) else "active"

    def arguments(self): # Extracts agent/patient phrases
        """Extract semantic roles from the SRL verb descriptions.

        Verbs are scanned in order. A verb whose description mentions ARG0 or
        ARG1 overwrites the roles it provides (roles it lacks keep the value
        from an earlier verb). The scan stops after the first verb whose
        description mentions ARGM, ARG2, ARG3 or ARG4.

        Returns:
            dict with keys 'ARG0', 'ARG1', 'ARG2', 'ARGM-MOD', 'ARGM-NEG',
            'ARGM-TMP', 'AUX' and 'ROOT'; each value is a string or None.
            'AUX' is the unbracketed text directly before the ``[V: ...]``
            bracket, 'ROOT' is the text inside it.
        """
        roles = dict.fromkeys(self._ROLE_LABELS + ('AUX', 'ROOT'))

        for verb in self.srls['verbs']:
            description = verb['description']

            if 'ARG0' in description or 'ARG1' in description:
                for label in self._ROLE_LABELS:
                    span = self._labelled_span(label, description)
                    if span is not None:
                        roles[label] = span

                root = self._labelled_span('V', description)
                if root is not None:
                    roles['ROOT'] = root
                    # Auxiliaries are whatever sits between the previous
                    # closing bracket (or the start) and the verb bracket.
                    # Anchoring on '[V:' keeps labels that merely end in
                    # 'V:' (e.g. ARGM-ADV) from being mistaken for the verb.
                    before_verb = description.partition('[V:')[0]
                    roles['AUX'] = before_verb.rpartition(']')[2].strip() or None

            if any(marker in description for marker in self._STOP_MARKERS):
                break

        return roles

    def agent(self):
        """Return the first noun chunk of the ARG0 phrase, or None."""
        return self._first_noun_chunk(self.arguments()['ARG0'])

    def patient(self):
        """Return the first noun chunk of the ARG1 phrase, or None."""
        return self._first_noun_chunk(self.arguments()['ARG1'])

    def beneficiary(self):
        """Return the whole ARG2 phrase with a stand-alone apostrophe merged, or None."""
        phrase = self.arguments()['ARG2']
        if phrase is None:
            return None
        return self._join_words(phrase.split())

In [46]:
# Wrap every raw sentence string in a Sentence object, keeping file order.
active = [Sentence(text) for text in active_sentences]

passive = [Sentence(text) for text in passive_sentences]

In [47]:
# Print the extracted roles for every active sentence.
# enumerate() supplies the position directly (no list.index scan per item),
# and the role dictionary is computed once per sentence instead of once per
# printed field.
for number, sentence in enumerate(active):
    roles = sentence.arguments()
    print("Sentence " + str(number) + ": " + sentence.text)
    print("Agent: " + str(sentence.agent()))
    print("Patient: " + str(sentence.patient()))
    print("Beneficiary: " + str(sentence.beneficiary()))
    print("Root: " + str(roles["ROOT"]))
    print("Auxiliaries: " + str(roles["AUX"]))
    print("Modal: " + str(roles["ARGM-MOD"]))
    print("Negation: " + str(roles["ARGM-NEG"]))
    print("Temporality: " + str(roles["ARGM-TMP"]))
    print()

Sentence 0: Harry ate six shrimp at dinner.
Agent: Harry
Patient: six shrimp
Beneficiary: None
Root: ate
Auxiliaries: None
Modal: None
Negation: None
Temporality: None

Sentence 1: Beautiful giraffes roam the savannah.
Agent: Beautiful giraffes
Patient: the savannah
Beneficiary: None
Root: roam
Auxiliaries: None
Modal: None
Negation: None
Temporality: None

Sentence 2: Sue changed the flat tire.
Agent: Sue
Patient: the flat tire
Beneficiary: None
Root: changed
Auxiliaries: None
Modal: None
Negation: None
Temporality: None

Sentence 3: We are going to watch a movie tonight.
Agent: We
Patient: a movie
Beneficiary: None
Root: watch
Auxiliaries: are going to
Modal: None
Negation: None
Temporality: tonight

Sentence 4: I ran the obstacle course in record time.
Agent: I
Patient: the obstacle course
Beneficiary: None
Root: ran
Auxiliaries: None
Modal: None
Negation: None
Temporality: in record time

Sentence 5: The crew paved the entire stretch of highway.
Agent: The crew
Patient: the entire 

In [48]:
# Print the extracted roles for every passive sentence.
# enumerate() supplies the position directly (no list.index scan per item),
# and the role dictionary is computed once per sentence instead of once per
# printed field.
for number, sentence in enumerate(passive):
    roles = sentence.arguments()
    print("Sentence " + str(number) + ": " + sentence.text)
    print("Agent: " + str(sentence.agent()))
    print("Patient: " + str(sentence.patient()))
    print("Beneficiary: " + str(sentence.beneficiary()))
    print("Root: " + str(roles["ROOT"]))
    print("Auxiliaries: " + str(roles["AUX"]))
    print("Modal: " + str(roles["ARGM-MOD"]))
    print("Negation: " + str(roles["ARGM-NEG"]))
    print("Temporality: " + str(roles["ARGM-TMP"]))
    print()

Sentence 0: At dinner, six shrimp were eaten by Harry.
Agent: Harry
Patient: six shrimp
Beneficiary: None
Root: eaten
Auxiliaries: were
Modal: None
Negation: None
Temporality: None

Sentence 1: The savannah is roamed by beautiful giraffes.
Agent: beautiful giraffes
Patient: The savannah
Beneficiary: None
Root: roamed
Auxiliaries: is
Modal: None
Negation: None
Temporality: None

Sentence 2: The flat tire was changed by Sue.
Agent: Sue
Patient: The flat tire
Beneficiary: None
Root: changed
Auxiliaries: was
Modal: None
Negation: None
Temporality: None

Sentence 3: A movie is going to be watched by us tonight.
Agent: us
Patient: A movie
Beneficiary: None
Root: watched
Auxiliaries: is going to be
Modal: None
Negation: None
Temporality: tonight

Sentence 4: The obstacle course was run by me in record time.
Agent: me
Patient: The obstacle course
Beneficiary: None
Root: run
Auxiliaries: was
Modal: None
Negation: None
Temporality: in record time

Sentence 5: The entire stretch of highway was pa

In [56]:
# Dump the raw SRL prediction (verb descriptions, BIO tags, words) stored on
# one active sentence, to inspect the format that Sentence.arguments() parses.
print(active[23].srls)

{'verbs': [{'verb': 'carried', 'description': '[ARG0: The kangaroo] [V: carried] [ARG1: her baby] [ARG2: in her pouch] .', 'tags': ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARG2', 'I-ARG2', 'I-ARG2', 'O']}], 'words': ['The', 'kangaroo', 'carried', 'her', 'baby', 'in', 'her', 'pouch', '.']}


In [79]:
# Subject-form pronouns mapped to their object forms (lower case only).
_SUBJECT_TO_OBJECT = {'i':'me',
                      'we':'us',
                      'they':'them',
                      'you':'you',
                      'he':'him',
                      'she':'her',
                      'who':'whom'}


def inflectObjectPerson(chunk):
    """Replace subject pronouns in *chunk* with their object forms.

    The chunk is split on whitespace and every word that exactly matches a
    lower-case subject pronoun is swapped (e.g. 'we' -> 'us'). All
    occurrences are replaced, not just the first of each pronoun. Other
    words are left untouched.

    Args:
        chunk: phrase to convert; callers are expected to lower-case it
            first, because matching is case-sensitive.

    Returns:
        The converted phrase with words re-joined by single spaces.
    """
    return ' '.join(_SUBJECT_TO_OBJECT.get(word, word) for word in chunk.split())

In [80]:
def passivize(s): # Takes plain text sentence.
    """Rearrange an active sentence into passive word order.

    A sentence already detected as passive is returned unchanged. Otherwise
    the output is assembled as: text left of the agent, patient, modal,
    negation, a short temporal adverb ('always'/'never'/'ever'), root verb,
    then "by <agent>" and the beneficiary, followed by whatever trails the
    right-most located piece in the original sentence.

    The root verb is not yet conjugated and the patient's case is not yet
    adjusted (see the TODO notes below).

    Args:
        s: plain-text sentence.

    Returns:
        The recomposed sentence as a string.
    """
    sentence = Sentence(s)
    # voice is a method; comparing the bound method itself to a string is
    # always False, so it has to be called.
    if sentence.voice() == "passive":
        return sentence.text

    # Each of these re-runs parsing/extraction, so compute them exactly once.
    roles = sentence.arguments()
    agent_phrase = sentence.agent()
    patient_phrase = sentence.patient()
    beneficiary_phrase = sentence.beneficiary()
    text = sentence.text

    if agent_phrase is not None:
        agent = "by " + inflectObjectPerson(agent_phrase.lower())
    else:
        agent = ''

    # TODO: conjugate patient (e.g., me --> I)
    patient = patient_phrase if patient_phrase is not None else ''

    dative = False
    if beneficiary_phrase:
        beneficiary_nlp = nlp(beneficiary_phrase)
        if beneficiary_nlp[0].tag_ == "IN":
            # Already introduced by a preposition: keep it as is and place
            # it after the agent.
            beneficiary = beneficiary_nlp.text
            dative = True
        else:
            beneficiary = "to " + beneficiary_nlp.text
    else:
        beneficiary = ''

    # TODO: conjugate root
    root = roles["ROOT"] or ''

    # TODO: add auxiliaries
    neg = roles["ARGM-NEG"] or ''
    modal = roles["ARGM-MOD"] or ''
    # Only these short adverbs are moved in front of the verb; longer
    # temporal phrases stay in the trailing text.
    tmp = roles["ARGM-TMP"] if roles["ARGM-TMP"] in ('always', 'never', 'ever') else ''

    # Everything in front of the agent is carried over verbatim. find()
    # returns -1 instead of raising when the agent is missing or is not a
    # literal substring of the text.
    agent_start = text.find(agent_phrase) if agent_phrase is not None else -1
    left_side = text[:agent_start] if agent_start > 0 else ''

    # recompose
    if dative:
        ordered = [left_side, patient, modal, neg, tmp, root, agent, beneficiary]
    else:
        ordered = [left_side, patient, modal, neg, tmp, root, beneficiary, agent]
    passive_sentence = ' '.join(piece for piece in ordered if piece != '')

    # get right side extras: locate the piece that starts furthest right in
    # the original sentence and append whatever follows it.
    rightmost_start = -1
    tail_start = 0
    for piece in (left_side, agent_phrase, patient_phrase, beneficiary_phrase,
                  modal, neg, tmp, root):
        if not piece:
            continue
        position = text.find(piece)
        if position > rightmost_start:
            rightmost_start = position
            tail_start = position + len(piece)

    return passive_sentence + text[tail_start:]

In [81]:
def activize(s): # Takes plain text sentence.
    """Return *s* in the active voice.

    A sentence detected as active is returned unchanged. Converting a passive
    sentence is not implemented yet.

    Args:
        s: plain-text sentence.

    Returns:
        The sentence text when it is already active.

    Raises:
        NotImplementedError: if the sentence is detected as passive.
    """
    sentence = Sentence(s)
    # voice is a method; it must be called for the comparison to be meaningful.
    if sentence.voice() == "active":
        return sentence.text

    # TODO: passive -> active conversion, planned steps:
    #   get agent; conjugate agent (e.g., me --> I)
    #   get patient; conjugate patient (e.g., I --> me)
    #   get root; conjugate root
    #   add auxiliaries
    #   get left side extras
    #   get right side extras
    #   recompose
    raise NotImplementedError("passive-to-active conversion is not implemented yet")

In [82]:
# Run passivize() on every raw active sentence and print each result as it
# is produced.
for sentence in active_sentences:
    print(passivize(sentence))

six shrimp ate by harry at dinner.
the savannah roam by beautiful giraffes.
the flat tire changed by sue.
a movie watch by us tonight.
the obstacle course ran by me in record time.
the entire stretch paved by the crew of highway.
the novel read by mom in one day.
the house will clean by me every Saturday.
a safety video requires to staff by the company every year.
the entire house painted by tom.
the students' questions always answers by the teacher.
that piece enjoys by the choir.
the whole suburb destroyed by a forest fire.
the treaty signing by the two kings.
the office dusts by the cleaning crew every night.
money donated by larry to the homeless shelter.
all the reservations making by the wedding planner.
two dozen cupcakes will bake by susan for the bake sale.
the comet viewed by the science class.
instructions will give to you by the director.
the Grand Canyon visit by thousands every year.
the house remodeled by the homeowners to help it sell.
the metal beams corroded by the sa