In [34]:
import spacy
import textacy

In [98]:
# spaCy pipeline used below for POS tags, dependency labels and noun chunks.
nlp = spacy.load('en_core_web_sm')

# Example sentences the triplet extraction is run on.
sentences = [
    'All living things are made of cells.',
    'Cells have organelles.',
]

# Token-level POS patterns that count as a verb phrase:
#   1. auxiliary + verb + adposition (e.g. "are made of")
#   2. a single verb (e.g. "have")
verb_patterns = [
    [{'POS': pos_tag} for pos_tag in ('AUX', 'VERB', 'ADP')],
    [{'POS': 'VERB'}],
]

def find_root_of_sentence(doc):
    """Return the token of ``doc`` whose dependency label is ``'ROOT'``.

    The root of a sentence's dependency parse is normally its main verb.
    When several tokens carry the ``'ROOT'`` label the last one in ``doc``
    is returned; when none does, the result is ``None``.
    """
    root_candidates = [tok for tok in doc if tok.dep_ == 'ROOT']
    return root_candidates[-1] if root_candidates else None

def contains_root(verb_phrase, root):
    """Return True when the ``root`` token lies inside ``verb_phrase``.

    Args:
        verb_phrase: Span-like object exposing ``start`` (inclusive) and
            ``end`` (exclusive) token indices, as spaCy spans do.
        root: Token-like object exposing its document index as ``i``,
            or ``None`` when no root token was found.

    Returns:
        bool: True if ``start <= root.i < end``; False otherwise, including
        when ``root`` is ``None``.
    """
    if root is None:
        return False
    # ``end`` is exclusive: a phrase that stops right before the root
    # (end == root.i) does not contain it.
    return verb_phrase.start <= root.i < verb_phrase.end

def get_verb_phrases(doc):
    """Collect the verb-phrase matches in ``doc`` that include the root token.

    The module-level ``verb_patterns`` are matched against ``doc`` with
    textacy's token matcher; only matches for which ``contains_root`` is
    true are kept, in match order.

    Returns:
        list: the root-containing matches (possibly empty).
    """
    sentence_root = find_root_of_sentence(doc)
    candidates = textacy.extract.matches.token_matches(doc, verb_patterns)
    return [span for span in candidates if contains_root(span, sentence_root)]

def longer_verb_phrase(verb_phrases):
    """Return the longest phrase in ``verb_phrases``.

    Length is measured with ``len()``. On ties the earliest phrase wins.

    Args:
        verb_phrases: iterable of sized objects (e.g. spaCy spans).

    Returns:
        The longest phrase, or ``None`` if the iterable is empty or every
        phrase has length zero.
    """
    longest_length = 0
    longest_verb_phrase = None
    for verb_phrase in verb_phrases:
        phrase_length = len(verb_phrase)
        if phrase_length > longest_length:
            # Track the running maximum; without this update every
            # non-empty phrase would overwrite the previous candidate.
            longest_length = phrase_length
            longest_verb_phrase = verb_phrase
    return longest_verb_phrase

def find_noun_phrase(verb_phrase, noun_phrases, side):
    """Return the first noun phrase on the given ``side`` of ``verb_phrase``.

    Noun phrases are examined in iteration order and compared by their
    ``start`` index against ``verb_phrase.start``:

    * ``side == 'left'``  -> first phrase starting before the verb phrase
    * ``side == 'right'`` -> first phrase starting after the verb phrase

    Returns ``None`` when nothing qualifies or ``side`` is any other value.
    """
    def is_on_requested_side(candidate):
        if side == 'left':
            return candidate.start < verb_phrase.start
        if side == 'right':
            return candidate.start > verb_phrase.start
        return False

    # next() stops at the first hit, so an iterator argument is consumed
    # only up to and including the phrase that is returned.
    return next(filter(is_on_requested_side, noun_phrases), None)

def find_triplet(sentence):
    """Extract a (left noun phrase, verb phrase, right noun phrase) triplet.

    The sentence is parsed with the module-level ``nlp`` pipeline. The verb
    phrase is the root-containing match from ``get_verb_phrases`` (chosen by
    ``longer_verb_phrase`` when there are several); the noun phrases are
    picked from the document's noun chunks with ``find_noun_phrase``.

    Args:
        sentence: text of a single sentence.

    Returns:
        tuple: ``(left_noun_phrase, verb_phrase, right_noun_phrase)``. Any
        element may be ``None``; all three are ``None`` when no verb phrase
        containing the root is found.
    """
    doc = nlp(sentence)
    verb_phrases = get_verb_phrases(doc)
    if not verb_phrases:
        # Nothing to anchor the noun-phrase search on; avoid indexing an
        # empty list.
        return (None, None, None)

    if len(verb_phrases) > 1:
        verb_phrase = longer_verb_phrase(verb_phrases)
    else:
        verb_phrase = verb_phrases[0]

    # doc.noun_chunks is a one-shot iterator. Materialize it so the
    # right-hand search still sees every chunk even when the left-hand
    # search ran through all of them without finding a match.
    noun_phrases = list(doc.noun_chunks)
    left_noun_phrase = find_noun_phrase(verb_phrase, noun_phrases, 'left')
    right_noun_phrase = find_noun_phrase(verb_phrase, noun_phrases, 'right')
    return (left_noun_phrase, verb_phrase, right_noun_phrase)

In [99]:
# Print one row per sentence: left noun phrase, verb phrase, right noun
# phrase, separated by tabs.
for sentence in sentences:
    left_np, vp, right_np = find_triplet(sentence)
    print(left_np, '\t', vp, '\t', right_np)

All living things 	 are made of 	 cells
Cells 	 have 	 organelles
