In [1]:
import spacy

In [70]:
nlp = spacy.load('en_core_web_sm')
sentence = "He eats cheese, but he won't eat ice cream."
doc = nlp(sentence)

# One row per token: text, index, POS tag, dependency label,
# then the texts of its ancestors and of its direct children.
for token in doc:
    ancestors = [node.text for node in token.ancestors]
    children = [node.text for node in token.children]
    row = (token.text, token.i, token.pos_, token.dep_, ancestors, children)
    # ' \t ' reproduces the space-tab-space separator between columns.
    print(*row, sep=' \t ')

He 	 0 	 PRON 	 nsubj 	 ['eats'] 	 []
eats 	 1 	 VERB 	 ROOT 	 [] 	 ['He', 'cheese', ',', 'but', 'eat']
cheese 	 2 	 NOUN 	 dobj 	 ['eats'] 	 []
, 	 3 	 PUNCT 	 punct 	 ['eats'] 	 []
but 	 4 	 CCONJ 	 cc 	 ['eats'] 	 []
he 	 5 	 PRON 	 nsubj 	 ['eat', 'eats'] 	 []
wo 	 6 	 AUX 	 aux 	 ['eat', 'eats'] 	 []
n't 	 7 	 PART 	 neg 	 ['eat', 'eats'] 	 []
eat 	 8 	 VERB 	 conj 	 ['eats'] 	 ['he', 'wo', "n't", 'cream', '.']
ice 	 9 	 NOUN 	 compound 	 ['cream', 'eat', 'eats'] 	 []
cream 	 10 	 NOUN 	 dobj 	 ['eat', 'eats'] 	 ['ice']
. 	 11 	 PUNCT 	 punct 	 ['eat', 'eats'] 	 []


In [71]:
def find_root_of_sentence(doc):
    '''Return the token whose dependency label is 'ROOT' (normally the main verb).

    If several tokens carry the 'ROOT' label, the last one in iteration
    order is returned. Returns None when no token is labelled 'ROOT'.
    '''
    roots = [candidate for candidate in doc if candidate.dep_ == 'ROOT']
    return roots[-1] if roots else None

In [72]:
# Locate the token labelled ROOT in the parsed sentence and display it.
root_token = find_root_of_sentence(doc)
root_token

eats

In [73]:
def find_other_verbs(doc, root_token):
    '''Return the VERB tokens whose one and only ancestor is ``root_token``.

    Tokens are returned in the order they are encountered in ``doc``.
    The root itself is never included.
    '''
    def hangs_directly_off_root(candidate):
        # Exactly one ancestor, and that ancestor is the root.
        chain = list(candidate.ancestors)
        if candidate.pos_ != 'VERB':
            return False
        return len(chain) == 1 and chain[0] == root_token

    return [candidate for candidate in doc if hangs_directly_off_root(candidate)]

In [74]:
# Collect the VERB tokens whose only ancestor is the root, then display them.
other_verbs = find_other_verbs(doc, root_token)
other_verbs

[eat]

In [75]:
def get_clause_token_span_for_verb(verb, doc, all_verbs):
    '''Return the (first, last) token indices spanned by ``verb`` and its children.

    The span covers the verb's own index plus the index of every direct
    child that is not itself one of ``all_verbs`` (those verbs head their
    own clauses). Both returned indices are inclusive token positions.

    Args:
        verb: the token heading the clause; must expose ``.i`` and ``.children``.
        doc: the document the verb belongs to. Kept for interface
            compatibility; the span is derived from token indices alone.
        all_verbs: tokens treated as clause heads; children found in this
            collection are ignored.

    Returns:
        A ``(first_token_index, last_token_index)`` tuple. A verb with no
        qualifying children yields ``(verb.i, verb.i)``.
    '''
    # Seed with the verb itself: without it, a verb that is the leftmost or
    # rightmost token of its clause (e.g. an imperative) falls outside the span.
    clause_indices = [verb.i]
    clause_indices.extend(
        child.i for child in verb.children if child not in all_verbs
    )
    return (min(clause_indices), max(clause_indices))

In [76]:
# The root verb first, followed by the verbs attached directly to it.
all_verbs = [root_token, *other_verbs]
# One (first_index, last_index) pair per clause-heading verb.
token_spans = [
    get_clause_token_span_for_verb(verb, doc, all_verbs)
    for verb in all_verbs
]

In [77]:
# Slice the doc once per usable span. The slice end is exclusive, so the
# token sitting at index `end` is left out of the clause.
clause_spans = [doc[start:end] for (start, end) in token_spans if start < end]
# Order the clauses by their first token.
sentence_clauses = sorted(clause_spans, key=lambda span: span[0])
clauses_text = [span.text for span in sentence_clauses]
clauses_text

['He eats cheese,', "he won't eat ice cream"]