In [14]:
def extract_candidate_chunks(text, grammar=r'KT: {(<JJ>* <NN.*>+ <IN>)? <JJ>* <NN.*>+}'):
    """Extract candidate keyphrases from ``text`` by chunking POS-tagged tokens.

    The text is split into sentences, tokenized, POS-tagged and chunked with
    ``grammar``. The words of every run of chunked tokens are joined with a
    single space and lower-cased to form one candidate phrase.

    Args:
        text: Raw text to extract candidates from.
        grammar: Chunk grammar passed to NLTK's ``RegexpParser``. The default
            matches ``<JJ>* <NN.*>+`` (optional adjectives followed by nouns),
            optionally preceded by another such group ending in an ``<IN>``
            token.

    Returns:
        A sorted list of lower-cased candidate phrases (duplicates are kept),
        excluding phrases that are English stop words and phrases made up
        entirely of ``string.punctuation`` characters.

    Side effects:
        Prints the number of candidates found *before* the stop-word and
        punctuation filter is applied.
    """
    import itertools
    import string

    import nltk

    # exclude candidates that are stop words or entirely punctuation
    punct = set(string.punctuation)
    stop_words = set(nltk.corpus.stopwords.words('english'))

    # tokenize, POS-tag, and chunk using regular expressions
    chunker = nltk.chunk.regexp.RegexpParser(grammar)
    tagged_sents = nltk.pos_tag_sents(nltk.word_tokenize(sent)
                                      for sent in nltk.sent_tokenize(text))
    # flat stream of (word, pos, chunk_tag) triples across all sentences
    all_chunks = itertools.chain.from_iterable(
        nltk.chunk.tree2conlltags(chunker.parse(tagged_sent))
        for tagged_sent in tagged_sents)

    def in_chunk(triple):
        # 'O' marks a token outside any chunk. Indexing the triple (rather
        # than unpacking it in the parameter list) keeps this valid on
        # Python 3, where tuple parameters were removed.
        return triple[2] != 'O'

    # join constituent chunk words into a single chunked phrase; consecutive
    # chunked tokens form one group, so chunks with no 'O' token between them
    # end up merged into a single phrase
    candidates = [' '.join(word for word, pos, chunk in group).lower()
                  for key, group in itertools.groupby(all_chunks, in_chunk) if key]

    # diagnostic: candidate count before filtering
    print(len(candidates))
    candidates.sort()

    return [cand for cand in candidates
            if cand not in stop_words and not all(char in punct for char in cand)]

In [15]:
# Sample article used to exercise extract_candidate_chunks. The text is written
# as two adjacent string literals (implicitly concatenated) because a plain
# double-quoted literal cannot span a raw line break.
text = (
    "If Neymar never ever signs for Manchester United, imagine how many lives would have been wasted speculating on whether he might. Lives that could have been spent making worthwhile contributions to society – curing disease, fighting injustice, cuddling orphaned monkeys - but were instead sucked into a bottomless vacuum of pointless rumour-mongering that helped no one. Actually, this is not even a conversation Hot Football Transfer Gossip wants to get involved in on a Monday morning. It's too distressing. Instead, let's turn our attention to the Daily Express, which says that Neymar could be on his way to Man Utd. Woah, cool! This is not strictly the belief of the Express, but the belief of Real Madrid president Florentino Perez, who reportedly 'feels' that Neymar has a Nou Camp 'escape plan', due to his unhappiness with Barca's faltering form and his 'tense relationship' with Lionel Messi. The reason this is good news for United is that they are 'one of the few teams who would be able to afford him'. Or, at least, one of the few clubs willing to meet his £170m release clause. Remember, the one that was inserted when Neymar signed a new contract a few months ago. The contract that was supposed to end all this speculation and help us regain our sanity and our dignity. Remember THAT? Deep breaths… Moving on, the Red Devils have also received a boost in their pursuit of Antoine Griezmann after he appeared to rule out a move from Atletico Madrid to rivals Real, describing the idea as 'dead' (Sun). Hot Football Transfer Gossip’s eye was also caught by a adjacent Sun story about Danny Dyer earning £400,000 a year on EastEnders – £100,000 more than Steve McFadden aka Phil Mitchell. That's outrageous! Phil doesn't deserve that after all his years of sterling service. "
    "Umm, sorry… Man Utd are also keen on Everton’s Romelu Lukaku and Torino striker Andrea Belotti, with Jose Mourinho admitting he is 'very far' from assembling a title-winning team and prepared to make four major signings this summer (Manchester Evening News). Meanwhile, Manchester City are the latest club described as 'leading the race' to sign Alexis Sanchez, with Chilean paper Cooperativa reporting that Pep Guardiola is prepared to shell out £50m for the Arsenal contract rebel. Elsewhere, the agent of Frank 'new Yaya Toure' Kessie says Liverpool and Arsenal are keen on the 20-year-old Atalanta midfielder (Mirror), Chelsea are one of several clubs interested in signing Real Madrid defensive legend Pepe (Marca) and Real president Perez is so concerned with the side's recent attacking displays that he is planning to sell either Gareth Bale or Karim Benzema or Cristiano Ronaldo (aka the BBC) in the summer (Don Balon). That'll be Benzema then."
)

# Last expression of the cell: the notebook displays the returned candidate list.
extract_candidate_chunks(text)

97


["'feels",
 "'tense relationship",
 '20-year-old atalanta midfielder',
 'adjacent sun story about danny dyer',
 'agent of frank',
 'alexis sanchez',
 'arsenal',
 'arsenal contract rebel',
 'attention',
 'bbc',
 'belief',
 'belief of real madrid president florentino perez',
 'benzema',
 'boost',
 'bottomless vacuum of pointless rumour-mongering',
 'chelsea',
 'chilean paper cooperativa reporting',
 'club',
 'contract',
 'conversation hot football transfer gossip',
 'cool',
 'cristiano ronaldo',
 'daily express',
 'deep breaths\xe2\x80\xa6',
 'dignity',
 'disease',
 'don balon',
 'everton\xe2\x80\x99s romelu lukaku',
 'express',
 'faltering form',
 'few clubs',
 'few months',
 'few teams',
 'gareth bale',
 'good news for united',
 'hot football transfer gossip\xe2\x80\x99s eye',
 'idea',
 'injustice',
 'jose mourinho',
 'karim benzema',
 'kessie',
 'lionel messi',
 'liverpool',
 'lives',
 'major signings',
 'man utd',
 'manchester city',
 'manchester evening news',
 'many lives',
 'marca