In [1]:
from os import path

In [2]:
# Resolve the ABC "science" corpus file inside the user's NLTK data directory.
fname = path.expanduser('~/nltk_data/corpora/abc/science.txt')

# Read the raw bytes and decode them by hand: undecodable bytes are replaced
# with U+FFFD instead of raising, and binary mode leaves the file's line
# endings exactly as stored.
with open(fname, mode='rb') as fp:
    data = fp.read().decode('utf-8', errors='replace')

In [3]:
# Split the corpus text on blank lines (spelled with CRLF line endings); each
# resulting chunk is treated as one article.
articles = data.split('\r\n\r\n')
# Keep the first article as the working example and display it.
article = articles[0]
print(article)

Cystic fibrosis affects 30,000 children and young adults in the US alone
Inhaling the mists of salt water can reduce the pus and infection that fills the airways of cystic fibrosis sufferers, although side effects include a nasty coughing fit and a harsh taste. 
That's the conclusion of two studies published in this week's issue of The New England Journal of Medicine.
They found that inhaling a mist with a salt content of 7 or 9% improved lung function and, in some cases, produced less absenteeism from school or work. 
Cystic fibrosis, a progressive and frequently fatal genetic disease that affects about 30,000 young adults and children in the US alone, is marked by a thickening of the mucus which makes it harder to clear the lungs of debris and bacteria. 
The salt water solution "really opens up a new avenue for approaching patients with cystic fibrosis and how to treat them," says Dr Gail Weinmann, of the US National Heart, Lung, and Blood Institute, which sponsored one of the s

In [4]:
from nltk.tokenize import sent_tokenize

In [5]:
# Break the example article into sentences and show the first one.
# NOTE: in the output below the headline and the opening sentence come back as
# a single "sentence" -- presumably because the headline line carries no
# terminal punctuation for the tokenizer to split on.
sents = sent_tokenize(article)
print(sents[0])

Cystic fibrosis affects 30,000 children and young adults in the US alone
Inhaling the mists of salt water can reduce the pus and infection that fills the airways of cystic fibrosis sufferers, although side effects include a nasty coughing fit and a harsh taste.


In [6]:
from nltk.corpus import stopwords
import re

In [7]:
# English stop-word list, stored as a set for the membership test in is_ok.
stop = set(stopwords.words('english'))

In [8]:
def is_ok(token):
    """Tell whether ``token`` should be kept as a content word.

    A token is kept when it consists solely of one or more lowercase ASCII
    letters and is not in the module-level ``stop`` set.

    Args:
        token: Candidate token (a string).

    Returns:
        bool: ``True`` if the token is kept, ``False`` otherwise.
    """
    # fullmatch instead of match('^...$'): `$` also matches just before a
    # trailing newline, which would let a token such as 'abc\n' through.
    if re.fullmatch('[a-z]+', token) is None:
        return False
    # Always return a real bool rather than a Match object or None.
    return token not in stop

In [9]:
from nltk.tokenize import word_tokenize
def tokenize(sent):
    """Lower-case ``sent``, split it into word tokens and keep those accepted by ``is_ok``.

    Returns the kept tokens as a list, in their original order.
    """
    kept = []
    for candidate in word_tokenize(sent.lower()):
        if is_ok(candidate):
            kept.append(candidate)
    return kept

In [10]:
from collections import Counter

In [11]:
def summarize(text, n=3):
    """Return the ``n`` highest-scoring sentences of ``text``.

    The text is split into sentences and each sentence is reduced to its kept
    tokens via ``tokenize``. A sentence's score is the sum, over its tokens
    (repeats included), of each token's frequency across the whole text.

    Args:
        text: The text to summarise.
        n: Number of sentences to return (default 3).

    Returns:
        list: Sentences ordered by descending score, not by position in the
        text; sentences with equal scores keep their original relative order.
    """
    sentences = sent_tokenize(text)
    token_lists = [tokenize(sentence) for sentence in sentences]

    # Term frequencies over every kept token in the text.
    freq = Counter(token for tokens in token_lists for token in tokens)

    # Score every sentence once, up front, instead of inside the sort key.
    scores = [sum(freq[token] for token in tokens) for tokens in token_lists]

    ranked = sorted(
        range(len(token_lists)),
        key=lambda pos: scores[pos],
        reverse=True,
    )
    return [sentences[pos] for pos in ranked[:n]]

In [12]:
# Summarise the first article using the default of three sentences.
summarize(articles[0])

['Cystic fibrosis affects 30,000 children and young adults in the US alone\r\nInhaling the mists of salt water can reduce the pus and infection that fills the airways of cystic fibrosis sufferers, although side effects include a nasty coughing fit and a harsh taste.',
 'The salt water solution "really opens up a new avenue for approaching patients with cystic fibrosis and how to treat them," says Dr Gail Weinmann, of the US National Heart, Lung, and Blood Institute, which sponsored one of the studies.',
 'The team found that the 83 volunteers who regularly inhaled a 7% mist of salty water had fewer breathing problems and less absenteeism from school or work than those who inhaled a solution with a salt content of under 1%.']