In [1]:
# Daniel Bandala @ dic 2022
from nltk.tokenize import sent_tokenize,word_tokenize
from nltk.corpus import stopwords
from collections import defaultdict
from string import punctuation
from heapq import nlargest
import urllib.request
from bs4 import BeautifulSoup

In [7]:
class FrequencySummarizer:
    """
    Extractive text summarizer based on normalized word frequencies.

    Each sentence is scored by summing the normalized frequencies of the
    words it contains; the highest scoring sentences form the summary.
    """

    def __init__(self, min_cut=0.1, max_cut=0.9):
        """
        Initialize the text summarizer.
        Words whose normalized frequency is lower than or equal to min_cut,
        or higher than or equal to max_cut, will be ignored.
        """
        self._min_cut = min_cut
        self._max_cut = max_cut
        # English stopwords and single punctuation characters never count as words.
        self._stopwords = set(stopwords.words('english') + list(punctuation))

    def _compute_frequencies(self, word_sent):
        """
            Compute the normalized frequency of each word.
            Input:
            word_sent, a list of sentences already tokenized.
            Output:
            freq, a dictionary where freq[w] is the count of w divided by
            the count of the most frequent word. Only words whose value
            lies strictly between min_cut and max_cut are kept. The
            dictionary is empty when no countable word is found.
        """
        counts = defaultdict(int)
        for sentence in word_sent:
            for word in sentence:
                if word not in self._stopwords:
                    counts[word] += 1
        # Nothing to normalize: max() on an empty sequence would raise ValueError.
        if not counts:
            return counts
        # Frequency normalization and filtering.
        highest = float(max(counts.values()))
        freq = defaultdict(int)
        for word, count in counts.items():
            normalized = count / highest
            if self._min_cut < normalized < self._max_cut:
                freq[word] = normalized
        return freq

    def summarize(self, text, n):
        """
            Return a list of n sentences
            which represent the summary of text.
            Input:
            text, the document to summarize.
            n, the number of sentences wanted; it must not exceed the
            number of sentences found in text (AssertionError otherwise).
            Output:
            the selected sentences, ordered from highest to lowest score
            (not in document order).
        """
        sents = sent_tokenize(text)
        assert n <= len(sents), "n exceeds the number of sentences in text"
        word_sent = [word_tokenize(s.lower()) for s in sents]
        self._freq = self._compute_frequencies(word_sent)
        # Score every sentence, including those without any scored word,
        # so that n sentences can always be returned.
        ranking = {}
        for i, sent in enumerate(word_sent):
            ranking[i] = sum(self._freq[w] for w in sent if w in self._freq)
        sents_idx = self._rank(ranking, n)
        return [sents[j] for j in sents_idx]

    def _rank(self, ranking, n):
        """ Return the keys of the n entries of ranking with the highest score, best first. """
        return nlargest(n, ranking, key=ranking.get)

In [8]:
def get_only_text(url):
    """
        Return the title and the text of the article
        at the specified url.
        Input:
        url, address of the page to download.
        Output:
        (title, text), where title is the text of the page <title>
        element ('' when the page has none) and text is the content
        of every <p> element joined by single spaces.
        The page is decoded as UTF-8.
    """
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode('utf-8')
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(page, 'html.parser')
    text = ' '.join(p.text for p in soup.find_all('p'))
    # soup.title is None when the document has no <title> element.
    title = soup.title.text if soup.title is not None else ''
    return title, text

In [12]:
# Download the UN News climate-change RSS feed and collect the text of its
# <guid> elements, which the next cells use as article URLs.
# Alternative feed: http://feeds.bbci.co.uk/news/rss.xml
# The context manager closes the connection once the feed has been read.
with urllib.request.urlopen('https://news.un.org/feed/subscribe/en/news/topic/climate-change/feed/rss.xml') as response:
    feed_xml = response.read().decode('utf-8')
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
feed = BeautifulSoup(feed_xml, 'html.parser')
# A list (not a one-shot map iterator) so later cells can be re-run safely.
to_summarize = [guid.text for guid in feed.find_all('guid')]

In [13]:
# Build the summarizer with its default frequency cut-offs (min_cut=0.1, max_cut=0.9).
fs = FrequencySummarizer()

In [14]:
# Print a two-sentence summary for each of the first five articles of the feed.
first_urls = list(to_summarize)[:5]
for article_url in first_urls:
    title, text = get_only_text(article_url)
    # Separator line followed by the article title.
    print('----------------------------------', title, sep='\n')
    for sentence in fs.summarize(text, 2):
        print('* ' + sentence)

----------------------------------
UN appeals for record $51.5 billion to help 230 million on the brink in 2023 | UN News
* Turning to the threat of famine, he said that five countries “are already experiencing what we call famine-like conditions, where we can confidently and unhappily say that people are dying as a result - and it tends to be children – of displacement, food insecurity, lack of food, starvation.” In 2023, 45 million people in 37 countries risk starvation, according to the Global Humanitarian Overview.
* “I fear that 2023 is going to be an acceleration of all those trends, and that’s why we say … that we hope 2023 will be a year of solidarity, just as 2022 has been a year of suffering.” Speaking in Geneva at the launch of the Global Humanitarian Overview report 2023, Mr. Griffiths described the appeal as a “lifeline” for people on the brink.
----------------------------------
Large parts of world drier than normal in 2021: WMO | UN News
* “The impacts of climate change