# Creating an Article Summarizer

In [1]:
import bs4 as bs
import urllib.request
import re 
import nltk
import heapq

In [2]:
source = urllib.request.urlopen('https://en.wikipedia.org/wiki/Climate_change').read()
print(source)



In [3]:
soup = bs.BeautifulSoup(source , 'lxml')
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Climate change - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"X-KeuwpAIDEAAJH7bNMAAABL","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Climate_change","wgTitle":"Climate change","wgCurRevisionId":998176034,"wgRevisionId":998176034,"wgArticleId":5042951,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Harv and Sfn no-target errors","CS1 maint: multiple names: authors list","Wikipedia indefinitely move-protected pages","Wikipedia indefinitely semi-protected pages","Articles wit

In [4]:
text = ''
for paragraph in soup.find_all('p'):
    text += paragraph.text
    
text



# Preprocessing the text

In [5]:
#Removing all the numbers and extra spaces 
text = re.sub(r'\[[0-9]*\]' , ' ' , text)
text = re.sub(r'\s+', '  ' , text)
text



In [6]:
clean_text = text.lower()
clean_text = re.sub(r'\W' , ' ', clean_text)
clean_text = re.sub(r'\d' , ' ' , clean_text)
clean_text = re.sub(r'\s+' , ' ' , clean_text)
clean_text



# Tokenization

In [7]:
sentences = nltk.sent_tokenize(text)
len(sentences)

427

In [8]:
stop_words = nltk.corpus.stopwords.words('english')

In [9]:
word2count = {}
for word in nltk.word_tokenize(clean_text):
    if word not in stop_words:
        if word not in word2count.keys():
            word2count[word] = 1 
        else:
            word2count[word] += 1
            
word2count

{'climate': 140,
 'change': 82,
 'includes': 1,
 'global': 59,
 'warming': 83,
 'driven': 2,
 'human': 16,
 'emissions': 63,
 'greenhouse': 56,
 'gases': 34,
 'resulting': 2,
 'large': 15,
 'scale': 10,
 'shifts': 2,
 'weather': 9,
 'patterns': 6,
 'though': 1,
 'previous': 1,
 'periods': 1,
 'climatic': 1,
 'since': 15,
 'mid': 5,
 'th': 6,
 'century': 15,
 'humans': 5,
 'unprecedented': 2,
 'impact': 5,
 'earth': 25,
 'system': 13,
 'caused': 8,
 'largest': 4,
 'driver': 2,
 'emission': 4,
 'carbon': 40,
 'dioxide': 6,
 'co': 48,
 'methane': 11,
 'fossil': 15,
 'fuel': 10,
 'burning': 5,
 'energy': 39,
 'consumption': 2,
 'main': 4,
 'source': 2,
 'additional': 10,
 'contributions': 2,
 'agriculture': 11,
 'deforestation': 7,
 'industrial': 13,
 'processes': 7,
 'cause': 9,
 'disputed': 1,
 'scientific': 10,
 'body': 2,
 'national': 5,
 'international': 4,
 'standing': 2,
 'temperature': 23,
 'rise': 20,
 'accelerated': 1,
 'tempered': 1,
 'feedbacks': 9,
 'loss': 3,
 'sunlight': 11,

In [10]:
for key in word2count.keys():
    word2count[key] = word2count[key]/max(word2count.values())

In [11]:
sent2score = {}
for sentence in sentences:
    for word in nltk.word_tokenize(sentence.lower()):
        if word in word2count.keys():
            if len(sentence.split(' ')) < 25:
                if sentence not in sent2score.keys():
                    sent2score[sentence] = word2count[word]
                else:
                    sent2score[sentence] += word2count[word]
    
    
sent2score

{'Rising  temperatures  are  limiting  ocean  productivity  and  harming  fish  stocks.': 2.5185185185185186,
 'Ecological  collapse  risk.': 0.962962962962963,
 'Bleaching  has  damaged  the  Great  Barrier  Reef  and  threatens  reefs  worldwide.': 1.629629629629629,
 'Heat  wave  intensification.': 2.518518518518518,
 'Events  like  the  June  2019  European  heat  wave  are  becoming  more  common.': 4.222222222222221,
 'Agricultural  changes.': 2.0357598978288634,
 'Droughts,  rising  temperatures,  and  extreme  weather  negatively  impact  agriculture.': 4.378991060025542,
 'Shown:  Texas,  USA.': 0.7407407407407407,
 'Environmental  migration.': 1.037037037037037,
 'Sparser  rainfall  leads  to  desertification  that  harms  agriculture  and  can  displace  populations.': 2.3141762452107275,
 'Shown:  Telly,  Mali.': 0.7407407407407407,
 'Tidal  flooding.': 0.7407407407407407,
 'Sea-level  rise  increases  flooding  in  low-lying  coastal  regions.': 4.120051085568327,
 'Shown:

In [12]:
best_sentences = heapq.nlargest(5,sent2score , key = sent2score.get)

print('This is the text summary of the article')
print('-----------------------------------------------')
for sentence in best_sentences:
    print(sentence)

This is the text summary of the article
-----------------------------------------------
As  climate  change  effects  vary  across  regions,  so  do  adaptation  strategies.
Droughts,  rising  temperatures,  and  extreme  weather  negatively  impact  agriculture.
Events  like  the  June  2019  European  heat  wave  are  becoming  more  common.
Sea-level  rise  increases  flooding  in  low-lying  coastal  regions.
Carbon  pricing  mechanisms  include  carbon  taxes  and  emissions  trading  systems.
