In [46]:
text = """
Wikipedia (/ˌwɪkɪˈpiːdiə/ (About this soundlisten) wik-ih-PEE-dee-ə or /ˌwɪki-/ (About this soundlisten) wik-ee-) is a free, multilingual open-collaborative online encyclopedia created and maintained by a community of volunteer contributors using a wiki-based editing system. Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]

Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia". Initially available only in English, versions in other languages were quickly developed. The English Wikipedia, with 6.3 million articles as of March 2021, is the largest of the 319 language editions. Combined, Wikipedia's editions comprise more than 55 million articles, and attract more than 17 million edits and more than 1.7 billion unique visitors per month.[7]

Wikipedia has been criticized for its uneven accuracy and for exhibiting systemic bias, particularly gender bias, with the majority of editors being male.[8] In 2006, Time magazine stated that the open-door policy of allowing anyone to edit had made Wikipedia the "biggest and perhaps the best encyclopedia in the world", and a testament to the vision of Jimmy Wales.[9] The project's reputation improved further in the 2010s, as it received praise for its unique structure, culture, and absence of commercial bias.[3][8] In 2018, Facebook and YouTube announced that they would help users detect fake news by suggesting links to related Wikipedia articles.[10]
""" 

In [47]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [48]:
stopwords = list(STOP_WORDS)
stopwords

['her',
 'they',
 'see',
 'together',
 "n't",
 'show',
 'an',
 'part',
 'who',
 '‘s',
 'regarding',
 'forty',
 'seemed',
 'are',
 'will',
 'along',
 'toward',
 'does',
 "'ll",
 'several',
 '’re',
 'them',
 'else',
 'full',
 'hence',
 'whereas',
 'otherwise',
 'indeed',
 'themselves',
 'whither',
 'below',
 'beyond',
 'seem',
 'whose',
 'six',
 'therefore',
 'give',
 'to',
 'again',
 'wherever',
 'on',
 'himself',
 'become',
 'therein',
 'across',
 'your',
 'until',
 'into',
 'would',
 'go',
 'done',
 'often',
 'whereafter',
 'herein',
 'nine',
 'except',
 'his',
 'however',
 'seems',
 'already',
 'five',
 'been',
 'hers',
 'most',
 'any',
 'formerly',
 'other',
 'during',
 'quite',
 'i',
 'top',
 'besides',
 'being',
 'through',
 'n’t',
 'none',
 'much',
 '’ve',
 'no',
 'here',
 'only',
 'unless',
 'empty',
 'can',
 'bottom',
 'such',
 'using',
 'these',
 'nothing',
 'whole',
 'under',
 'back',
 'sometime',
 'many',
 'last',
 'off',
 'alone',
 'herself',
 'those',
 'may',
 'throughout'

In [49]:
nlp = spacy.load('en_core_web_sm') 
nlp

<spacy.lang.en.English at 0x241aa7ef130>

In [50]:
doc = nlp(text)
doc


Wikipedia (/ˌwɪkɪˈpiːdiə/ (About this soundlisten) wik-ih-PEE-dee-ə or /ˌwɪki-/ (About this soundlisten) wik-ee-) is a free, multilingual open-collaborative online encyclopedia created and maintained by a community of volunteer contributors using a wiki-based editing system. Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]

Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia". Initially available only in English, versions in other languages were quickly developed. The English Wikipedia, with 6.3 million articles as of March 2021, is the largest of the 319

In [51]:
tokens = [token.text for token in doc]
print(tokens)

['\n', 'Wikipedia', '(', '/ˌwɪkɪˈpiːdiə/', '(', 'About', 'this', 'soundlisten', ')', 'wik', '-', 'ih', '-', 'PEE', '-', 'dee', '-', 'ə', 'or', '/ˌwɪki-/', '(', 'About', 'this', 'soundlisten', ')', 'wik', '-', 'ee-', ')', 'is', 'a', 'free', ',', 'multilingual', 'open', '-', 'collaborative', 'online', 'encyclopedia', 'created', 'and', 'maintained', 'by', 'a', 'community', 'of', 'volunteer', 'contributors', 'using', 'a', 'wiki', '-', 'based', 'editing', 'system', '.', 'Wikipedia', 'is', 'the', 'largest', 'general', 'reference', 'work', 'on', 'the', 'Internet,[3', ']', 'and', 'one', 'of', 'the', '15', 'most', 'popular', 'websites', 'as', 'ranked', 'by', 'Alexa', ';', 'in', '2021', ',', 'it', 'was', 'ranked', 'as', 'the', '13th', 'most', 'visited.[4][note', '3', ']', 'The', 'project', 'carries', 'no', 'advertisements', 'and', 'is', 'hosted', 'by', 'the', 'Wikimedia', 'Foundation', ',', 'an', 'American', 'non', '-', 'profit', 'organization', 'funded', 'mainly', 'through', 'donations', ',', '

In [52]:
punctuation = punctuation + '\n'
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'

In [53]:
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else: 
                word_frequencies[word.text] += 1
                
print(word_frequencies)

{'Wikipedia': 8, '/ˌwɪkɪˈpiːdiə/': 1, 'soundlisten': 2, 'wik': 2, 'ih': 1, 'PEE': 1, 'dee': 1, 'ə': 1, '/ˌwɪki-/': 1, 'ee-': 1, 'free': 1, 'multilingual': 1, 'open': 2, 'collaborative': 1, 'online': 1, 'encyclopedia': 3, 'created': 1, 'maintained': 1, 'community': 1, 'volunteer': 1, 'contributors': 1, 'wiki': 2, 'based': 1, 'editing': 1, 'system': 1, 'largest': 2, 'general': 1, 'reference': 1, 'work': 1, 'Internet,[3': 1, '15': 2, 'popular': 1, 'websites': 1, 'ranked': 2, 'Alexa': 1, '2021': 2, '13th': 1, 'visited.[4][note': 1, '3': 1, 'project': 2, 'carries': 1, 'advertisements': 1, 'hosted': 1, 'Wikimedia': 1, 'Foundation': 1, 'American': 1, 'non': 1, 'profit': 1, 'organization': 1, 'funded': 1, 'mainly': 1, 'donations': 2, '80': 1, 'small': 1, 'individual': 1, 'users.[6': 1, '\n\n': 2, 'launched': 1, 'January': 1, '2001': 1, 'Jimmy': 2, 'Wales': 1, 'Larry': 1, 'Sanger': 1, 'coined': 1, 'portmanteau': 1, 'Initially': 1, 'available': 1, 'English': 2, 'versions': 1, 'languages': 1, 'qu

In [54]:
max_frequency = max(word_frequencies.values())
max_frequency

8

In [55]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

print(word_frequencies) 

{'Wikipedia': 1.0, '/ˌwɪkɪˈpiːdiə/': 0.125, 'soundlisten': 0.25, 'wik': 0.25, 'ih': 0.125, 'PEE': 0.125, 'dee': 0.125, 'ə': 0.125, '/ˌwɪki-/': 0.125, 'ee-': 0.125, 'free': 0.125, 'multilingual': 0.125, 'open': 0.25, 'collaborative': 0.125, 'online': 0.125, 'encyclopedia': 0.375, 'created': 0.125, 'maintained': 0.125, 'community': 0.125, 'volunteer': 0.125, 'contributors': 0.125, 'wiki': 0.25, 'based': 0.125, 'editing': 0.125, 'system': 0.125, 'largest': 0.25, 'general': 0.125, 'reference': 0.125, 'work': 0.125, 'Internet,[3': 0.125, '15': 0.25, 'popular': 0.125, 'websites': 0.125, 'ranked': 0.25, 'Alexa': 0.125, '2021': 0.25, '13th': 0.125, 'visited.[4][note': 0.125, '3': 0.125, 'project': 0.25, 'carries': 0.125, 'advertisements': 0.125, 'hosted': 0.125, 'Wikimedia': 0.125, 'Foundation': 0.125, 'American': 0.125, 'non': 0.125, 'profit': 0.125, 'organization': 0.125, 'funded': 0.125, 'mainly': 0.125, 'donations': 0.25, '80': 0.125, 'small': 0.125, 'individual': 0.125, 'users.[6': 0.125,

In [56]:
sentence_tokens = [sent for sent in doc.sents]
print(sentence_tokens)

[
Wikipedia (/ˌwɪkɪˈpiːdiə/ (About this soundlisten) wik-ih-PEE-dee-ə or /ˌwɪki-/ (About this soundlisten) wik-ee-) is a free, multilingual open-collaborative online encyclopedia created and maintained by a community of volunteer contributors using a wiki-based editing system., Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]

Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia"., Initially available only in English, versions in other languages were quickly developed., The English Wikipedia, with 6.3 million articles as of March 2021, is the largest of the

In [57]:
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_scores[sent] += word_frequencies[word.text.lower()]
                
sentence_scores

{
 Wikipedia (/ˌwɪkɪˈpiːdiə/ (About this soundlisten) wik-ih-PEE-dee-ə or /ˌwɪki-/ (About this soundlisten) wik-ee-) is a free, multilingual open-collaborative online encyclopedia created and maintained by a community of volunteer contributors using a wiki-based editing system.: 4.125,
 Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]
 
 Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia".: 6.125,
 Initially available only in English, versions in other languages were quickly developed.: 0.625,
 The English Wikipedia, with 6.3 million articles as of March 

In [65]:
from heapq import nlargest

In [66]:
select_length = int(len(sentence_tokens)*0.3)
select_length

2

In [67]:
summary = nlargest(select_length, sentence_scores, key = sentence_scores.get)
summary

[Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]
 
 Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia".,
 Combined, Wikipedia's editions comprise more than 55 million articles, and attract more than 17 million edits and more than 1.7 billion unique visitors per month.[7]
 
 Wikipedia has been criticized for its uneven accuracy and for exhibiting systemic bias, particularly gender bias, with the majority of editors being male.[8]]

In [68]:
final_summary = [word.text for word in summary]
summary = ' '.join(final_summary) 

In [69]:
print(text)


Wikipedia (/ˌwɪkɪˈpiːdiə/ (About this soundlisten) wik-ih-PEE-dee-ə or /ˌwɪki-/ (About this soundlisten) wik-ee-) is a free, multilingual open-collaborative online encyclopedia created and maintained by a community of volunteer contributors using a wiki-based editing system. Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]

Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia". Initially available only in English, versions in other languages were quickly developed. The English Wikipedia, with 6.3 million articles as of March 2021, is the largest of the 319

In [70]:
print(summary)

Wikipedia is the largest general reference work on the Internet,[3] and one of the 15 most popular websites as ranked by Alexa; in 2021, it was ranked as the 13th most visited.[4][note 3] The project carries no advertisements and is hosted by the Wikimedia Foundation, an American non-profit organization funded mainly through donations, 80% of which are small donations from individual users.[6]

Wikipedia was launched on January 15, 2001, by Jimmy Wales and Larry Sanger; its name was coined as a portmanteau of "wiki" and "encyclopedia". Combined, Wikipedia's editions comprise more than 55 million articles, and attract more than 17 million edits and more than 1.7 billion unique visitors per month.[7]

Wikipedia has been criticized for its uneven accuracy and for exhibiting systemic bias, particularly gender bias, with the majority of editors being male.[8]


In [71]:
len(text), len(summary)

(1848, 866)

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetErro

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetErro

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\Asus vivoBook\Anaconda3\envs\tf\lib\asyncio\selector_events.py", line 120, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetErro