In [103]:
import requests
import re
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

In [104]:
wiki_html = requests.get('https://en.wikipedia.org/wiki/Machine_learning').text

In [142]:
wiki_html2 = requests.get('https://en.wikipedia.org/wiki/Special:Random').text
wiki_html2

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>Trade in Endangered Species Act 1989 - Wikipedia</title>\n<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"f719bee8-9ac3-40cc-bd53-28f5563bb12f","wgCSPNonce":false,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Trade_in_Endangered_Species_Act_1989","wgTitle":"Trade in Endangered Species Act 1989","wgCurRevisionId":1012625737,"wgRevisionId":1012625737,"wgArticleId":27894534,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Use dmy dates from August 2013","Use New Zealand English from August 2013","A

In [105]:
wiki_parsed = BeautifulSoup(wiki_html, 'html.parser')

In [106]:
paragraphs = wiki_parsed.find_all('p')
article_content = ''
for p in paragraphs:  
    article_content += re.sub(r"\[[\S]*\]", "", p.text)

In [107]:
article_content

'Machine learning (ML) is a field of inquiry devoted to understanding and building methods that \'learn\', that is, methods that leverage data to improve performance on some set of tasks. It is seen as a part of artificial intelligence. Machine learning algorithms build a model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to do so. Machine learning algorithms are used in a wide variety of applications, such as in medicine, email filtering, speech recognition, and computer vision, where it is difficult or unfeasible to develop conventional algorithms to perform the needed tasks.\nA subset of machine learning is closely related to computational statistics, which focuses on making predictions using computers, but not all machine learning is statistical learning. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a related fiel

In [108]:
stop_words = set(stopwords.words("english"))

In [109]:
words = word_tokenize(article_content)

In [110]:
stem = PorterStemmer()

In [112]:
frequency_table = dict()
for wd in words:
    wd = stem.stem(wd)
    if wd in stop_words:
        continue
    if wd in frequency_table:
        frequency_table[wd] += 1
    else:
        frequency_table[wd] = 1

In [113]:
frequency_table

{'machin': 113,
 'learn': 208,
 '(': 64,
 'ml': 7,
 ')': 64,
 'field': 21,
 'inquiri': 1,
 'devot': 1,
 'understand': 2,
 'build': 5,
 'method': 33,
 "'learn": 1,
 "'": 6,
 ',': 377,
 'leverag': 1,
 'data': 96,
 'improv': 10,
 'perform': 28,
 'set': 36,
 'task': 20,
 '.': 306,
 'seen': 2,
 'part': 5,
 'artifici': 24,
 'intellig': 9,
 'algorithm': 71,
 'model': 61,
 'base': 10,
 'sampl': 5,
 'known': 11,
 'train': 62,
 'order': 6,
 'make': 14,
 'predict': 29,
 'decis': 16,
 'without': 9,
 'explicitli': 3,
 'program': 22,
 'use': 63,
 'wide': 2,
 'varieti': 4,
 'applic': 11,
 'medicin': 1,
 'email': 4,
 'filter': 3,
 'speech': 4,
 'recognit': 6,
 'comput': 35,
 'vision': 5,
 'difficult': 2,
 'unfeas': 1,
 'develop': 7,
 'convent': 2,
 'need': 9,
 'subset': 6,
 'close': 3,
 'relat': 12,
 'statist': 18,
 'focus': 4,
 'studi': 7,
 'mathemat': 9,
 'optim': 8,
 'deliv': 3,
 'theori': 14,
 'domain': 3,
 'mine': 11,
 'exploratori': 1,
 'analysi': 12,
 'unsupervis': 13,
 'implement': 3,
 'neural

In [115]:
sentences = sent_tokenize(article_content)
sentences

["Machine learning (ML) is a field of inquiry devoted to understanding and building methods that 'learn', that is, methods that leverage data to improve performance on some set of tasks.",
 'It is seen as a part of artificial intelligence.',
 'Machine learning algorithms build a model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to do so.',
 'Machine learning algorithms are used in a wide variety of applications, such as in medicine, email filtering, speech recognition, and computer vision, where it is difficult or unfeasible to develop conventional algorithms to perform the needed tasks.',
 'A subset of machine learning is closely related to computational statistics, which focuses on making predictions using computers, but not all machine learning is statistical learning.',
 'The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning.',
 'Data mi

In [124]:
sentence_weight = dict()

In [135]:
for sentence in sentences:
    sentence_wordcount_without_stop_words = 0
    for word_weight in frequency_table:
        if word_weight in sentence.lower():
            sentence_wordcount_without_stop_words += 1
            if sentence[:15] in sentence_weight:
                sentence_weight[sentence[:15]] += frequency_table[word_weight]
            else:
                sentence_weight[sentence[:15]] = frequency_table[word_weight]

    sentence_weight[sentence[:15]] = sentence_weight[sentence[:15]] / sentence_wordcount_without_stop_words

In [136]:
sentence_weight

{'Machine learnin': 41.523257503028525,
 'It is seen as a': 27.78061224489796,
 'A subset of mac': 53.34201388888889,
 'The study of ma': 51.402646502835545,
 'Data mining is ': 63.27469135802468,
 'Some implementa': 35.066326530612244,
 'In its applicat': 48.4375,
 'Learning algori': 39.17219387755102,
 'These inference': 28.46090534979424,
 'They can be nua': 22.511322132943757,
 'It involves com': 43.043209876543216,
 'For simple task': 33.58316896778435,
 'For more advanc': 44.2575,
 'In practice, it': 36.213010204081634,
 'The discipline ': 26.94140625,
 'In cases where ': 35.06994328922496,
 'This can then b': 31.663265306122447,
 'For example, to': 36.1904296875,
 'The term machin': 34.58938775510204,
 'Also the synony': 27.479224376731306,
 'A representativ': 34.413061224489795,
 'Interest relate': 33.420138888888886,
 'In 1981 a repor': 28.543388429752067,
 'Tom M. Mitchell': 24.55410225921522,
 'This follows Al': 23.390946502057613,
 'is replaced wit': 28.17391304347826,
 'Mo

In [137]:
sum_values = 0
for entry in sentence_weight:
    sum_values += sentence_weight[entry]
average_score = (sum_values / len(sentence_weight))

In [138]:
average_score

34.164663060488145

In [139]:
sentence_counter = 0
article_summary = ''

In [140]:
for sentence in sentences:
    if sentence[:15] in sentence_weight and sentence_weight[sentence[:15]] >= 1.5 * average_score:
        article_summary += " " + sentence
        sentence_counter += 1

In [141]:
article_summary

' A subset of machine learning is closely related to computational statistics, which focuses on making predictions using computers, but not all machine learning is statistical learning. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning. Data mining is a related field of study, focusing on exploratory data analysis through unsupervised learning. As a scientific endeavor, machine learning grew out of the quest for artificial intelligence. In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. Some statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. In computational learning theory, a computation is co