In [3]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup

In [15]:
# Exploratory request: fetch the Keeneland summaries page so that the
# response object (and its headers) can be inspected interactively.
raw_html = get(
    'http://flex.keeneland.com/summaries/summaries.html',
    stream=True,
)

In [19]:
# Display the Content-Type header reported by the server for `raw_html`.
raw_html.headers['Content-Type']

'text/html'

In [36]:
def get_html(url, timeout=10):
    """
    Fetch `url` with an HTTP GET and return its body when it looks like HTML.

    Parameters:
        url: address to request.
        timeout: seconds handed to `get` as its `timeout` argument, so a
            stalled server cannot block the notebook forever.

    Returns:
        - the raw response body (`resp.content`) when `is_good_response`
          accepts the response;
        - a string starting with 'Error: ' naming the status code or the
          Content-Type when the response is rejected;
        - None when the request raises a `RequestException` (the error is
          printed).
    """
    try:
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            if resp.status_code != 200:
                return 'Error: Status Code {0}'.format(resp.status_code)
            # .get() so that a response without a Content-Type header is
            # reported as 'None' instead of raising KeyError here.
            return 'Error: Content Type = {0}'.format(resp.headers.get('Content-Type'))
    except RequestException as e:
        print('Error during requests to {0}: {1}'.format(url, str(e)))
        return None
        
def is_good_response(resp):
    """
    Return True when `resp` has status code 200 and a Content-Type header
    containing 'html' (case-insensitive); otherwise return False.

    A response with no Content-Type header is treated as not good.
    """
    # Look the header up with .get(): indexing raised KeyError when the header
    # was absent, and calling .lower() before the None check made that check
    # unreachable.
    content_type = resp.headers.get('Content-Type')
    if resp.status_code != 200 or content_type is None:
        return False
    return 'html' in content_type.lower()

In [59]:
# Download the mathematicians page and parse it with the lxml parser.
raw_html = get_html('http://www.fabpedigree.com/james/mathmen.htm')
html = BeautifulSoup(raw_html, 'lxml')

# Print every <li> element's text next to its position in the document.
list_items = html.select('li')
for position, item in enumerate(list_items):
    print(position, item.text)

0  Isaac Newton

1  Archimedes

2  Carl F. Gauss

3  Leonhard Euler

4  Bernhard Riemann

5  Henri Poincaré

6  Joseph-Louis Lagrange

7  Euclid  of Alexandria

8  David Hilbert

9  Gottfried W. Leibniz

10  Alexandre Grothendieck

11  Pierre de Fermat

12  Évariste Galois

13  John von Neumann

14  René Descartes

15  Karl W. T. Weierstrass

16  Srinivasa Ramanujan

17  Hermann K. H. Weyl

18  Peter G. L. Dirichlet

19  Niels Abel

20  Georg Cantor

21  Carl G. J. Jacobi

22  Brahmagupta

23  Augustin Cauchy

24  Arthur Cayley

25  Emmy Noether

26  Pythagoras  of Samos

27  Aryabhata

28  Leonardo `Fibonacci'

29  William R. Hamilton

30  Apollonius  of Perga

31  Charles Hermite

32  Pierre-Simon Laplace

33  Carl Ludwig Siegel

34  Diophantus  of Alexandria

35  Muhammed al-Khowârizmi

36  Richard Dedekind

37  Kurt Gödel

38  Bháscara (II) Áchárya

39  Felix Christian Klein

40  Blaise Pascal

41  Élie Cartan

42  Archytas  of Tarentum

43  Godfrey H. Hardy

44  Alhazen ibn al-Hay

In [68]:
# Text of every <li> element, with trailing whitespace removed.
names = [item.text.rstrip() for item in html.select('li')]
# Clean up entry 74: keep only the text before its first newline.
names[74] = names[74].partition('\n')[0]

In [94]:
def get_hits(name):
    """
    Look up `name` on the xtools article-info page and return the number
    shown in the first link whose href contains 'latest-60', as an `int`.

    Returns None when the page could not be fetched or no such link exists.
    """
    url = 'https://xtools.wmflabs.org/articleinfo/en.wikipedia.org/{0}'.format(name)
    resp = get_html(url)
    if resp is None:
        # get_html yields None when the request itself failed; there is
        # nothing to parse in that case.
        return None
    html = BeautifulSoup(resp, 'lxml')
    for a in html.select('a'):
        # .get() with a default: indexing a['href'] raises KeyError for
        # anchors that carry no href attribute.
        if 'latest-60' in a.get('href', ''):
            # The link text uses thousands separators, e.g. '567,874'.
            return int(a.text.replace(',', ''))
    return None

In [82]:
def get_hits_on_name(name):
    """
    Accepts a `name` of a mathematician and returns the number
    of hits that mathematician's Wikipedia page received in the
    last 60 days, as an `int`.

    Returns None (after logging through `log_error`) when the page could
    not be fetched, no matching link is found, or the link text is not a
    number.
    """
    # NOTE(review): `log_error` is not defined in the cells visible here;
    # confirm it exists before this function is called.
    # url_root is a template string that is used to build a URL.
    url_root = 'https://xtools.wmflabs.org/articleinfo/en.wikipedia.org/{}'
    response = get_html(url_root.format(name))

    if response is not None:
        html = BeautifulSoup(response, 'html.parser')

        # .get() with a default: indexing a['href'] raises KeyError for
        # anchors that carry no href attribute.
        hit_link = [a for a in html.select('a')
                    if 'latest-60' in a.get('href', '')]

        if len(hit_link) > 0:
            # Strip commas
            link_text = hit_link[0].text.replace(',', '')
            try:
                # Convert to integer
                return int(link_text)
            except ValueError:
                # Only a failed conversion is expected here; a bare except
                # would also hide KeyboardInterrupt and programming errors.
                log_error("couldn't parse {} as an `int`".format(link_text))

    log_error('No pageviews found for {}'.format(name))
    return None

In [96]:
# Pair each name with the value get_hits returns for it, in list order.
result = [(name, get_hits(name)) for name in names]
print(result)

[(' Isaac Newton', 567874), (' Archimedes', 165440), (' Carl F. Gauss', 54), (' Leonhard Euler', 99532), (' Bernhard Riemann', 29416), (' Henri Poincaré', 40128), (' Joseph-Louis Lagrange', 25955), (' Euclid  of Alexandria', 374), (' David Hilbert', 40624), (' Gottfried W. Leibniz', None), (' Alexandre Grothendieck', 145), (' Pierre de Fermat', 23540), (' Évariste Galois', 33882), (' John von Neumann', 129920), (' René Descartes', 215046), (' Karl W. T. Weierstrass', None), (' Srinivasa Ramanujan', 380505), (' Hermann K. H. Weyl', None), (' Peter G. L. Dirichlet', None), (' Niels Abel', 172), (' Georg Cantor', 44014), (' Carl G. J. Jacobi', 10), (' Brahmagupta', 38344), (' Augustin Cauchy', 3465), (' Arthur Cayley', 5802), (' Emmy Noether', 31602), (' Pythagoras  of Samos', 333), (' Aryabhata', 176173), (" Leonardo `Fibonacci'", None), (' William R. Hamilton', None), (' Apollonius  of Perga', 8329), (' Charles Hermite', 3942), (' Pierre-Simon Laplace', 38580), (' Carl Ludwig Siegel', 1

In [100]:
def take_second(elem):
    """Return the second item of `elem`; used as the sort key for (name, hits) pairs."""
    return elem[1]


# Build a filtered list instead of calling result.remove() inside a loop over
# result: removing during iteration skips the element that follows each
# removal, so adjacent None entries survived and sorted() then failed when
# comparing None with an int.
result = [entry for entry in result if entry[1] is not None]
final_result = sorted(result, reverse=True, key=take_second)

# Slice rather than index with range(0, 5) so that fewer than five results
# does not raise IndexError.
for rank, (name, hits) in enumerate(final_result[:5], start=1):
    print(rank, name, ':  ', hits)

1  Albert Einstein :   1075715
2  Isaac Newton :   567874
3  Aristotle :   538986
4  Galileo Galilei :   412698
5  Srinivasa Ramanujan :   380505
