In [None]:
# Python script that retrieves an arbitrary Wikipedia page
# and produces a list of the links on that page:

from urllib.request import urlopen
from bs4 import BeautifulSoup
# Fetch the page inside a context manager so the HTTP response is closed
# as soon as BeautifulSoup has consumed it.
with urlopen('http://en.wikipedia.org/wiki/Kevin_Bacon') as html:
    bs = BeautifulSoup(html, 'html.parser')

# href=True restricts the search to anchors that actually carry an href
# attribute, so no per-link membership check is needed.
for link in bs.find_all('a', href=True):
    print(link['href'])

In [None]:
# However, there are some things that you don’t want to retrieve
# Examining the links that point to article pages (as opposed to other internal pages)
# it is possible to find three aspects in common:

# They reside within <div id='bodyContent'></div>;
# The URLs do not contain colons;
# The URLs begin with /wiki/

# Revising the code to retrieve only the desired article links, using the regular expression:
# ^(/wiki/)((?!:).)*$

from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Fetch the page inside a context manager so the HTTP response is closed
# as soon as BeautifulSoup has consumed it.
with urlopen('http://en.wikipedia.org/wiki/Kevin_Bacon') as html:
    bs = BeautifulSoup(html, 'html.parser')

# find() returns None when the page has no <div id="bodyContent">;
# in that case there is nothing to print rather than an AttributeError.
body_div = bs.find('div', {'id':'bodyContent'})
if body_div is not None:
    # The href regex keeps only anchors whose href starts with /wiki/ and
    # contains no colon, so every matched tag is guaranteed to have an href.
    for link in body_div.find_all(
            'a', href=re.compile('^(/wiki/)((?!:).)*$')):
        print(link.attrs['href'])
        
# list of all article URLs that the Wikipedia article on Kevin Bacon links to

In [None]:
# Taking the next step, we should take this code and transform it into:

# A single function, getLinks (you can use another name if you want to), that takes in a Wikipedia article 
# URL of the form /wiki/<Article_Name> and returns a list of all linked article URLs in the same form.

# A main function that calls getLinks with a starting article, chooses a random article link 
# from the returned list, and calls getLinks again, until you stop the program or until no 
# article links are found on the new page. 

# Generating this code:

from urllib.request import urlopen
from bs4 import BeautifulSoup
import datetime
import random
import re

# The book seeds the generator with random.seed(datetime.datetime.now()).
# That call is not needed here and, per the original note, a datetime seed
# triggers a warning, so a None seed is passed instead and the random
# module chooses its own seed source.
seed = None
random.seed(a=seed)

def getLinks(articleUrl):
    """Return the article-link anchor tags found on a Wikipedia page.

    Args:
        articleUrl: Site-relative path such as '/wiki/Kevin_Bacon'. It is
            appended to 'http://en.wikipedia.org' to build the request URL.

    Returns:
        The <a> tags located inside <div id="bodyContent"> whose href
        starts with '/wiki/' and contains no colon, as produced by
        find_all(). Callers read the target path from each tag's
        attrs['href']. An empty list is returned when the page has no
        such div.
    """
    # Close the HTTP response once parsed; this function is called
    # repeatedly by the crawl loop and would otherwise leak connections.
    with urlopen('http://en.wikipedia.org{}'.format(articleUrl)) as html:
        bs = BeautifulSoup(html, 'html.parser')

    # find() yields None on pages without the expected content container.
    bodyContent = bs.find('div', {'id':'bodyContent'})
    if bodyContent is None:
        return []
    return bodyContent.find_all('a',
        href=re.compile('^(/wiki/)((?!:).)*$'))

# Random walk over Wikipedia: start at the Kevin Bacon article, then keep
# hopping to a randomly chosen article link. The loop ends only when a page
# yields no article links (or when the run is interrupted by the user).
links = getLinks('/wiki/Kevin_Bacon')
while links:
    # random.choice picks one anchor tag from the list, replacing the
    # manual randint-based indexing.
    newArticle = random.choice(links).attrs['href']
    print(newArticle)
    links = getLinks(newArticle)