In [None]:
import wikipedia

def filter_olympic_2020_titles(titles):
    """
    Keep only the titles that look like they are about the 2020 Olympic games.

    A title is kept when it contains the substring '2020' and, compared
    case-insensitively, the substring 'olympic'. The input order is preserved
    and a new list is returned.
    """
    matching = []
    for candidate in titles:
        if '2020' not in candidate:
            continue
        if 'olympic' in candidate.lower():
            matching.append(candidate)

    return matching

def get_wiki_page(title):
    """
    Get the wikipedia page given a title

    If the title is ambiguous, the first suggested option is tried instead.

    Returns the page object from wikipedia.page, or None when the page
    (or the disambiguation fallback) cannot be resolved or the request to
    Wikipedia times out.
    """
    # Failures that mean "skip this title" rather than "abort the crawl".
    # The wikipedia package reports timeouts as HTTPTimeoutError; its
    # exceptions module has no ReadTimeout, so naming that attribute in an
    # except clause would itself raise AttributeError.
    skippable = (
        wikipedia.exceptions.PageError,
        wikipedia.exceptions.HTTPTimeoutError,
    )
    try:
        return wikipedia.page(title)
    except wikipedia.exceptions.DisambiguationError as e:
        if not e.options:
            return None
        try:
            return wikipedia.page(e.options[0])
        except wikipedia.exceptions.DisambiguationError:
            # The first option is ambiguous as well; give up on this title.
            return None
        except skippable:
            return None
    except skippable:
        return None

def recursively_find_all_pages(titles, titles_so_far=None):
    """
    Recursively find all the pages that are linked to the Wikipedia titles in the list

    Only titles accepted by filter_olympic_2020_titles are fetched and followed.

    titles: iterable of Wikipedia page titles to start from.
    titles_so_far: set of titles that have already been handled. It is
        updated in place so that the recursive calls share one record of
        visited titles. When omitted, a fresh empty set is created for this
        call, so separate top-level calls do not affect each other.

    Returns a list of the page objects that were fetched, without repeating
    a page title that a nested call has already contributed.
    """
    # A mutable default would be created once and shared by every call,
    # making a second crawl skip everything the first one visited.
    if titles_so_far is None:
        titles_so_far = set()

    all_pages = []
    # Titles of the pages in all_pages, kept as a set for O(1) duplicate checks.
    collected_titles = set()

    titles = list(set(titles) - titles_so_far)
    titles = filter_olympic_2020_titles(titles)
    titles_so_far.update(titles)
    for title in titles:
        print(f"{title} {len(all_pages)}")    # log to check progress
        page = get_wiki_page(title)
        if page is None:
            continue
        all_pages.append(page)
        collected_titles.add(page.title)

        new_pages = recursively_find_all_pages(page.links, titles_so_far)
        for pg in new_pages:
            if pg.title not in collected_titles:
                collected_titles.add(pg.title)
                all_pages.append(pg)
        # Mark every link of this page as handled, including those the
        # filter rejected, so later pages do not reconsider them.
        titles_so_far.update(page.links)
    return all_pages


# Crawl starting from the main "2020 Summer Olympics" article; this performs
# live Wikipedia lookups and prints one progress line per visited title.
pages = recursively_find_all_pages(["2020 Summer Olympics"])
# Number of pages collected (presumably shown as the notebook cell's result).
len(pages)

2020 Summer Olympics 0
Tanzania at the 2020 Summer Olympics 0
Athletics at the 2020 Summer Olympics – Men's marathon 0
Athletics at the 2020 Summer Olympics – Men's decathlon 0
Athletics at the 2020 Summer Olympics – Women's 400 metres hurdles 1
2020 United States Olympic Trials (track and field) 0
Athletics at the 2020 Summer Olympics – Women's 3000 metres steeplechase 2
Athletics at the 2020 Summer Olympics – Men's discus throw 3
Athletics at the 2020 Summer Olympics – Women's long jump 4
Athletics at the 2020 Summer Olympics – Men's javelin throw 5
Athletics at the 2020 Summer Olympics – Men's 10,000 metres 6
Athletics at the 2020 Summer Olympics – Women's 4 × 400 metres relay 6
Athletics at the 2020 Summer Olympics – Men's 400 metres hurdles 7
Athletics at the 2020 Summer Olympics – Women's 200 metres 8
Athletics at the 2020 Summer Olympics – Men's triple jump 9
Athletics at the 2020 Summer Olympics – Mixed 4 × 400 metres relay 10
Athletics at the 2020 Summer Olympics – Men's 1500 