#### Import Libraries


In [28]:
import requests
from bs4 import BeautifulSoup


### WikiURL

In [29]:
WIKI_URL = "https://aliceinwonderland.fandom.com/wiki/Alice_in_Wonderland_Wiki"


### Fetch and Parse Helper Function

In [30]:
def fetch_soup(url):
    """
    Fetches the content of the given URL and returns a BeautifulSoup object.
    """
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for HTTP errors
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


### Books Function


In [31]:
def books():
    """
    Extracts and returns the canon book names from the wiki page.
    """
    soup = fetch_soup(WIKI_URL)
    if not soup:
        return []

    canon_books = []

    # Locate the "Canon books" section
    book_section = soup.find('span', string="Canon books")
    if not book_section:
        print("Canon books section not found.")
        return canon_books

    # Extract the <ul> following the "Canon books" header
    book_list = book_section.find_next('ul')
    if not book_list:
        print("Canon books list not found.")
        return canon_books

    # Extract the book names from the <li> elements
    for li in book_list.find_all('li'):
        book_name = li.get_text(strip=True)
        canon_books.append(book_name)

    return canon_books


### Poems Function


In [32]:
def poems():
    """
    Extracts and returns a list of tuples containing canon poem names and their URLs.
    """
    soup = fetch_soup(WIKI_URL)
    if not soup:
        return []

    canon_poems = []

    # Locate the "Canon poems" section
    poem_section = soup.find('span', string="Canon poems")
    if not poem_section:
        print("Canon poems section not found.")
        return canon_poems

    # Extract the <ul> following the "Canon poems" header
    poem_list = poem_section.find_next('ul')
    if not poem_list:
        print("Canon poems list not found.")
        return canon_poems

    # Extract the poem names and URLs from the <li> elements
    for li in poem_list.find_all('li'):
        a_tag = li.find('a')
        if a_tag and 'href' in a_tag.attrs:
            poem_title = a_tag.get_text(strip=True)
            poem_url = a_tag['href']
            if not poem_url.startswith("https"):
                poem_url = "https://aliceinwonderland.fandom.com" + poem_url
            canon_poems.append((poem_title, poem_url))

    return canon_poems


### Poem Title Function

In [33]:
def poem_title_text(n):
    """
    Given an index n, returns the title and full text of the nth canon poem.
    """
    canon_poems = poems()
    if not canon_poems:
        print("No poems found.")
        return None, None

    if n < 0 or n >= len(canon_poems):
        print(f"Index {n} is out of range. There are {len(canon_poems)} poems.")
        return None, None

    title, relative_url = canon_poems[n]
    
    # Only add the base URL if the URL is relative
    if not relative_url.startswith("http"):
        full_url = "https://aliceinwonderland.fandom.com" + relative_url
    else:
        full_url = relative_url

    soup = fetch_soup(full_url)
    if not soup:
        return title, None

    # Extract the poem text
    poem_text = ""
    content_div = soup.find('div', class_='mw-parser-output')
    if not content_div:
        print("Poem content not found.")
        return title, poem_text

    # Concatenate all paragraph texts
    for p in content_div.find_all('p'):
        paragraph = p.get_text(strip=True)
        if paragraph:
            poem_text += paragraph + "\n\n"

    return title, poem_text.strip()


### Testing functions

In [34]:
def test_books_and_poems():
    """
    Tests the extraction of canon books and poems, printing them with expected formatting.
    Also tests retrieving poem title and text.
    """
    # Test canon books extraction
    print("Canon Books:")
    books_list = books()
    if books_list:
        for book in books_list:
            print(f"- {book}")
    else:
        print("No canon books found.")

    # Test canon poems extraction
    print("\nCanon Poems:")
    poems_list = poems()
    if poems_list:
        for idx, (title, url) in enumerate(poems_list, start=0):
            print(f"{idx}. {title}: {url}")
    else:
        print("No canon poems found.")

def test_poem_title_text(n):
    """
    Tests the poem_title_text function by printing the title and text of the nth poem.
    """
    print(f"\nTesting poem at index {n}:")
    title, text = poem_title_text(n)
    
    if title and text:
        print(f"Poem {n} Title: {title}\n")
        print(f"Poem Text:\n{text}")
    else:
        print(f"Poem at index {n} could not be retrieved.")

# Test the books and poems extraction
test_books_and_poems()

# Test poem title and text for a specific index (e.g., 0 for the first poem)
test_poem_title_text(3)



Canon Books:
- Alice's Adventures in Wonderland
- Through the Looking-Glass, and What Alice Found There
- Alice's Adventures Underground

Canon Poems:
0. Jabberwocky: https://aliceinwonderland.fandom.com/wiki/Jabberwocky
1. How Doth the Little Crocodile: https://aliceinwonderland.fandom.com/wiki/How_Doth_the_Little_Crocodile
2. The Walrus and the Carpenter: https://aliceinwonderland.fandom.com/wiki/The_Walrus_and_the_Carpenter_(poem)
3. You Are Old, Father William: https://aliceinwonderland.fandom.com/wiki/You_Are_Old,_Father_William
4. Humpty Dumpty's Recitation: https://aliceinwonderland.fandom.com/wiki/Humpty_Dumpty%27s_Recitation
5. Turtle Soup: https://aliceinwonderland.fandom.com/wiki/Turtle_Soup
6. Tis the Voice of the Lobster: https://aliceinwonderland.fandom.com/wiki/Tis_the_Voice_of_the_Lobster

Testing poem at index 3:
Poem 3 Title: You Are Old, Father William

Poem Text:
You Are Old, Father Williamis a poem by Lewis Carroll that appears in his bookAlice's Adventures in Wond