In [None]:
%pip install -U -q beautifulsoup4

In [None]:
# Cub Scout rank to scrape; the value is interpolated into the URLs and the CSS class name used below.
rank = "tiger"

# Adventure listing page for the selected rank.
base_url = f"https://www.scouting.org/programs/cub-scouts/adventures/{rank}/"



In [None]:
from bs4 import BeautifulSoup
import requests


def get_html_document(url, timeout=30):
    """Download ``url`` and return the response body as text.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
    }
    # Without an explicit timeout, requests may wait forever on a stalled server.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here instead of handing an error page to the HTML parser downstream.
    response.raise_for_status()
    return response.text


def get_adventure_highlights():
    """Collect the name, link and required-flag of each adventure for ``rank``.

    Downloads the page at the module-level ``base_url`` and inspects every
    ``div`` carrying the ``cs-adv-rank-{rank}`` class.

    Returns:
        A list of ``(name, url, required)`` tuples. The first entry is a
        hard-coded "Bobcat" adventure; the remaining entries follow in the
        order the matching ``div`` elements are found. ``required`` is True
        when the ``div`` also has the ``cs-adv-type-required`` class.
    """
    html_document = get_html_document(base_url)
    soup = BeautifulSoup(html_document, "html.parser")

    # The Bobcat entry is not taken from the scraped page; it is always added first.
    highlights = [
        (
            "Bobcat",
            f"https://www.scouting.org/cub-scout-adventures/bobcat-{rank}/",
            True,
        )
    ]

    for adventure in soup.find_all("div", {"class": f"cs-adv-rank-{rank}"}):
        details_link = adventure.find("a")
        if details_link is None:
            # A matching div without any link has no name/URL to record.
            continue

        required = "cs-adv-type-required" in adventure.get("class", [])
        highlights.append((details_link.text, details_link.get("href"), required))

    return highlights

def get_activity_details(url):
    """Scrape a single activity page.

    Args:
        url: Address of the activity page.

    Returns:
        A ``(activity_description, supplies, directions)`` tuple of strings:
        the text of the first ``<p>`` in the ``adv-act`` section, the text of
        the first ``<ul>`` in the requirements section, and the text of the
        second ``pp-accordion-tab-content`` div in that same section.
    """
    page = BeautifulSoup(get_html_document(url), "html.parser")

    intro_section = page.find("section", {"class": "adv-act"})
    description_text = intro_section.find("p").text

    # NOTE(review): "xadv-requirements" differs from the "adv-requirements"
    # class used for adventure pages; confirm it matches the live markup.
    requirements_section = page.find("section", {"class": "xadv-requirements"})
    supplies_text = requirements_section.find("ul").text

    accordion_panels = requirements_section.find_all(
        "div", {"class": "pp-accordion-tab-content"}
    )
    # Index 1 selects the second accordion panel.
    directions_text = accordion_panels[1].text

    return (description_text, supplies_text, directions_text)


def get_adventure_details(adventure_hightlight):
    """Scrape one adventure page into a flat list of activity rows.

    Every activity found on the page triggers a further download through
    ``get_activity_details``.

    Args:
        adventure_hightlight: A ``(name, url, required)`` tuple; only ``url``
            is used here.

    Returns:
        A list with one 9-tuple per activity:
        ``(adventure_name, adventure_description, requirement_name,
        requirement_description, activity_name, activity_url,
        activity_description, supplies, directions)``.
    """
    _, url, _ = adventure_hightlight

    soup = BeautifulSoup(get_html_document(url), "html.parser")

    adventure_name = soup.find("h1").text
    adventure_description = (
        soup.find("section", {"class": "adv-requirements-snapshot"}).find("p").text
    )

    details = []
    # The first two "adv-requirements" sections are skipped; presumably they
    # are page-level blocks rather than individual requirements (TODO confirm).
    for requirement in soup.find_all("section", {"class": "adv-requirements"})[2:]:
        requirement_name = requirement.find("h2").text
        requirement_description = requirement.find("p").text

        for activity in requirement.find_all("article"):
            activity_anchor = activity.find("a")
            if activity_anchor is None:
                # An article without a link has no activity page to follow.
                continue

            activity_name = activity_anchor.text
            activity_url = activity_anchor.get("href")

            activity_description, supplies, directions = get_activity_details(
                activity_url
            )

            details.append(
                (
                    adventure_name,
                    adventure_description,
                    requirement_name,
                    requirement_description,
                    activity_name,
                    activity_url,
                    activity_description,
                    supplies,
                    directions,
                )
            )

    return details

In [None]:
# Fetch the (name, url, required) highlight tuples for the configured rank.
adventure_highlights = get_adventure_highlights()

# The [:1] slice limits the run to the first highlight, which is the
# hard-coded Bobcat entry. NOTE(review): presumably a development shortcut;
# widen or remove the slice to scrape every adventure.
for adventure in adventure_highlights[:1]:
    print(adventure)
    # Returns one row per activity; each row requires extra page downloads.
    details = get_adventure_details(adventure)
    for detail in details:
        print(detail)
