In [3]:
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin

def download_files_from_page(url, download_folder="./test"):
    # Create the folder if it doesn't exist
    if not os.path.exists(download_folder):
        os.makedirs(download_folder)

    # Send a GET request to the page
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code != 200:
        print(f"Failed to retrieve the page, status code: {response.status_code}")
        return
    
    # Parse the HTML content of the page
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find all links that potentially lead to downloadable files
    links = soup.find_all('a', href=True)

    print(links)
    
    # Filter links that appear to be files (by checking for file extensions in the href)
    # You can adjust this filter to match the file types you're interested in (e.g., .pdf, .docx)
    file_links = [link for link in links if link['href'].lower().endswith(('pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx'))]

    # Loop through the file links and download each file
    for link in file_links:
        file_url = urljoin(url, link['href'])  # Get the full URL for the file
        file_name = os.path.join(download_folder, link['href'].split('/')[-1])  # Save with original file name

        # Send a GET request to download the file
        file_response = requests.get(file_url)

        if file_response.status_code == 200:
            with open(file_name, 'wb') as file:
                file.write(file_response.content)
            print(f"Downloaded: {file_name}")
        else:
            print(f"Failed to download {file_url} - Status code: {file_response.status_code}")

# Example usage:
download_files_from_page('https://meetings.boardbook.org/Public/Agenda/3184?meeting=677578', download_folder='./test')


[<a class="" href="#DisplayHeader">skip to main content</a>, <a aria-label="search" href="/Search/Index/3184?returnurl=/Public/Agenda/3184?meeting=677578" tabindex="-1">
<i class="fa fa-search sparq-menu-link"></i>
</a>, <a class="showHelpInDialog" href="/Help/SectionPublicAgenda">
<i class="fa fa-question-circle"></i>
                            Agenda Help
                        </a>, <a class="showHelpInDialog" href="/SparqMenu/HelpForMenuBar">
<i class="fa fa-question-circle"></i>
                            BoardBook Menu Bar Help
                        </a>, <a href="/SparqMenu/SiteCompatibility">
<i class="fa fa-question"></i>
                            Check Your Device's Compatibility
                        </a>, <a class="showHelpInDialog" href="/SparqMenu/HelpForNightlyMaintenanceWindow">
<i class="fa fa-wrench"></i>
                            Nightly Maintenance Window: Midnight to 5 AM Central Time
                        </a>, <a class="mainLogoLink" href="https://ww