In [5]:
from bs4 import BeautifulSoup
import requests

def _output_file_name(url: str) -> str:
    """Derive the output file name from the last path segment of *url*.

    The query string and fragment are dropped and trailing slashes are
    ignored, so ``https://host/jobs/visa/?x=1`` yields ``visa.txt``.
    ``index.txt`` is used when no usable segment remains.
    """
    path = url.split('#', 1)[0].split('?', 1)[0].rstrip('/')
    return (path.split('/')[-1] or 'index') + '.txt'


def _intro_lines(soup):
    """Return the ``<h1>`` title and ``p.lead`` text, then one blank line.

    Either element is skipped when the page does not contain it.
    """
    lines = []

    title = soup.find('h1')
    if title is not None:
        # Collapse line breaks and runs of whitespace inside the heading.
        lines.append(' '.join(title.get_text().split()))

    lead = soup.find('p', class_='lead')
    if lead is not None:
        lines.append(lead.get_text().strip())

    lines.append('')
    return lines


def _is_heading_div(tag) -> bool:
    """Tell whether *tag* is a ``<div>`` carrying the ``heading`` class."""
    return tag.name == 'div' and 'heading' in (tag.get('class') or [])


def _heading_section_lines(soup):
    """Return the text of every ``div.heading`` section.

    A section is the ``<h2>`` text of the heading div followed by the
    ``<p>`` and ``<ul>`` siblings after it, and ends with a blank line.
    """
    lines = []

    for heading_div in soup.find_all('div', class_='heading'):
        if heading_div.h2 is not None:
            lines.append(heading_div.h2.get_text().strip())

        for sibling in heading_div.find_next_siblings(True):
            # Stop at the next heading so that a section does not also
            # swallow the content belonging to the sections after it.
            if _is_heading_div(sibling):
                break
            if sibling.name == 'p':
                lines.append(sibling.get_text().strip())
            elif sibling.name == 'ul':
                lines.extend(
                    li.get_text().strip() for li in sibling.find_all('li')
                )

        lines.append('')

    return lines


def _number_prefix(text: str) -> str:
    """Insert a period after a leading run of digits.

    ``"12Apply"`` becomes ``"12.Apply"``; text that does not start with a
    digit, or whose number is already followed by a period, is returned
    unchanged.
    """
    end = 0
    while end < len(text) and text[end].isdigit():
        end += 1
    if end == 0 or text[end:end + 1] == '.':
        return text
    return f"{text[:end]}.{text[end:]}"


def _application_steps_lines(soup):
    """Return the lines of the ``section.entry-content.stepped-content`` block.

    The section's ``<h2>`` comes first, then the text of each ``<p>``,
    ``<ul>`` and ``<ol>`` inside it.  Returns an empty list when the page
    has no such section.
    """
    steps = soup.find('section', class_='entry-content stepped-content')
    if steps is None:
        return []

    lines = []
    if steps.h2 is not None:
        lines.append(steps.h2.get_text().strip())
    lines.extend(
        _number_prefix(item.get_text(strip=True))
        for item in steps.find_all(['p', 'ul', 'ol'])
    )
    return lines


def _modal_step_descriptions(soup):
    """Map each ``<h4>`` title in ``div.modal-body`` to its description.

    The description is the first ``<p>`` of the element whose ``id`` matches
    the fragment in the href of the link inside the ``<h4>``.  Headers
    without a link, a target or a paragraph are left out.
    """
    modal = soup.find('div', class_='modal-body')
    if modal is None:
        return {}

    descriptions = {}
    for header in modal.find_all('h4'):
        link = header.find('a')
        if link is None:
            continue

        target_id = link.get('href', '').lstrip('#')
        if not target_id:
            continue

        target = modal.find(id=target_id)
        paragraph = target.find('p') if target is not None else None
        if paragraph is not None:
            descriptions[header.get_text().strip()] = paragraph.get_text().strip()

    return descriptions


def _link_lines(soup):
    """Return the ``Links:`` header followed by one line per ``<a href>``.

    A link whose text matches a modal step title is shown with that step's
    description; every other link is shown with its URL.  Anchors without
    an href are skipped.
    """
    descriptions = _modal_step_descriptions(soup)

    lines = ["Links:"]
    for anchor in soup.find_all('a'):
        href = anchor.get('href', '')
        if not href:
            continue
        label = anchor.get_text(strip=True)
        lines.append(f"{label} -> {descriptions.get(label, href)}")
    return lines


def extract_and_write_info_to_file(url: str, timeout: float = 30) -> None:
    """Scrape *url* and write its text content to a ``.txt`` file.

    The output file is created in the current working directory and named
    after the last path segment of *url*.  It contains, in order: the page
    title and lead paragraph, the ``div.heading`` sections, the stepped
    "entry-content" section (when present) and a list of the page's links.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    # Set a user-agent header to mimic a web browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
    }

    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Collect everything before touching the disk so that a parsing failure
    # cannot leave a half-written file behind.
    lines = [
        *_intro_lines(soup),
        *_heading_section_lines(soup),
        *_application_steps_lines(soup),
        *_link_lines(soup),
    ]

    with open(_output_file_name(url), 'w', encoding='utf-8') as file:
        file.writelines(f"{line}\n" for line in lines)
        
       

if __name__ == "__main__":
    # Strip surrounding whitespace so that a stray space or pasted newline
    # does not end up in the request URL or in the output file name.
    url = input("Enter the URL: ").strip()
    if url:
        extract_and_write_info_to_file(url)
    else:
        # An empty answer cannot be fetched; say so instead of letting the
        # HTTP library fail with a traceback.
        print("No URL entered; nothing to do.")
