In [None]:
import re

def replace_html_body(html_content, new_body_content):
    """
    Replace the content of the <body> element in HTML with new content.

    The original opening ``<body ...>`` tag (including its attributes) and the
    original closing tag are kept verbatim; everything between them is swapped
    for ``new_body_content``. Tag matching is case-insensitive. If the input
    contains no ``<body>...</body>`` pair, it is returned unchanged.

    Args:
        html_content (str): Original HTML content
        new_body_content (str): New content to insert into the body

    Returns:
        str: HTML content with the body replaced
    """
    # Group 1: opening tag with any attributes. Group 2: closing tag.
    # The non-greedy ``.*?`` (with DOTALL so it spans newlines) consumes the
    # old body content up to the first closing tag.
    body_pattern = r'(<body\b[^>]*>).*?(</body\s*>)'

    def replacement(match):
        # A callable is used instead of a replacement string so backslashes or
        # group references inside new_body_content are inserted literally.
        return f'{match.group(1)}{new_body_content}{match.group(2)}'

    return re.sub(
        body_pattern,
        replacement,
        html_content,
        flags=re.DOTALL | re.IGNORECASE,
    )

In [1]:
import re

def remove_html_styles(html_content, *, remove_classes=False):
    """
    Remove all style-related content from HTML.
    This function removes:
    1. <style> tags and their content
    2. Style attributes from HTML elements (double- or single-quoted)
    3. <link> tags that reference CSS files (rel="stylesheet")

    Tag and attribute names are matched case-insensitively.

    Args:
        html_content (str): Original HTML content
        remove_classes (bool): Keyword-only. When True, quoted ``class``
            attributes are stripped as well. Defaults to False, which leaves
            class attributes untouched.

    Returns:
        str: HTML content with all styles removed
    """
    modified_html = html_content

    # 1. Remove <style> tags and their content (DOTALL: CSS spans many lines).
    style_pattern = r'<style\b[^>]*>.*?</style\s*>'
    modified_html = re.sub(
        style_pattern, '', modified_html, flags=re.DOTALL | re.IGNORECASE
    )

    # 2. Remove style attributes. The leading whitespace (space, tab or
    #    newline) is required so that names such as ``data-style`` are left
    #    alone, and it is removed together with the attribute.
    style_attr_pattern = r'\s+style\s*=\s*(?:"[^"]*"|\'[^\']*\')'
    modified_html = re.sub(
        style_attr_pattern, '', modified_html, flags=re.IGNORECASE
    )

    # 3. Remove <link> tags whose rel attribute is exactly "stylesheet".
    css_link_pattern = (
        r'<link\b[^>]*\brel\s*=\s*["\']\s*stylesheet\s*["\'][^>]*>'
    )
    modified_html = re.sub(
        css_link_pattern, '', modified_html, flags=re.IGNORECASE
    )

    # Optional: strip class attributes too, for callers that want no styling
    # hooks left in the markup.
    if remove_classes:
        class_attr_pattern = r'\s+class\s*=\s*(?:"[^"]*"|\'[^\']*\')'
        modified_html = re.sub(
            class_attr_pattern, '', modified_html, flags=re.IGNORECASE
        )

    return modified_html

In [21]:
# Read the HTML template from disk as UTF-8 text; the handle is closed
# automatically when the with-block exits.
with open('../templates/index.html', mode='r', encoding='utf-8') as template_file:
    file = template_file.read()

In [25]:
# Show the loaded template after style removal (the text that follows is the captured cell output).
print(remove_html_styles(file))

<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Modern Landing Page</title>
  
</head>

<body>
  <!-- Header & Navigation -->
  <header>
    <div class="container">
      <nav>
        <a href="#" class="logo">Quantum</a>
        <div class="menu-toggle" id="menuToggle">
          <span class="bar"></span>
          <span class="bar"></span>
          <span class="bar"></span>
        </div>
        <div class="nav-links" id="navLinks">
          <a href="#features">Features</a>
          <a href="#testimonials">Testimonials</a>
          <a href="#pricing">Pricing</a>
          <a href="#contact">Contact</a>
        </div>
        <button class="btn">Get Started</button>
      </nav>
    </div>
  </header>

  <!-- Hero Section -->
  <section class="hero">
    <div class="container">
      <div class="hero-content">
        <div class="hero-text">
          <h1>Modern Solution for Your B