In [1]:
# ============================================
# IMPORTS
# ============================================
import os
import sys
import json
from typing import Any, Dict, List

from openai import OpenAI

print("Imports successful")

Imports successful


In [2]:
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()

# Initialize client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

print("‚úì OpenAI client initialized successfully!")


‚úì OpenAI client initialized successfully!


In [3]:
# ============================================
# VERIFY API KEY
# ============================================

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

print("‚úì Environment variables loaded successfully!")
print(f"‚úì OPENAI Token: {os.getenv('OPENAI_API_KEY')[:10]}...")

‚úì Environment variables loaded successfully!
‚úì OPENAI Token: sk-proj-HX...


In [4]:

# Change model if you want a bigger one (e.g. "gpt-4.1")
OPENAI_MODEL = "gpt-4.1-mini"

client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY")  # Make sure this is set in your environment
)


In [5]:
# ----------------------------
# 2. Structural QA Agent Prompt
# ----------------------------

STRUCTURAL_QA_SYSTEM_PROMPT = """
You are a Structural QA Agent for HTML promotional emails.

Your job:
- Assess the HTML structure of an email.
- Focus on structure, placement, order, and hierarchy of elements.

You must:
- Consider cross-client rendering (Outlook, Gmail, Apple Mail, mobile).
- Identify issues and categorize them by severity (critical, major, minor, info).

Check the following areas:

1) Document & Top-Level Structure
- Valid HTML skeleton (html > head > body).
- Only one <body>.
- Presence of a main wrapper table for layout.
- No unclosed tags or invalid nesting.

2) Layout Tables & Nesting
- Use of <table>, <tr>, <td> for layout.
- <tr> always inside <table>, <td> always inside <tr>.
- No <div> directly as child of <tr> (without <td>).
- No excessively deep unnecessary nesting that risks breaking layout.

3) Module / Section Order
- Typical order (if present): preheader, header/logo, hero, content modules, footer.
- For multi-module newsletters: ensure modules appear in logical order (Mod1, Mod2, Mod3).
- Verify that key content sections are not duplicated or misplaced.

4) Text Hierarchy & Semantics
- Logical heading structure (headline, subhead, body).
- Headings appear before their body text in DOM.
- Important information is text, not only inside images.

5) Images
- <img> has alt attribute (can be empty if decorative).
- width and height attributes set where appropriate.
- display:block for images used in layout tables to avoid gaps.
- Hero images and key images placed logically with headings and CTAs.

6) CTAs, Links, Buttons
- Buttons structured using bulletproof patterns (e.g., table + <a>).
- No nested <a> tags.
- Primary CTAs present in key modules.

7) Responsiveness & Mobile Structure
- Constrained main width (e.g., 600px container).
- Multi-column sections are structured in a way that they can stack on mobile.
- No essential content only in display:none blocks.

8) CSS for Structure
- Layout does not rely on flexbox or grid for core structure.
- Critical layout properties (width, padding, alignment) are inline or email-safe.

9) Accessibility & Reading Order
- DOM reading order matches intended visual order.
- Header -> main content -> secondary content -> footer.
- No large sequences of empty tags that may confuse screen readers.

10) Footer / Compliance Structure
- Footer appears at the end.
- Contains unsubscribe and legal/address content (if present in HTML).

Output STRICTLY in this JSON format:

{
  "summary": {
    "overall_status": "pass" | "fail" | "warning",
    "overall_comment": "short human-readable summary"
  },
  "metrics": {
    "total_issues": <int>,
    "critical": <int>,
    "major": <int>,
    "minor": <int>,
    "info": <int>
  },
  "issues": [
    {
      "id": "ISSUE-001",
      "severity": "critical" | "major" | "minor" | "info",
      "category": "Document Structure | Tables | Modules | Text Hierarchy | Images | CTAs | Responsiveness | CSS | Accessibility | Footer",
      "location_hint": "description of where in the HTML (e.g., 'main wrapper table', 'Module 2 image', 'footer section')",
      "description": "what is wrong structurally",
      "impact": "how this might break or degrade rendering across email clients",
      "recommendation": "specific, actionable fix"
    }
  ]
}

- If there are no issues, return total_issues = 0 and an empty issues array.
- Be concise but specific in descriptions and recommendations.
"""



In [6]:
# ----------------------------
# 3. Core Agent Call
# ----------------------------

def run_structural_qa_on_html(html_content: str) -> Dict[str, Any]:
    """
    Sends HTML content to the Structural QA Agent and returns a parsed JSON report.
    """

    response = client.chat.completions.create(
        model=OPENAI_MODEL,
        response_format={"type": "json_object"},
        messages=[
            {
                "role": "system",
                "content": STRUCTURAL_QA_SYSTEM_PROMPT.strip(),
            },
            {
                "role": "user",
                "content": html_content,
            },
        ],
    )

    raw_content = response.choices[0].message.content

    try:
        report = json.loads(raw_content)
    except json.JSONDecodeError:
        # If model returns something broken, wrap as a fallback
        report = {
            "summary": {
                "overall_status": "fail",
                "overall_comment": "Failed to parse JSON from model. See raw_output.",
            },
            "metrics": {
                "total_issues": 0,
                "critical": 0,
                "major": 0,
                "minor": 0,
                "info": 0,
            },
            "issues": [],
            "raw_output": raw_content,
        }

    return report

In [7]:
# ----------------------------
# 4. Reporting Helpers
# ----------------------------

def print_human_readable_report(report: Dict[str, Any]) -> None:
    """
    Pretty-prints a human-friendly version of the QA report to the console.
    """

    summary = report.get("summary", {})
    metrics = report.get("metrics", {})
    issues: List[Dict[str, Any]] = report.get("issues", [])

    print("=" * 80)
    print("STRUCTURAL QA REPORT")
    print("=" * 80)
    print(f"Overall status : {summary.get('overall_status', 'unknown')}")
    print(f"Summary        : {summary.get('overall_comment', '')}")
    print("-" * 80)
    print("Metrics:")
    print(f"  Total issues : {metrics.get('total_issues', len(issues))}")
    print(f"    Critical   : {metrics.get('critical', 0)}")
    print(f"    Major      : {metrics.get('major', 0)}")
    print(f"    Minor      : {metrics.get('minor', 0)}")
    print(f"    Info       : {metrics.get('info', 0)}")
    print("-" * 80)

    if not issues:
        print("No structural issues found. ‚úÖ")
        return

    print("Issues:")
    for i, issue in enumerate(issues, start=1):
        print(f"\n[{i}] {issue.get('id', 'ISSUE')}")
        print(f"  Severity     : {issue.get('severity', 'n/a')}")
        print(f"  Category     : {issue.get('category', 'n/a')}")
        print(f"  Location     : {issue.get('location_hint', 'n/a')}")
        print(f"  Description  : {issue.get('description', '')}")
        print(f"  Impact       : {issue.get('impact', '')}")
        print(f"  Recommendation:")
        print(f"    {issue.get('recommendation', '')}")
    print("\n" + "=" * 80)


def save_json_report(report: Dict[str, Any], output_path: str) -> None:
    """
    Saves the full JSON report to disk for later debugging or integration.
    """
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, ensure_ascii=False)
    print(f"\nJSON report saved to: {output_path}")


In [9]:
# ----------------------------
# 5. File Handling & CLI
# ----------------------------

def read_html_file(file_path: str) -> str:
    """
    Reads the HTML file from the provided path.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"HTML file not found at: {file_path}")

    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()


def main():
    """
    Usage:
        python structural_qa_agent.py path/to/email.html [optional_output.json]
    """
    if len(sys.argv) < 2:
        print("Usage: python structural_qa_agent.py path/to/email.html [output.json]")
        sys.exit(1)

    html_path = sys.argv[1]

    html_path = r'C:\Users\swade01\nxt24\InputsToQA\[ERS00725_Jul_MNL-MainEntity=Kansas City Chiefs Football Club Inc-M1=Transitioner-M2=Prospect_Rollin_NonRetiree-proof] Add this to your retirement checklist üñäÔ∏è.html'

    output_json_path = sys.argv[2] if len(sys.argv) > 2 else None

    print(f"Reading HTML from: {html_path}")
    html_content = read_html_file(html_path)

    print("Running Structural QA Agent via OpenAI‚Ä¶")
    report = run_structural_qa_on_html(html_content)

    print_human_readable_report(report)

    if output_json_path:
        save_json_report(report, output_json_path)


if __name__ == "__main__":
    main()

Reading HTML from: C:\Users\swade01\nxt24\InputsToQA\[ERS00725_Jul_MNL-MainEntity=Kansas City Chiefs Football Club Inc-M1=Transitioner-M2=Prospect_Rollin_NonRetiree-proof] Add this to your retirement checklist üñäÔ∏è.html
Running Structural QA Agent via OpenAI‚Ä¶
STRUCTURAL QA REPORT
Summary        : The HTML email has good structural foundation with proper top-level elements and main wrapper tables. Modules are generally well ordered; however, the module order is not sequential (Mod3 appears before Mod1), which may affect reading flow. Minor issues with image alt attributes and missing width/height on some images are present. Responsiveness and button structures follow best practices overall. No critical structural issues detected.
--------------------------------------------------------------------------------
Metrics:
  Total issues : 4
    Critical   : 0
    Major      : 1
    Minor      : 2
    Info       : 1
-----------------------------------------------------------------------