In [7]:
import html
import json

# Header names of interest, grouped by the category they are reported under.
_security_header_names = [
    "Strict-Transport-Security",
    "Content-Security-Policy",
    "X-Content-Type-Options",
    "X-Frame-Options",
    "X-XSS-Protection",
    "Referrer-Policy",
    "Permissions-Policy",
    "Set-Cookie",
]

_performance_header_names = [
    "Cache-Control",
    "Expires",
    "ETag",
    "Last-Modified",
    "Age",
    "Transfer-Encoding",
    "Content-Encoding",
    "Content-Length",
    "Accept-Ranges",
    "Vary",
    "x-cache-sys",
    "x-l1-cache",
]

# Category name -> list of header names; the category order here is the
# order in which categories appear in the filtered output.
important_headers = {
    "security": _security_header_names,
    "performance": _performance_header_names,
}

# Load the JSON file.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default.
with open('url_headers.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Function to filter important headers
def filter_important_headers(headers, header_types):
    """Pick out the headers named in *header_types*, grouped by category.

    HTTP header names are case-insensitive, so each wanted name is looked up
    exactly first and, failing that, case-insensitively. This keeps headers
    such as ``strict-transport-security`` from being dropped just because
    the server (or the HTTP version) used a different capitalisation.

    Args:
        headers: Mapping of header name to header value for one response.
        header_types: Mapping of category name to a list of header names.

    Returns:
        A dict with one entry per category in *header_types*. Each entry
        maps the header name, spelled as in *header_types*, to its value.
        Headers that are not present in *headers* are omitted.
    """
    # Case-insensitive index; setdefault keeps the first value when several
    # keys differ only by case.
    by_lower_name = {}
    for name, value in headers.items():
        by_lower_name.setdefault(name.lower(), value)

    filtered_headers = {}
    for header_type, header_list in header_types.items():
        selected = {}
        for header in header_list:
            if header in headers:
                # An exact-case match wins over any case-insensitive one.
                selected[header] = headers[header]
            elif header.lower() in by_lower_name:
                selected[header] = by_lower_name[header.lower()]
        filtered_headers[header_type] = selected
    return filtered_headers

# Process each URL and its headers
filtered_results = []
for item in data:
    url = item['url']
    # .get() so that a record with no 'headers' key is handled by the error
    # branch below instead of raising KeyError.
    headers = item.get('headers')
    if headers:
        important_headers_filtered = filter_important_headers(headers, important_headers)
        filtered_results.append({'url': url, 'important_headers': important_headers_filtered})
    else:
        # No headers (missing, None or empty): record the error text if the
        # input provides one.
        filtered_results.append({'url': url, 'important_headers': None, 'error': item.get('error', 'Unknown error')})

# Save the filtered results to a new JSON file.
# Explicit UTF-8 keeps the output independent of the platform locale.
with open('header_important.json', 'w', encoding='utf-8') as json_file:
    json.dump(filtered_results, json_file, indent=4)

# Generate HTML report.
# URLs and header values originate from remote servers, so every value that
# is interpolated into the markup is passed through html.escape(); otherwise
# a header containing '<' or '&' could break the table or inject markup.
html_rows = []
for result in filtered_results:
    selected = result['important_headers']
    if selected is not None:
        # One "name: value" line per header, separated by <br>.
        security_headers_str = "<br>".join(
            html.escape(f"{k}: {v}") for k, v in selected['security'].items()
        )
        performance_headers_str = "<br>".join(
            html.escape(f"{k}: {v}") for k, v in selected['performance'].items()
        )
    else:
        # No headers were available for this URL.
        security_headers_str = "N/A"
        performance_headers_str = "N/A"

    html_rows.append(
        f"<tr><td>{html.escape(str(result['url']))}</td>"
        f"<td>{security_headers_str}</td><td>{performance_headers_str}</td></tr>"
    )

# Assemble the document in one join rather than repeated string +=.
# The charset declaration matches the encoding used to write the file.
html_content = (
    "<html><head><meta charset='utf-8'><title>Important URL Headers</title></head><body><table border='1'>"
    "<tr><th>URL</th><th>Security Headers</th><th>Performance Headers</th></tr>"
    + "".join(html_rows)
    + "</table></body></html>"
)

with open('header_important.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html_content)

print("Filtered data saved to header_important.json and header_important.html")


Filtered data saved to header_important.json and header_important.html
