In [1]:
import html
import json
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

def measure_load_times(driver, entries):
    """Time ``driver.get`` for every entry and collect the results.

    Args:
        driver: Object exposing ``get(url)``; the script passes a Selenium
            Chrome WebDriver.
        entries: Sequence of dicts, each with a ``'url'`` and a ``'size'``
            key (``'size'`` may be ``None``).

    Returns:
        A list with one dict per entry holding ``'url'``, ``'size'`` and
        ``'load_time'`` (seconds as a float). When ``driver.get`` raises,
        ``'load_time'`` is ``None`` and an ``'error'`` key carries the
        exception text.
    """
    results = []
    total_urls = len(entries)
    for i, entry in enumerate(entries, start=1):
        url = entry['url']
        page_size = entry['size']
        try:
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed (or go negative) when the wall clock is adjusted.
            start_time = time.perf_counter()
            driver.get(url)
            load_time = time.perf_counter() - start_time
        except Exception as e:
            # Per-URL boundary: one failing page must not abort the whole
            # run, so the failure is recorded and the loop continues.
            print(f"{i}/{total_urls} - {url}: Selenium error ({e})")
            results.append({'url': url, 'size': page_size, 'load_time': None, 'error': str(e)})
        else:
            # progress print
            print(f"{i}/{total_urls} - {url}: {page_size} bytes, {load_time:.2f} seconds")
            results.append({'url': url, 'size': page_size, 'load_time': load_time})
    return results


def render_html_report(results):
    """Return the results as a complete HTML document with one table.

    Args:
        results: Iterable of dicts with ``'url'``, ``'size'`` and
            ``'load_time'`` keys, as produced by ``measure_load_times``.

    Returns:
        The HTML text. A ``None`` size or load time is shown as ``N/A``.
        URL and size text is HTML-escaped so characters such as ``&`` or
        ``<`` cannot break or inject markup.
    """
    parts = [
        # The charset declaration matches the UTF-8 encoding the file is
        # written with.
        "<html><head><meta charset='utf-8'><title>URL Sizes and Load Times</title></head><body><table border='1'>",
        "<tr><th>URL</th><th>Size</th><th>Load Time (s)</th></tr>",
    ]
    for result in results:
        size_str = f"{result['size']} B" if result['size'] is not None else "N/A"
        load_time_str = f"{result['load_time']:.2f}" if result['load_time'] is not None else "N/A"
        parts.append(
            f"<tr><td>{html.escape(str(result['url']))}</td>"
            f"<td>{html.escape(size_str)}</td><td>{load_time_str}</td></tr>"
        )
    parts.append("</table></body></html>")
    # Joining once avoids repeatedly re-copying a growing string.
    return "".join(parts)


# The guard keeps the helpers importable without launching a browser. When run
# as a script or notebook cell, the names assigned below remain module-level.
if __name__ == "__main__":
    # load URLs and sizes from the JSON file
    with open('url_sizes.json', 'r', encoding='utf-8') as file:
        data = json.load(file)

    # setup selenium webdriver
    options = Options()
    options.add_argument("--headless")  # run in headless mode
    service = Service(executable_path='C:\\Windows\\System32\\chromedriver.exe')
    driver = webdriver.Chrome(service=service, options=options)

    # loop the URLs and measure their load times
    try:
        results = measure_load_times(driver, data)
    finally:
        # quit webdriver even if the run is interrupted or fails, so no
        # headless browser process is left behind
        driver.quit()

    # save results to JSON file
    with open('pagesize_load_time.json', 'w', encoding='utf-8') as json_file:
        json.dump(results, json_file, indent=4)

    # save results to HTML file
    html_content = render_html_report(results)
    with open('pagesize_load_time.html', 'w', encoding='utf-8') as html_file:
        html_file.write(html_content)

    print("Data saved to pagesize_load_time.json and pagesize_load_time.html")


1/536 - https://im.hthayat.com: None bytes, 2.72 seconds
2/536 - https://im.haberturk.com: 18 bytes, 0.06 seconds
3/536 - https://static.criteo.net: 43 bytes, 0.18 seconds
4/536 - https://adsp.haberturk.com: 145 bytes, 0.08 seconds
5/536 - https://geoim.bloomberght.com: 162 bytes, 0.06 seconds
6/536 - https://im.showtv.com.tr: 162 bytes, 0.14 seconds
7/536 - https://mo.ciner.com.tr: 162 bytes, 0.07 seconds
8/536 - https://www.haberturk.com/images/common/favicon/16x16.png: 529 bytes, 0.06 seconds
9/536 - https://www.haberturk.com/manifest.webmanifest: 744 bytes, 0.01 seconds
10/536 - https://www.haberturk.com/images/common/favicon/32x32.png: 913 bytes, 0.01 seconds
11/536 - https://www.googletagmanager.com: 1555 bytes, 1.16 seconds
12/536 - https://pagead2.googlesyndication.com: 1561 bytes, 0.20 seconds
13/536 - https://www.googletagservices.com: 1561 bytes, 0.18 seconds
14/536 - https://securepubads.g.doubleclick.net: 1561 bytes, 0.22 seconds
15/536 - https://www.haberturk.com/images/c