In [43]:
# https://gist.github.com/iiLaurens/81b1b47f6259485c93ce6f0cdd17490a
clickable = """
window.scrollTo(0, 0);
var bodyRect = document.body.getBoundingClientRect();

var items = Array.prototype.slice.call(
  document.querySelectorAll('*')
).map(function (element) {
  var rect = element.getBoundingClientRect();
  var content = element.textContent.trim().replace(/\s{2,}/g, ' ');
  return {
    element: element,
    include: (element.tagName === "BUTTON" || element.tagName === "A" || (element.onclick != null) || window.getComputedStyle(element).cursor == "pointer") && content.length > 0,
    rect: {
      left: Math.max(rect.left - bodyRect.x, 0),
      top: Math.max(rect.top - bodyRect.y, 0),
      right: Math.min(rect.right - bodyRect.x, document.body.clientWidth),
      bottom: Math.min(rect.bottom - bodyRect.y, document.body.clientHeight)
    },
    text: content
  };
}).filter(item =>
  item.include && ((item.rect.right - item.rect.left) * (item.rect.bottom - item.rect.top) >= 20));

// Only keep inner clickable items
items = items.filter(x => !items.some(y => x.element.contains(y.element) && !(x == y)));

// Remove items that don't have any content
items = items.filter(item => item.text.length > 0);

// Create a floating border on top of these elements that will always be visible
items.forEach(function (item) {
  var newElement = document.createElement("div");
  newElement.style.outline = "2px dashed rgba(255,0,0,.75)";
  newElement.style.position = "absolute";
  newElement.style.left = item.rect.left + "px";
  newElement.style.top = item.rect.top + "px";
  newElement.style.width = (item.rect.right - item.rect.left) + "px";
  newElement.style.height = (item.rect.bottom - item.rect.top) + "px";
  newElement.style.pointerEvents = "none";
  newElement.style.boxSizing = "border-box";
  newElement.style.zIndex = 2147483647;
  document.body.appendChild(newElement);
});
"""

In [44]:
writeable = """
window.scrollTo(0, 0);
var bodyRect = document.body.getBoundingClientRect();

var items = Array.prototype.slice.call(document.querySelectorAll('*')).map(function(element) {
  var rect = element.getBoundingClientRect();
  var isInputField = ['INPUT', 'TEXTAREA'].includes(element.tagName);
  var isContentEditable = element.isContentEditable === true;
  var include = (
    isInputField ||
    isContentEditable
  );

  return {
    element: element,
    include: include,
    rect: {
      left: Math.max(rect.left - bodyRect.x, 0),
      top: Math.max(rect.top - bodyRect.y, 0),
      right: Math.min(rect.right - bodyRect.x, document.body.clientWidth),
      bottom: Math.min(rect.bottom - bodyRect.y, document.body.clientHeight),
    },
    text: element.textContent.trim().replace(/\s{2,}/g, ' '),
  };
}).filter(item => item.include && ((item.rect.right - item.rect.left) * (item.rect.bottom - item.rect.top) >= 20));

// Only keep inner clickable items
items = items.filter(x => !items.some(y => x.element.contains(y.element) && !(x == y)));

// Create a floating border on top of these elements that will always be visible
items.forEach(function(item) {
  var newElement = document.createElement('div');
  newElement.style.outline = '2px dashed rgba(0, 255, 0, .75)'; // Change the color to green
  newElement.style.position = 'absolute';
  newElement.style.left = item.rect.left + 'px';
  newElement.style.top = item.rect.top + 'px';
  newElement.style.width = (item.rect.right - item.rect.left) + 'px';
  newElement.style.height = (item.rect.bottom - item.rect.top) + 'px';
  newElement.style.pointerEvents = 'none';
  newElement.style.boxSizing = 'border-box';
  newElement.style.zIndex = 2147483647;
  document.body.appendChild(newElement);
});


"""

In [74]:
text_region = """ 
window.scrollTo(0, 0);
var bodyRect = document.body.getBoundingClientRect();

var items = Array.prototype.slice.call(document.querySelectorAll('*')).map(function(element) {
  var rect = element.getBoundingClientRect();
  var isInputField = ['INPUT', 'TEXTAREA'].includes(element.tagName);
  var isClickableElement = ['A', 'BUTTON'].includes(element.tagName) || (element.onclick != null) || window.getComputedStyle(element).cursor == 'pointer';
  var isContentEditable = element.isContentEditable === true;
  var include = (
    !isInputField &&
    !isClickableElement &&
    !isContentEditable &&
    (element.textContent.trim().length > 0) && // Check if the element contains non-empty text
    (element.textContent.trim().length >= 3) // Check if the element's content length is greater than or equal to 3
  );

  return {
    element: element,
    include: include,
    rect: {
      left: Math.max(rect.left - bodyRect.x, 0),
      top: Math.max(rect.top - bodyRect.y, 0),
      right: Math.min(rect.right - bodyRect.x, document.body.clientWidth),
      bottom: Math.min(rect.bottom - bodyRect.y, document.body.clientHeight),
    },
    text: element.textContent.trim().replace(/\s{2,}/g, ' '),
  };
}).filter(item => item.include && ((item.rect.right - item.rect.left) * (item.rect.bottom - item.rect.top) >= 20));

// Only keep inner elements
items = items.filter(x => !items.some(y => x.element.contains(y.element) && !(x == y)));

// Remove items that don't have any content
items = items.filter(item => item.text.length > 0);

// Filter out items containing clickable elements
items = items.filter(item => !Array.from(item.element.querySelectorAll('*')).some(child => {
  var isClickableChild = ['A', 'BUTTON'].includes(child.tagName) || (child.onclick != null) || window.getComputedStyle(child).cursor == 'pointer';
  return isClickableChild;
}));

// Create a floating border on top of these elements that will always be visible
items.forEach(function(item) {
  var newElement = document.createElement('div');
  newElement.style.outline = '2px dashed rgba(0, 0, 0, .75)'; // Change the color to black
  newElement.style.position = 'absolute';
  newElement.style.left = item.rect.left + 'px';
  newElement.style.top = item.rect.top + 'px';
  newElement.style.width = (item.rect.right - item.rect.left) + 'px';
  newElement.style.height = (item.rect.bottom - item.rect.top) + 'px';
  newElement.style.pointerEvents = 'none';
  newElement.style.boxSizing = 'border-box';
  newElement.style.zIndex = 2147483647;
  document.body.appendChild(newElement);
});

"""

In [75]:
link = """
window.scrollTo(0, 0);
var bodyRect = document.body.getBoundingClientRect();

var items = Array.prototype.slice.call(document.querySelectorAll('a')).map(function(element) {
  var rect = element.getBoundingClientRect();
  var include = (
    element.textContent.trim().length > 0 && // Check if the link has non-empty text
    (rect.width * rect.height >= 20) // Check if the link has a minimum size
  );

  return {
    element: element,
    include: include,
    rect: {
      left: Math.max(rect.left - bodyRect.x, 0),
      top: Math.max(rect.top - bodyRect.y, 0),
      right: Math.min(rect.right - bodyRect.x, document.body.clientWidth),
      bottom: Math.min(rect.bottom - bodyRect.y, document.body.clientHeight),
    },
    text: element.textContent.trim().replace(/\s{2,}/g, ' '),
  };
}).filter(item => item.include);

// Create a floating border around each link element with purple color
items.forEach(function(item) {
  var newElement = document.createElement('div');
  newElement.style.outline = '2px dashed rgba(128, 0, 128, 0.75)'; // Purple color
  newElement.style.position = 'absolute';
  newElement.style.left = item.rect.left + 'px';
  newElement.style.top = item.rect.top + 'px';
  newElement.style.width = (item.rect.right - item.rect.left) + 'px';
  newElement.style.height = (item.rect.bottom - item.rect.top) + 'px';
  newElement.style.pointerEvents = 'none';
  newElement.style.boxSizing = 'border-box';
  newElement.style.zIndex = 2147483647;
  document.body.appendChild(newElement);
});

"""

In [76]:
images = """
window.scrollTo(0, 0);
var bodyRect = document.body.getBoundingClientRect();

// Process link elements
var linkItems = Array.prototype.slice.call(document.querySelectorAll('a')).map(function(element) {
  var rect = element.getBoundingClientRect();
  var include = (
    element.textContent.trim().length > 0 && // Check if the link has non-empty text
    (rect.width * rect.height >= 20) // Check if the link has a minimum size
  );

  return {
    element: element,
    include: include,
    rect: {
      left: Math.max(rect.left - bodyRect.x, 0),
      top: Math.max(rect.top - bodyRect.y, 0),
      right: Math.min(rect.right - bodyRect.x, document.body.clientWidth),
      bottom: Math.min(rect.bottom - bodyRect.y, document.body.clientHeight),
    },
    text: element.textContent.trim().replace(/\s{2,}/g, ' '),
  };
}).filter(item => item.include);

// Process image elements
var imageItems = Array.prototype.slice.call(document.querySelectorAll('img')).map(function(element) {
  var rect = element.getBoundingClientRect();
  var include = (
    (rect.width * rect.height >= 20) // Check if the image has a minimum size
  );

  return {
    element: element,
    include: include,
    rect: {
      left: Math.max(rect.left - bodyRect.x, 0),
      top: Math.max(rect.top - bodyRect.y, 0),
      right: Math.min(rect.right - bodyRect.x, document.body.clientWidth),
      bottom: Math.min(rect.bottom - bodyRect.y, document.body.clientHeight),
    },
  };
}).filter(item => item.include);

// Create pink borders around images
imageItems.forEach(function(item) {
  var newElement = document.createElement('div');
  newElement.style.outline = '2px dashed rgba(255, 192, 203, 0.75)'; // Pink color
  newElement.style.position = 'absolute';
  newElement.style.left = item.rect.left + 'px';
  newElement.style.top = item.rect.top + 'px';
  newElement.style.width = (item.rect.right - item.rect.left) + 'px';
  newElement.style.height = (item.rect.bottom - item.rect.top) + 'px';
  newElement.style.pointerEvents = 'none';
  newElement.style.boxSizing = 'border-box';
  newElement.style.zIndex = 2147483647;
  document.body.appendChild(newElement);
});

// Create purple borders around links
linkItems.forEach(function(item) {
  var newElement = document.createElement('div');
  newElement.style.outline = '2px dashed rgba(128, 0, 128, 0.75)'; // Purple color
  newElement.style.position = 'absolute';
  newElement.style.left = item.rect.left + 'px';
  newElement.style.top = item.rect.top + 'px';
  newElement.style.width = (item.rect.right - item.rect.left) + 'px';
  newElement.style.height = (item.rect.bottom - item.rect.top) + 'px';
  newElement.style.pointerEvents = 'none';
  newElement.style.boxSizing = 'border-box';
  newElement.style.zIndex = 2147483647;
  document.body.appendChild(newElement);
});

"""

In [77]:
def start_browesr():
  from selenium import webdriver
  from selenium.webdriver.chrome.service import Service

  
  chrome_options = webdriver.ChromeOptions()
  #chrome_options.add_argument('--headless')
  chrome_options.add_argument('--no-sandbox')
  chrome_options.add_argument('--lang=en')
  #chrome_options.headless = True
  return webdriver.Remote("http://host.docker.internal:4444/wd/hub",options=chrome_options)

In [78]:
def web_driver_to_image(wd,prefix=""):
  import re
  import os
  from urllib.parse import urlparse, urlunparse
  
  url = wd.current_url
  # Parse the URL
  parsed_url = urlparse(url)

  # Remove the query parameters
  clean_url = urlunparse((parsed_url.scheme, parsed_url.netloc, parsed_url.path, "", "", ""))
  file_name = re.sub(r'[^a-zA-Z0-9_-]', '_', clean_url)
  file_name = f"{prefix}{file_name}"

  wd.save_screenshot(f"{file_name}.png")
  return file_name

In [79]:
import time
urls = [
        # "https://www.google.com",
        # "https://www.bbc.com/",
        # "https://www.facebook.com/",
        # "https://twitter.com/Benioff",
        # "https://twitter.com/bchesky",
        # "https://www.reddit.com/",
        # "https://www.ynet.co.il/",
        "https://www.yahoo.com/",
        # "https://www.yad2.co.il/"
        ]

for url in urls:
    try:
        wd = start_browesr()
        wd.get(url)
        time.sleep(5)
        wd.execute_script(clickable)
        wd.execute_script(writeable)
        wd.execute_script(text_region)
        wd.execute_script(link)
        wd.execute_script(images)
        web_driver_to_image(wd)
    finally:
        wd.close()
        wd.quit()

JavascriptException: Message: javascript error: computedStyle is not defined
  (Session info: chrome=114.0.5735.133)
Stacktrace:
#0 0x5570fae384e3 <unknown>
#1 0x5570fab67c76 <unknown>
#2 0x5570fab6c53c <unknown>
#3 0x5570fab6e480 <unknown>
#4 0x5570fabdbf32 <unknown>
#5 0x5570fabc3012 <unknown>
#6 0x5570fabdb30e <unknown>
#7 0x5570fabc2de3 <unknown>
#8 0x5570fab982dd <unknown>
#9 0x5570fab9934e <unknown>
#10 0x5570fadf83e4 <unknown>
#11 0x5570fadfc3d7 <unknown>
#12 0x5570fae06b20 <unknown>
#13 0x5570fadfd023 <unknown>
#14 0x5570fadcb1aa <unknown>
#15 0x5570fae216b8 <unknown>
#16 0x5570fae21847 <unknown>
#17 0x5570fae31243 <unknown>
#18 0x7fa83c36f609 start_thread


Seems like we are missing alot of elements on the screen that are hidden but are detected by this logic