In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import logging
import urllib.parse
from concurrent.futures import ThreadPoolExecutor, as_completed

import time

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def setup_driver():
    """Build a headless Chrome WebDriver and hand it to the caller.

    Chrome is launched without a UI, with the GPU and notifications
    disabled and a 1920x1080 window.

    Returns:
        The ``webdriver.Chrome`` instance created from these options.
    """
    launch_flags = (
        "--headless",  # run without a visible browser window
        "--disable-gpu",
        "--window-size=1920,1080",
        "--disable-notifications",
    )
    # A proxy can be enabled by adding a "--proxy-server=<url>" flag above.

    opts = Options()
    for flag in launch_flags:
        opts.add_argument(flag)

    return webdriver.Chrome(options=opts)


In [2]:
import csv
import threading
import os
import time
import random
from selenium.webdriver.common.by import By

output_file = "HDBank-news.csv"
csv_lock = threading.Lock()  # Lock for thread-safe writing

# Column order of the output CSV; the header row is written from this list.
CSV_FIELDNAMES = ["image", "title", "description", "date", "detail", "detail_images"]


def write_headers(path=None):
    """Write the CSV header row if the target file is missing or empty.

    An existing file that already has content is left untouched, so rows
    collected by an earlier run are never overwritten.

    Args:
        path: File to initialise. Defaults to the module-level ``output_file``.
    """
    target = output_file if path is None else path
    # A zero-byte file has no header yet, so treat it the same as a missing file.
    if os.path.exists(target) and os.path.getsize(target) > 0:
        return
    with open(target, "w", newline="", encoding="utf-8") as f:
        csv.DictWriter(f, fieldnames=CSV_FIELDNAMES).writeheader()


# Call write_headers once to ensure headers are written if the file doesn't exist
write_headers()


In [3]:
def extract_news(url,index):
    """Open a news detail page in its own browser and collect its content.

    A fresh WebDriver is created for the call and is always shut down before
    returning, so repeated calls do not accumulate Chrome processes.

    Args:
        url: Address of the news detail page.
        index: Position of the item in the listing; used only in log messages.

    Returns:
        A ``(images, paragraphs)`` tuple: the ``src`` of the first ``<img>``
        found in each ``<p>`` of the ``wrapper-content`` element, and the
        non-empty text of those ``<p>`` elements. Both lists are empty when
        the content cannot be extracted.

    Raises:
        Exception: Whatever ``setup_driver()`` or ``get()`` raises is
            propagated to the caller.
    """
    logger.info(f"Start Extraction of News detail {index} form Web")
    item = setup_driver()
    try:
        item.get(url)
        # Fixed pause before reading the DOM — presumably to let the page
        # finish rendering; TODO confirm whether an explicit wait would do.
        time.sleep(15)
        try:
            wrapper = item.find_element(By.CLASS_NAME, "wrapper-content")

            item.save_screenshot("page.png")

            # Get all <p> tags inside the wrapper
            paragraphs = wrapper.find_elements(By.TAG_NAME, "p")

            all_paragraphs = []
            all_images = []
            for p in paragraphs:
                # Collect text if exists
                text = p.text.strip()
                if text:
                    all_paragraphs.append(text)

                # Check for <img> tag inside this <p>
                try:
                    img = p.find_element(By.TAG_NAME, "img").get_attribute("src")
                    all_images.append(img)
                except NoSuchElementException:
                    pass  # This paragraph simply has no image

            return all_images, all_paragraphs

        except Exception as e:
            print(f"[{index + 1}] Failed to extract info: {e}")
            return [],[]
    finally:
        # Always release the browser; a failure to close an already-dead
        # browser must not mask the result or the original error.
        try:
            item.quit()
        except Exception as quit_error:
            logger.warning(f"[{index + 1}] Failed to close browser: {quit_error}")

In [4]:
def store_data(row, index,href):
    """Fetch the detail content for one news item and append it to the CSV.

    The detail paragraphs and images returned by ``extract_news`` are stored
    on ``row`` under ``detail`` and ``detail_images``. If extraction raises,
    both fields default to empty lists so every written line has the same
    columns as the header.

    Args:
        row: Dict of listing fields for the item; mutated in place.
        index: Position of the item in the listing; used in log messages.
        href: URL of the item's detail page.

    Returns:
        The completed ``row`` on success, or ``None`` if writing to the CSV
        failed.
    """
    logger.info(f"Start Extraction of News {index} form Web")
    try:
        all_images, all_paragraphs = extract_news(href,index)
        row['detail']=all_paragraphs
        row['detail_images']=all_images
    except Exception as e:
        print(f"[{index + 1}] Failed to Extract Detail: {e}")
        # Keep the row shape stable even when the detail page failed.
        row.setdefault('detail', [])
        row.setdefault('detail_images', [])

    # Fixed column order matching the header row; any unexpected extra keys
    # are appended rather than dropped.
    columns = ["image", "title", "description", "date", "detail", "detail_images"]
    columns += [key for key in row if key not in columns]

    try:
        # Serialise appends so rows from worker threads do not interleave.
        with csv_lock:
            with open(output_file, "a", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=columns, restval="")
                writer.writerow(row)

        logger.info(f"Extraction Complete of News {index} form Web")
        return row

    except Exception as e:
        print(f"[{index + 1}] Failed to insert info: {e}")
        return None

In [5]:
# Load the news listing page and collect every news item element on it.
# The driver stays open because the collected elements are read later.
driver = setup_driver()
driver.get("https://hdbank.com.vn/news/moi-nhat?page=1000")

# Fixed pause before querying the DOM — presumably to let the listing render.
time.sleep(15)

# Bind the name up front so a failed lookup leaves an empty list, not a NameError.
elements = []
try:
    elements = driver.find_elements(By.XPATH, "//li[contains(@class, 'item-news')]")
    logger.info(f"Found {len(elements)} elements to scrape")
    driver.save_screenshot("page.png")
except Exception as e:
    logger.error(f"An error occurred during scraping: {str(e)}", exc_info=True)
element_count = len(elements)
print(f"Found {element_count} elements.")




2025-04-15 10:47:14,796 - INFO - Found 864 elements to scrape


Found 864 elements.


In [6]:
element_count = len(elements)

# Items whose listing index is at or below this value are skipped —
# presumably they were already scraped in an earlier run; TODO confirm.
RESUME_AFTER_INDEX = 558

# ThreadPoolExecutor rejects max_workers=0, so keep at least one worker even
# when the listing turned out to be empty.
with ThreadPoolExecutor(max_workers=max(1, min(4, element_count))) as executor:
    futures_to_indices = {}

    for index, element in enumerate(elements):
        if index <= RESUME_AFTER_INDEX:
            continue
        try:
            # Image (optional)
            try:
                img = element.find_element(By.XPATH, ".//div[contains(@class, 'news-left-box')]//img").get_attribute("src")
            except NoSuchElementException:
                img = ""

            # Title
            title = element.find_element(By.XPATH, ".//div[contains(@class, 'news-right-box')]//p[contains(@class, 'news-title')]").text.strip()

            # Description (optional)
            try:
                description = element.find_element(By.XPATH, ".//p[contains(@class, 'news-description')]").text.strip()
            except NoSuchElementException:
                description = ""

            # Date
            date = element.find_element(By.XPATH, ".//p[contains(@class, 'news-date')]/time").text.strip()

            row = {
                "image": img,
                "title": title,
                "description": description,
                "date": date
            }
            # Take the detail link from the current item, so each row is
            # paired with its own detail page.
            href = element.find_element(By.XPATH, ".//div[contains(@class, 'news-left-box')]/a").get_attribute("href")
            future = executor.submit(store_data, row, index, href)
            futures_to_indices[future] = index
        except Exception as e:
            print(f"[{index + 1}] Failed to extract info: {e}")

    for future in as_completed(futures_to_indices):
        index = futures_to_indices[future]
        try:
            result = future.result()
            if result is None:
                # store_data returns None when the row could not be written.
                logger.warning(f"No row was stored for index {index}")
            else:
                logger.info(f"Successfully scraped store at index {index}")
        except Exception as e:
            logger.error(f"Error scraping store at index {index}: {e}", exc_info=True)

2025-04-15 10:47:29,763 - INFO - Start Extraction of News 559 form Web
2025-04-15 10:47:29,767 - INFO - Start Extraction of News detail 559 form Web
2025-04-15 10:47:29,919 - INFO - Start Extraction of News 560 form Web
2025-04-15 10:47:29,924 - INFO - Start Extraction of News detail 560 form Web
2025-04-15 10:47:30,112 - INFO - Start Extraction of News 561 form Web
2025-04-15 10:47:30,116 - INFO - Start Extraction of News detail 561 form Web
2025-04-15 10:47:30,329 - INFO - Start Extraction of News 562 form Web
2025-04-15 10:47:30,341 - INFO - Start Extraction of News detail 562 form Web
2025-04-15 10:48:06,597 - INFO - Extraction Complete of News 562 form Web
2025-04-15 10:48:06,601 - INFO - Start Extraction of News 563 form Web
2025-04-15 10:48:06,605 - INFO - Start Extraction of News detail 563 form Web
2025-04-15 10:48:13,709 - INFO - Extraction Complete of News 560 form Web
2025-04-15 10:48:13,712 - INFO - Start Extraction of News 564 form Web
2025-04-15 10:48:13,716 - INFO - Sta

[598] Failed to extract info: Message: no such element: Unable to locate element: {"method":"css selector","selector":".wrapper-content"}
  (Session info: chrome=135.0.7049.85); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF7687AF149]
	(No symbol) [0x00007FF7687AF3FC]
	(No symbol) [0x00007FF768802467]
	(No symbol) [0x00007FF7687D712F]
	(No symbol) [0x00007FF7687FF2BB]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF

2025-04-15 10:57:29,776 - INFO - Extraction Complete of News 596 form Web
2025-04-15 10:57:29,785 - INFO - Start Extraction of News 601 form Web
2025-04-15 10:57:29,786 - INFO - Successfully scraped store at index 596
2025-04-15 10:57:29,791 - INFO - Start Extraction of News detail 601 form Web
2025-04-15 10:57:49,908 - INFO - Extraction Complete of News 598 form Web
2025-04-15 10:57:49,912 - INFO - Start Extraction of News 602 form Web
2025-04-15 10:57:49,912 - INFO - Successfully scraped store at index 598
2025-04-15 10:57:49,914 - INFO - Start Extraction of News detail 602 form Web
2025-04-15 10:57:53,930 - INFO - Extraction Complete of News 599 form Web
2025-04-15 10:57:53,934 - INFO - Start Extraction of News 603 form Web
2025-04-15 10:57:53,939 - INFO - Start Extraction of News detail 603 form Web
2025-04-15 10:57:53,935 - INFO - Successfully scraped store at index 599
2025-04-15 10:58:16,589 - INFO - Extraction Complete of News 600 form Web
2025-04-15 10:58:16,593 - INFO - Start

[715] Failed to Extract Detail: Message: session not created: Chrome failed to start: crashed.
  (session not created: DevToolsActivePort file doesn't exist)
  (The process started from chrome location C:\Program Files\Google\Chrome\Application\chrome.exe is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF768795DAF]
	(No symbol) [0x00007FF768791A5D]
	(No symbol) [0x00007FF7687E51E5]
	(No symbol) [0x00007FF7687E47A0]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF7689AD2B4+111236]
	GetHan

2025-04-15 11:34:59,122 - INFO - Extraction Complete of News 714 form Web
2025-04-15 11:34:59,124 - INFO - Start Extraction of News 715 form Web
2025-04-15 11:34:59,126 - INFO - Start Extraction of News detail 715 form Web
2025-04-15 11:34:59,127 - INFO - Successfully scraped store at index 714
2025-04-15 11:35:13,179 - INFO - Extraction Complete of News 711 form Web
2025-04-15 11:35:13,182 - INFO - Start Extraction of News 716 form Web
2025-04-15 11:35:13,182 - INFO - Successfully scraped store at index 711
2025-04-15 11:35:13,184 - INFO - Start Extraction of News detail 716 form Web


[717] Failed to Extract Detail: Message: session not created: Chrome failed to start: crashed.
  (session not created: DevToolsActivePort file doesn't exist)
  (The process started from chrome location C:\Program Files\Google\Chrome\Application\chrome.exe is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF768795DAF]
	(No symbol) [0x00007FF768791A5D]
	(No symbol) [0x00007FF7687E51E5]
	(No symbol) [0x00007FF7687E47A0]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF7689AD2B4+111236]
	GetHan

2025-04-15 11:35:24,820 - INFO - Extraction Complete of News 716 form Web
2025-04-15 11:35:24,822 - INFO - Start Extraction of News 717 form Web
2025-04-15 11:35:24,825 - INFO - Start Extraction of News detail 717 form Web
2025-04-15 11:35:24,824 - INFO - Successfully scraped store at index 716
2025-04-15 11:35:25,699 - INFO - Extraction Complete of News 713 form Web
2025-04-15 11:35:25,702 - INFO - Start Extraction of News 718 form Web
2025-04-15 11:35:25,705 - INFO - Start Extraction of News detail 718 form Web
2025-04-15 11:35:25,714 - INFO - Successfully scraped store at index 713


[714] Failed to Extract Detail: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: Unable to receive message from renderer
  (Session info: chrome=135.0.7049.84)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF7687465AC]
	(No symbol) [0x00007FF76874629A]
	(No symbol) [0x00007FF768743F4A]
	(No symbol) [0x00007FF7687448FF]
	(No symbol) [0x00007FF7687534FE]
	(No symbol) [0x00007FF768769931]
	(No symbol) [0x00007FF7687708DA]
	(No symbol) [0x00007FF76874506D]
	(No symbol) [0x00007FF768769121]
	(No symbol) [0x00007FF7687FF650]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVer

2025-04-15 11:35:47,189 - INFO - Extraction Complete of News 717 form Web
2025-04-15 11:35:47,193 - INFO - Start Extraction of News 719 form Web
2025-04-15 11:35:47,193 - INFO - Successfully scraped store at index 717
2025-04-15 11:35:47,195 - INFO - Start Extraction of News detail 719 form Web
2025-04-15 11:36:14,052 - INFO - Extraction Complete of News 719 form Web
2025-04-15 11:36:14,054 - INFO - Start Extraction of News 720 form Web
2025-04-15 11:36:14,056 - INFO - Start Extraction of News detail 720 form Web
2025-04-15 11:36:14,056 - INFO - Successfully scraped store at index 719
2025-04-15 11:36:34,630 - INFO - Extraction Complete of News 712 form Web
2025-04-15 11:36:34,632 - INFO - Start Extraction of News 721 form Web
2025-04-15 11:36:34,633 - INFO - Successfully scraped store at index 712
2025-04-15 11:36:34,634 - INFO - Start Extraction of News detail 721 form Web


[713] Failed to extract info: HTTPConnectionPool(host='localhost', port=64632): Read timed out. (read timeout=120)


2025-04-15 11:36:52,448 - INFO - Extraction Complete of News 720 form Web
2025-04-15 11:36:52,450 - INFO - Start Extraction of News 722 form Web
2025-04-15 11:36:52,451 - INFO - Successfully scraped store at index 720
2025-04-15 11:36:52,453 - INFO - Start Extraction of News detail 722 form Web
2025-04-15 11:37:18,314 - INFO - Extraction Complete of News 721 form Web
2025-04-15 11:37:18,317 - INFO - Start Extraction of News 723 form Web
2025-04-15 11:37:18,319 - INFO - Start Extraction of News detail 723 form Web
2025-04-15 11:37:18,318 - INFO - Successfully scraped store at index 721
2025-04-15 11:37:31,360 - INFO - Extraction Complete of News 722 form Web
2025-04-15 11:37:31,363 - INFO - Start Extraction of News 724 form Web
2025-04-15 11:37:31,590 - INFO - Start Extraction of News detail 724 form Web
2025-04-15 11:37:31,522 - INFO - Successfully scraped store at index 722
2025-04-15 11:37:47,617 - INFO - Extraction Complete of News 718 form Web
2025-04-15 11:37:47,620 - INFO - Start

[719] Failed to extract info: HTTPConnectionPool(host='localhost', port=57591): Read timed out. (read timeout=120)


2025-04-15 11:38:12,341 - INFO - Extraction Complete of News 723 form Web
2025-04-15 11:38:12,345 - INFO - Start Extraction of News 726 form Web
2025-04-15 11:38:12,346 - INFO - Successfully scraped store at index 723
2025-04-15 11:38:12,348 - INFO - Start Extraction of News detail 726 form Web
2025-04-15 11:38:22,833 - INFO - Extraction Complete of News 724 form Web
2025-04-15 11:38:22,836 - INFO - Start Extraction of News 727 form Web
2025-04-15 11:38:22,837 - INFO - Successfully scraped store at index 724
2025-04-15 11:38:22,838 - INFO - Start Extraction of News detail 727 form Web
2025-04-15 11:38:39,368 - INFO - Extraction Complete of News 725 form Web
2025-04-15 11:38:39,371 - INFO - Start Extraction of News 728 form Web
2025-04-15 11:38:39,372 - INFO - Successfully scraped store at index 725
2025-04-15 11:38:39,374 - INFO - Start Extraction of News detail 728 form Web
2025-04-15 11:39:01,052 - INFO - Extraction Complete of News 726 form Web
2025-04-15 11:39:01,056 - INFO - Start

[765] Failed to extract info: HTTPConnectionPool(host='localhost', port=51139): Read timed out. (read timeout=120)


2025-04-15 11:54:30,518 - INFO - Extraction Complete of News 768 form Web
2025-04-15 11:54:30,521 - INFO - Start Extraction of News 771 form Web
2025-04-15 11:54:30,522 - INFO - Successfully scraped store at index 768
2025-04-15 11:54:30,524 - INFO - Start Extraction of News detail 771 form Web
2025-04-15 11:54:50,634 - INFO - Extraction Complete of News 769 form Web
2025-04-15 11:54:50,638 - INFO - Start Extraction of News 772 form Web
2025-04-15 11:54:50,638 - INFO - Successfully scraped store at index 769
2025-04-15 11:54:50,640 - INFO - Start Extraction of News detail 772 form Web
2025-04-15 11:55:01,351 - INFO - Extraction Complete of News 771 form Web
2025-04-15 11:55:01,354 - INFO - Start Extraction of News 773 form Web
2025-04-15 11:55:01,355 - INFO - Successfully scraped store at index 771
2025-04-15 11:55:01,357 - INFO - Start Extraction of News detail 773 form Web
2025-04-15 11:55:08,417 - INFO - Extraction Complete of News 770 form Web
2025-04-15 11:55:08,421 - INFO - Start

[798] Failed to extract info: Message: no such element: Unable to locate element: {"method":"css selector","selector":".wrapper-content"}
  (Session info: chrome=135.0.7049.85); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF7687AF149]
	(No symbol) [0x00007FF7687AF3FC]
	(No symbol) [0x00007FF768802467]
	(No symbol) [0x00007FF7687D712F]
	(No symbol) [0x00007FF7687FF2BB]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF

2025-04-15 12:04:01,239 - INFO - Extraction Complete of News 795 form Web
2025-04-15 12:04:01,243 - INFO - Start Extraction of News 800 form Web
2025-04-15 12:04:01,248 - INFO - Start Extraction of News detail 800 form Web
2025-04-15 12:04:01,245 - INFO - Successfully scraped store at index 795
2025-04-15 12:04:29,303 - INFO - Extraction Complete of News 796 form Web
2025-04-15 12:04:29,307 - INFO - Start Extraction of News 801 form Web
2025-04-15 12:04:29,307 - INFO - Successfully scraped store at index 796
2025-04-15 12:04:29,311 - INFO - Start Extraction of News detail 801 form Web
2025-04-15 12:04:36,252 - INFO - Extraction Complete of News 798 form Web
2025-04-15 12:04:36,255 - INFO - Start Extraction of News 802 form Web
2025-04-15 12:04:36,257 - INFO - Successfully scraped store at index 798
2025-04-15 12:04:36,258 - INFO - Start Extraction of News detail 802 form Web
2025-04-15 12:04:52,158 - INFO - Extraction Complete of News 799 form Web
2025-04-15 12:04:52,161 - INFO - Start

[815] Failed to Extract Detail: Message: session not created: Chrome failed to start: crashed.
  (session not created: DevToolsActivePort file doesn't exist)
  (The process started from chrome location C:\Users\Techset\.cache\selenium\chrome\win64\135.0.7049.84\chrome.exe is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF768795DAF]
	(No symbol) [0x00007FF768791A5D]
	(No symbol) [0x00007FF7687E51E5]
	(No symbol) [0x00007FF7687E47A0]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF7689AD2B

2025-04-15 12:08:45,490 - INFO - Extraction Complete of News 813 form Web
2025-04-15 12:08:45,493 - INFO - Start Extraction of News 815 form Web
2025-04-15 12:08:45,495 - INFO - Start Extraction of News detail 815 form Web
2025-04-15 12:08:45,501 - INFO - Successfully scraped store at index 813


[814] Failed to Extract Detail: Message: tab crashed
  (Session info: chrome=135.0.7049.84)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF768758FDC]
	(No symbol) [0x00007FF76874623B]
	(No symbol) [0x00007FF768743F4A]
	(No symbol) [0x00007FF7687448FF]
	(No symbol) [0x00007FF7687534FE]
	(No symbol) [0x00007FF76876A0E0]
	(No symbol) [0x00007FF7687FF45E]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF7689AD2B4+111236]
	GetHandleVerifier [0x00007FF7689AD462+111666]
	GetHandleVerifier [0x00007FF768993589+5465]
	BaseThreadInitThunk [0x00007FF9BA6EE8D7+23]
	RtlUserThreadStart [0x00007FF9BC51BF6C+44

2025-04-15 12:08:46,553 - INFO - Extraction Complete of News 815 form Web
2025-04-15 12:08:46,555 - INFO - Start Extraction of News 816 form Web
2025-04-15 12:08:46,556 - INFO - Successfully scraped store at index 815
2025-04-15 12:08:46,557 - INFO - Start Extraction of News detail 816 form Web


[816] Failed to Extract Detail: Message: Unable to obtain driver for chrome; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location



2025-04-15 12:08:48,596 - INFO - Extraction Complete of News 816 form Web
2025-04-15 12:08:48,598 - INFO - Start Extraction of News 817 form Web
2025-04-15 12:08:48,598 - INFO - Successfully scraped store at index 816
2025-04-15 12:08:48,599 - INFO - Start Extraction of News detail 817 form Web


[817] Failed to Extract Detail: Message: Unable to obtain driver for chrome; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location



2025-04-15 12:08:49,337 - INFO - Extraction Complete of News 814 form Web
2025-04-15 12:08:49,338 - INFO - Start Extraction of News 818 form Web
2025-04-15 12:08:49,339 - INFO - Successfully scraped store at index 814
2025-04-15 12:08:49,340 - INFO - Start Extraction of News detail 818 form Web
2025-04-15 12:08:49,610 - INFO - Extraction Complete of News 817 form Web
2025-04-15 12:08:49,613 - INFO - Start Extraction of News 819 form Web
2025-04-15 12:08:49,613 - INFO - Successfully scraped store at index 817
2025-04-15 12:08:49,615 - INFO - Start Extraction of News detail 819 form Web


[818] Failed to Extract Detail: Message: Unable to obtain driver for chrome; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location

[820] Failed to Extract Detail: Message: session not created: Chrome failed to start: crashed.
  (session not created: DevToolsActivePort file doesn't exist)
  (The process started from chrome location C:\Program Files\Google\Chrome\Application\chrome.exe is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF768795DAF]
	(No symbol) [0x00007FF768791A5D]
	(No symbol) [0x00007FF7687E51E5]
	(No symbol) [0x00007FF7687E47A0]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF76

2025-04-15 12:09:04,865 - INFO - Extraction Complete of News 819 form Web
2025-04-15 12:09:04,867 - INFO - Start Extraction of News 820 form Web
2025-04-15 12:09:04,868 - INFO - Successfully scraped store at index 819
2025-04-15 12:09:04,869 - INFO - Start Extraction of News detail 820 form Web
2025-04-15 12:09:04,912 - INFO - Extraction Complete of News 818 form Web
2025-04-15 12:09:04,915 - INFO - Start Extraction of News 821 form Web
2025-04-15 12:09:04,915 - INFO - Successfully scraped store at index 818
2025-04-15 12:09:04,917 - INFO - Start Extraction of News detail 821 form Web
2025-04-15 12:09:18,354 - INFO - Extraction Complete of News 821 form Web
2025-04-15 12:09:18,355 - INFO - Start Extraction of News 822 form Web
2025-04-15 12:09:18,356 - INFO - Successfully scraped store at index 821
2025-04-15 12:09:18,357 - INFO - Start Extraction of News detail 822 form Web


[822] Failed to Extract Detail: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: Unable to receive message from renderer
  (Session info: chrome=135.0.7049.85)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF7687465AC]
	(No symbol) [0x00007FF76874629A]
	(No symbol) [0x00007FF768743F4A]
	(No symbol) [0x00007FF7687448FF]
	(No symbol) [0x00007FF7687534FE]
	(No symbol) [0x00007FF768769931]
	(No symbol) [0x00007FF7687708DA]
	(No symbol) [0x00007FF76874506D]
	(No symbol) [0x00007FF768769121]
	(No symbol) [0x00007FF7687FF650]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVer

2025-04-15 12:09:22,791 - INFO - Extraction Complete of News 809 form Web
2025-04-15 12:09:22,793 - INFO - Start Extraction of News 823 form Web
2025-04-15 12:09:22,794 - INFO - Successfully scraped store at index 809
2025-04-15 12:09:22,794 - INFO - Start Extraction of News detail 823 form Web


[810] Failed to extract info: HTTPConnectionPool(host='localhost', port=61509): Read timed out. (read timeout=120)


2025-04-15 12:09:43,821 - INFO - Extraction Complete of News 823 form Web
2025-04-15 12:09:43,824 - INFO - Start Extraction of News 824 form Web
2025-04-15 12:09:43,826 - INFO - Start Extraction of News detail 824 form Web
2025-04-15 12:09:43,830 - INFO - Successfully scraped store at index 823


[824] Failed to extract info: Message: no such element: Unable to locate element: {"method":"css selector","selector":".wrapper-content"}
  (Session info: chrome=135.0.7049.85); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF7687AF149]
	(No symbol) [0x00007FF7687AF3FC]
	(No symbol) [0x00007FF768802467]
	(No symbol) [0x00007FF7687D712F]
	(No symbol) [0x00007FF7687FF2BB]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF

2025-04-15 12:09:53,704 - INFO - Extraction Complete of News 822 form Web
2025-04-15 12:09:53,705 - INFO - Start Extraction of News 825 form Web
2025-04-15 12:09:53,706 - INFO - Successfully scraped store at index 822
2025-04-15 12:09:53,706 - INFO - Start Extraction of News detail 825 form Web
2025-04-15 12:10:00,970 - INFO - Extraction Complete of News 824 form Web
2025-04-15 12:10:10,980 - INFO - Start Extraction of News 826 form Web
2025-04-15 12:10:10,980 - INFO - Successfully scraped store at index 824
2025-04-15 12:10:13,470 - INFO - Extraction Complete of News 812 form Web
2025-04-15 12:10:20,993 - INFO - Start Extraction of News detail 826 form Web
2025-04-15 12:10:29,656 - INFO - Extraction Complete of News 825 form Web
2025-04-15 12:10:41,024 - INFO - Start Extraction of News 827 form Web
2025-04-15 12:10:41,025 - INFO - Successfully scraped store at index 812
2025-04-15 12:11:01,037 - INFO - Start Extraction of News 828 form Web
2025-04-15 12:11:07,829 - INFO - Extraction C

[825] Failed to Extract Detail: Message: session not created: Chrome failed to start: was killed.
  (chrome not reachable)
  (The process started from chrome location C:\Program Files\Google\Chrome\Application\chrome.exe is no longer running, so ChromeDriver is assuming that Chrome has crashed.)
Stacktrace:
	GetHandleVerifier [0x00007FF7689A5335+78597]
	GetHandleVerifier [0x00007FF7689A5390+78688]
	(No symbol) [0x00007FF7687591AA]
	(No symbol) [0x00007FF768795DAF]
	(No symbol) [0x00007FF768791A5D]
	(No symbol) [0x00007FF7687E51E5]
	(No symbol) [0x00007FF7687E47A0]
	(No symbol) [0x00007FF7687D6EC3]
	(No symbol) [0x00007FF7687A03F8]
	(No symbol) [0x00007FF7687A1163]
	GetHandleVerifier [0x00007FF768C4EEED+2870973]
	GetHandleVerifier [0x00007FF768C49698+2848360]
	GetHandleVerifier [0x00007FF768C66973+2967875]
	GetHandleVerifier [0x00007FF7689C017A+188746]
	GetHandleVerifier [0x00007FF7689C845F+222255]
	GetHandleVerifier [0x00007FF7689AD2B4+111236]
	GetHandleVerifier [0x00007FF7689AD462+111

2025-04-15 12:14:48,117 - INFO - Extraction Complete of News 831 form Web
2025-04-15 12:14:48,119 - INFO - Start Extraction of News 835 form Web
2025-04-15 12:14:48,120 - INFO - Successfully scraped store at index 831
2025-04-15 12:14:48,123 - INFO - Start Extraction of News detail 835 form Web
2025-04-15 12:14:57,684 - INFO - Extraction Complete of News 833 form Web
2025-04-15 12:14:57,686 - INFO - Start Extraction of News 836 form Web
2025-04-15 12:14:57,686 - INFO - Successfully scraped store at index 833
2025-04-15 12:14:57,688 - INFO - Start Extraction of News detail 836 form Web
2025-04-15 12:15:03,421 - INFO - Extraction Complete of News 834 form Web
2025-04-15 12:15:03,423 - INFO - Start Extraction of News 837 form Web
2025-04-15 12:15:03,424 - INFO - Successfully scraped store at index 834
2025-04-15 12:15:03,427 - INFO - Start Extraction of News detail 837 form Web
2025-04-15 12:15:06,782 - INFO - Extraction Complete of News 832 form Web
2025-04-15 12:15:06,784 - INFO - Start