In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import logging
import urllib.parse
from concurrent.futures import ThreadPoolExecutor, as_completed

import time

# Set up logging: records at INFO level and above are emitted, each formatted
# as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Logger used by the cells below, named after the running module (`__name__`).
logger = logging.getLogger(__name__)


def setup_driver():
    """Start Chrome with this notebook's browser flags and return the driver.

    Returns:
        The `webdriver.Chrome` instance created with the configured options.
    """
    browser_flags = (
        "--disable-gpu",
        "--window-size=1920,1080",
        "--disable-notifications",
    )
    # Flags that are intentionally left out of `browser_flags`:
    #   "--headless"                                  run without a visible UI
    #   "--proxy-server=http://157.230.149.107:1040"  go through a public proxy

    opts = Options()
    for flag in browser_flags:
        opts.add_argument(flag)

    # Launch the browser with the assembled options.
    return webdriver.Chrome(options=opts)


In [5]:
import csv
import threading
import os
import time
import random
from selenium.webdriver.common.by import By

# Path of the CSV file that write_headers() creates and store_data() appends to.
output_file = "HDBank-atm.csv"
# Held by store_data() around each append so concurrent writers cannot interleave rows.
csv_lock = threading.Lock()  # Lock for thread-safe writing

def write_headers():
    """Write the CSV header row to `output_file` when the file has no content yet.

    The header (`title`, `address`) is written when the file is missing or
    exists but is zero bytes long. A file that already has content is left
    untouched, so rows written earlier are kept.

    Returns:
        None.
    """
    # A zero-byte file has no header either, so it is treated like a missing
    # file; otherwise later appends would produce a CSV without column names.
    needs_header = not os.path.exists(output_file) or os.path.getsize(output_file) == 0
    if not needs_header:
        return

    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["title", "address"])
        writer.writeheader()

# Create the CSV with its header row up front if it is not there yet;
# store_data() only ever appends to the file.
write_headers()


def store_data(row, index):
    """Append one record to the CSV file and hand the record back.

    Args:
        row: Mapping of column name to value; its keys are used as the CSV
            field names for this write.
        index: Zero-based position of the record. Only used in the failure
            message, where it is shown one-based.

    Returns:
        The same `row` object on success, or `None` when the write failed
        (the error is printed rather than raised).
    """
    try:
        # The lock is held for the whole open/append/close sequence.
        with csv_lock, open(output_file, "a", newline="", encoding="utf-8") as csv_file:
            csv.DictWriter(csv_file, fieldnames=row.keys()).writerow(row)
    except Exception as e:
        print(f"[{index + 1}] Failed to insert info: {e}")
        return None
    else:
        return row

In [None]:
# Open the HDBank branch/ATM page in a new browser session.
driver = setup_driver()
driver.get("https://hdbank.com.vn/vi/atm-branch")

# Wait (up to 15 seconds) until the tab list is present in the DOM
WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "ul.tab-list"))
)
# Find the ATM tab by visible text
atm_tab = driver.find_element(By.XPATH, "//ul[@class='tab-list d-flex']/li[.//text()[contains(., 'ATM')]]")

# Click the ATM tab
atm_tab.click()

# Fixed pause to give the list time to update after the tab switch
time.sleep(5)

# Start from an empty list so a failed lookup leaves `elements` defined: the
# error is logged and the cell continues with zero elements instead of
# raising NameError on the lines after the try block.
elements = []
try:
    elements = driver.find_elements(By.XPATH, "//li[contains(@class, 'panel_list__item')]")
    logger.info(f"Found {len(elements)} elements to scrape")
    # Keep a screenshot of the page state the elements were read from.
    driver.save_screenshot("page.png")
except Exception as e:
    logger.error(f"An error occurred during scraping: {str(e)}", exc_info=True)
element_count = len(elements)
print(f"Found {element_count} elements.")

# Randomly pick up to 10 of the found elements (all of them when fewer exist).
# NOTE(review): the extraction loop further down iterates `elements`, not this
# sample - confirm which of the two is intended.
selected_elements = random.sample(elements, min(10, element_count))



In [None]:

# Go through every list item found above, read its title and address text,
# and append them to the CSV as one row. A failure on one item is printed
# and the loop carries on with the next item.
for index, element in enumerate(elements):
    try:
        # Both lookups are relative to the current list item (leading ".").
        title = element.find_element(
            By.XPATH, ".//div[contains(@class, 'panel_list__item-title')]/p"
        ).text
        address = element.find_element(
            By.XPATH, ".//div[contains(@class, 'panel_list__item-desc')]/p"
        ).text

        row = dict(title=title, address=address)
        store_data(row, index)

    except Exception as e:
        print(f"[{index + 1}] Failed to extract info: {e}")