In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import logging
import urllib.parse
from concurrent.futures import ThreadPoolExecutor, as_completed

import time

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def setup_driver():
    """Create a headless Chrome WebDriver preconfigured for scraping.

    Returns:
        A new ``webdriver.Chrome`` instance started with the flags listed
        below. The caller owns the driver.
    """
    # Command-line switches, applied to Chrome in this order.
    browser_flags = (
        "--headless",               # no visible browser window
        "--disable-gpu",
        "--window-size=1920,1080",
        "--disable-notifications",
        # To route traffic through a proxy, add a
        # '--proxy-server=http://host:port' entry here.
    )

    chrome_options = Options()
    for flag in browser_flags:
        chrome_options.add_argument(flag)

    return webdriver.Chrome(options=chrome_options)


In [2]:
import csv
import threading
import os
import time
import random
from selenium.webdriver.common.by import By

output_file = "BizUnifi-promos.csv"
csv_lock = threading.Lock()  # Lock for thread-safe writing

# Column order of the header row written by write_headers().
CSV_FIELDNAMES = ["image", "title", "description", "date", "detail"]


def write_headers(path=None):
    """Write the CSV header row unless the target file already has content.

    Args:
        path: File to initialise. Defaults to the module-level ``output_file``.

    The header is written when the file is missing *or* exists but is empty
    (for example after an interrupted run); a file that already contains data
    is left untouched.
    """
    target = output_file if path is None else path
    if os.path.exists(target) and os.path.getsize(target) > 0:
        return
    with open(target, "w", newline="", encoding="utf-8") as f:
        csv.DictWriter(f, fieldnames=CSV_FIELDNAMES).writeheader()


# Call write_headers once to ensure headers are written if the file doesn't exist
write_headers()


In [3]:
def extract_promotions(url, index):
    """Load ``url`` in its own headless browser and return the promotion text.

    Args:
        url: Address of the promotion detail page.
        index: Position of the promotion in the listing; used for logging only.

    Returns:
        The visible text of the first element with class ``rounded-t-3xl``,
        or ``""`` when the page cannot be loaded or the element does not
        become visible within 180 seconds.

    The browser is always shut down before returning so that worker threads
    do not leave Chrome processes behind.
    """
    logger.info(f"Start Extraction of Promotions from {url} (Index: {index})")

    item = setup_driver()
    try:
        # Navigation sits inside the try so a bad URL yields "" like any
        # other extraction failure instead of escaping to the caller.
        item.get(url)
        time.sleep(5)
        print("extract_promotions1")
        # until() returns the element once it is visible, so no second
        # lookup is needed.
        wrapper = WebDriverWait(item, 180).until(
            EC.visibility_of_element_located((By.CLASS_NAME, "rounded-t-3xl"))
        )
        print("extract_promotions2")
        return wrapper.text

    except Exception as e:
        logger.error(f"Error extracting promotion data from {url}: {e}")
        return ""

    finally:
        # Release the browser on every path; a failing quit() must not mask
        # the value being returned.
        try:
            item.quit()
        except Exception as quit_error:
            logger.warning(f"Could not close browser for {url}: {quit_error}")

In [4]:
def store_data(row, index, href):
    """Fetch the detail text for one promotion and append it to the CSV.

    Args:
        row: Dict of already-scraped fields; a ``detail`` key is added in place.
        index: Position of the promotion in the listing (used in messages).
        href: URL of the promotion's detail page.

    Returns:
        The updated ``row`` when it was appended to ``output_file``,
        otherwise ``None``.
    """
    logger.info(f"Start Extraction of  {index} form Web")
    try:
        row['detail'] = extract_promotions(href, index)
    except Exception as e:
        print(f"[{index + 1}] Failed to Extract Detail: {e}")
        # Keep the column present so this row still lines up with the header.
        row.setdefault('detail', "")

    try:
        # Serialise appends: several worker threads share one output file.
        with csv_lock:
            with open(output_file, "a", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=list(row.keys()))
                writer.writerow(row)

        logger.info(f"Extraction Complete of News {index} form Web")
        return row

    except Exception as e:
        print(f"[{index + 1}] Failed to insert info: {e}")
        return None

In [19]:
# Browser used for the promotions listing page.
driver = setup_driver()

# Allow up to 180 seconds for page loads and for asynchronous scripts.
driver.set_page_load_timeout(180)
driver.set_script_timeout(180)

elements = []  # remains empty if the scrape below fails

try:
    driver.get("https://biz.unifi.com.my/promotions")
    time.sleep(15)  # fixed pause before querying the DOM

    # One match per <div> whose class attribute contains 'md:max-w-[540px]'.
    elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'md:max-w-[540px]')]")

    if elements:
        print(f"Found--------------------------- {len(elements)} elements.")
    else:
        print("No elements found using the provided XPath.")
    element_count = len(elements)
    logger.info(f"Found {element_count} elements to scrape")
    driver.save_screenshot("hello.png")  # overwritten on every run

except Exception as e:
    logger.error(f"An error occurred during scraping: {str(e)}", exc_info=True)

# Recomputed outside the try so the count is defined even after a failure.
element_count = len(elements)
print(f"Found {element_count} elements.")




2025-05-02 16:49:51,387 - INFO - Found 10 elements to scrape


Found--------------------------- 10 elements.
Found 10 elements.


In [23]:
# Title
# Read the <h4> text inside the 'p-6' container of the first matched element.
try:
    title = elements[0].find_element(By.XPATH, ".//div[contains(@class, 'p-6')]//h4").text
except Exception:
    # Any failure (empty `elements`, missing node, ...) yields an empty title.
    title = ""

In [24]:
# Display the title read from the first matched element.
# Uncomment the next line to display that element's outer HTML instead.
# elements[0].get_attribute("outerHTML")

title

'Ultra-Fast Internet For Your Business!'

In [26]:
element_count = len(elements)

# Class predicate identifying a promotion's call-to-action anchor.
_LINK_PREDICATE = (
    "contains(@class, 'overflow-hidden') and contains(@class, 'inline-flex') "
    "and contains(@class, 'text-tm-cobalt-blue-600')"
)

# Page-wide list of matching anchors; only used as a positional fallback.
links = driver.find_elements(By.XPATH, f"//a[{_LINK_PREDICATE}]")


def _best_effort(getter):
    """Return ``getter()``, or "" when it raises or yields ``None``."""
    try:
        value = getter()
    except Exception:
        return ""
    return "" if value is None else value


def _card_href(card, position):
    """Return the detail-page URL for ``card``, or "" when none is found.

    The anchor is looked up inside the card first, so the URL cannot be
    paired with the wrong card when the page holds a different number of
    matching anchors than cards. If the card has no such anchor, the
    page-wide ``links`` list is indexed by ``position`` instead.
    """
    scoped = _best_effort(
        lambda: card.find_element(By.XPATH, f".//a[{_LINK_PREDICATE}]").get_attribute("href")
    )
    if scoped:
        return scoped
    return _best_effort(lambda: links[position].get_attribute("href"))


if element_count > 0:  # Ensure element_count is greater than 0
    with ThreadPoolExecutor(max_workers=min(4, element_count)) as executor:
        futures_to_indices = {}

        for index, element in enumerate(elements):
            if index == 0:
                # The first match is skipped; adjust this condition if needed.
                continue
            try:
                row = {
                    "image": _best_effort(
                        lambda: element.find_element(
                            By.XPATH, ".//div[contains(@class, 'card_image')]//img"
                        ).get_attribute("src")
                    ),
                    "title": _best_effort(
                        lambda: element.find_element(
                            By.XPATH, ".//div[contains(@class, 'p-6')]//h4"
                        ).text
                    ),
                    "description": _best_effort(
                        lambda: element.find_element(
                            By.XPATH, ".//div[contains(@class, 'p-6')]//p"
                        ).text
                    ),
                    # No date is scraped from the card; the column is left blank.
                    "date": "",
                }
                href = _card_href(element, index)

                # Each detail page is fetched by a worker thread.
                future = executor.submit(store_data, row, index, href)
                futures_to_indices[future] = index
            except Exception as e:
                print(f"[{index + 1}] Failed to extract info: {e}")

        for future in as_completed(futures_to_indices):
            index = futures_to_indices[future]
            try:
                result = future.result()
                if result is None:
                    # store_data returns None when the row was not written.
                    logger.warning(f"No row was stored for index {index}")
                else:
                    logger.info(f"Successfully scraped store at index {index}")
            except Exception as e:
                logger.error(f"Error scraping store at index {index}: {e}", exc_info=True)
else:
    logger.warning("No elements found to process.")


2025-05-02 16:59:28,240 - INFO - Start Extraction of  1 form Web
2025-05-02 16:59:28,243 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/mobilebiz (Index: 1)
2025-05-02 16:59:28,367 - INFO - Start Extraction of  2 form Web
2025-05-02 16:59:28,375 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/mobilebiz (Index: 2)
2025-05-02 16:59:28,485 - INFO - Start Extraction of  3 form Web
2025-05-02 16:59:28,494 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/bizdeal (Index: 3)
2025-05-02 16:59:28,659 - INFO - Start Extraction of  4 form Web
2025-05-02 16:59:28,681 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/mobilebiz (Index: 4)


extract_promotions1
extract_promotions1
extract_promotions2


2025-05-02 16:59:48,344 - INFO - Extraction Complete of News 1 form Web
2025-05-02 16:59:48,346 - INFO - Start Extraction of  5 form Web
2025-05-02 16:59:48,352 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/products/mobile-postpaid (Index: 5)
2025-05-02 16:59:48,347 - INFO - Successfully scraped store at index 1


extract_promotions2


2025-05-02 16:59:49,295 - INFO - Extraction Complete of News 3 form Web
2025-05-02 16:59:49,299 - INFO - Start Extraction of  6 form Web
2025-05-02 16:59:49,302 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/products/mobile-postpaid (Index: 6)
2025-05-02 16:59:49,309 - INFO - Successfully scraped store at index 3


extract_promotions1
extract_promotions2
extract_promotions1
extract_promotions2


2025-05-02 17:00:16,256 - INFO - Extraction Complete of News 6 form Web
2025-05-02 17:00:16,259 - INFO - Start Extraction of  7 form Web
2025-05-02 17:00:16,260 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/products/mobile-postpaid (Index: 7)
2025-05-02 17:00:16,261 - INFO - Successfully scraped store at index 6
2025-05-02 17:00:17,108 - INFO - Extraction Complete of News 5 form Web
2025-05-02 17:00:17,111 - INFO - Start Extraction of  8 form Web
2025-05-02 17:00:17,114 - INFO - Start Extraction of Promotions from https://forms.office.com/pages/responsepage.aspx?id=QMBG3t3170iQp7BswXlQ8cOJUOGltOtBrU87HrW5eapURVU3OEJBQ0gxOEVKVVhGWTlNSlI3RVpDNC4u (Index: 8)
2025-05-02 17:00:17,113 - INFO - Successfully scraped store at index 5


extract_promotions1
extract_promotions1
extract_promotions2


2025-05-02 17:00:36,304 - INFO - Extraction Complete of News 7 form Web
2025-05-02 17:00:36,307 - INFO - Start Extraction of  9 form Web
2025-05-02 17:00:36,308 - INFO - Successfully scraped store at index 7
2025-05-02 17:00:36,310 - INFO - Start Extraction of Promotions from https://biz.unifi.com.my/promotions/go-niaga (Index: 9)


extract_promotions1
extract_promotions2


2025-05-02 17:00:45,952 - INFO - Extraction Complete of News 4 form Web
2025-05-02 17:00:45,956 - INFO - Successfully scraped store at index 4


extract_promotions1
extract_promotions2


2025-05-02 17:00:52,211 - INFO - Extraction Complete of News 9 form Web
2025-05-02 17:00:52,221 - INFO - Successfully scraped store at index 9


extract_promotions1
extract_promotions2


2025-05-02 17:01:54,745 - INFO - Extraction Complete of News 2 form Web
2025-05-02 17:01:54,749 - INFO - Successfully scraped store at index 2
2025-05-02 17:03:29,980 - ERROR - Error extracting promotion data from https://forms.office.com/pages/responsepage.aspx?id=QMBG3t3170iQp7BswXlQ8cOJUOGltOtBrU87HrW5eapURVU3OEJBQ0gxOEVKVVhGWTlNSlI3RVpDNC4u: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF69411EFA5+77893]
	GetHandleVerifier [0x00007FF69411F000+77984]
	(No symbol) [0x00007FF693EE91BA]
	(No symbol) [0x00007FF693F3F16D]
	(No symbol) [0x00007FF693F3F41C]
	(No symbol) [0x00007FF693F92237]
	(No symbol) [0x00007FF693F6716F]
	(No symbol) [0x00007FF693F8F07F]
	(No symbol) [0x00007FF693F66F03]
	(No symbol) [0x00007FF693F30328]
	(No symbol) [0x00007FF693F31093]
	GetHandleVerifier [0x00007FF6943D7B6D+2931725]
	GetHandleVerifier [0x00007FF6943D2132+2908626]
	GetHandleVerifier [0x00007FF6943F00F3+3031443]
	GetHandleVerifier [0x00007FF6941391EA+184970]
	GetHandleVerifier [0x00007FF69414086F+21

In [7]:
# Display the outer HTML of the first matched element.
elements[0].get_attribute("outerHTML")

'<div class="md:max-w-[540px]"><div class="inline-flex items-center rounded-lg px-2.5 py-0.5 font-semibold transition-colors select-none from-tm-accent-orange-500 to-tm-cobalt-blue-600 text-white bg-gradient-to-r relative z-20 mb-2 ml-4 text-sm">Sign Up Now!</div><div class="w-[380px] max-w-full  rounded-2xl xl:w-[380px] 2xl:w-[560px]" style="box-shadow:0 1px 2px 0 rgb(0 0 0 / 0.1)"><div class="card_image relative -mt-6 h-56 w-full"><img alt="Promotion 9" loading="eager" width="800" height="450" decoding="async" data-nimg="1" class="h-56 w-full justify-center object-contain" style="color:transparent" srcset="/_next/image?url=%2Fimages%2Fpromotions%2Fbusinessfiesta%2FUltra_Campaign_V2_Web-Banners-ENG-trapezoid_1_optimized_850.png&amp;w=828&amp;q=75 1x, /_next/image?url=%2Fimages%2Fpromotions%2Fbusinessfiesta%2FUltra_Campaign_V2_Web-Banners-ENG-trapezoid_1_optimized_850.png&amp;w=1920&amp;q=75 2x" src="/_next/image?url=%2Fimages%2Fpromotions%2Fbusinessfiesta%2FUltra_Campaign_V2_Web-Banne

In [8]:
# Navigate the shared `driver` to the promotions listing page.
# NOTE(review): WebElements located before this navigation (e.g. `elements`)
# presumably become stale afterwards — re-run the listing cell; TODO confirm.
driver.get("https://biz.unifi.com.my/promotions")
