In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager


import pandas as pd

In [2]:
# Set up Chrome options if needed
chrome_options = webdriver.ChromeOptions()
# Add any desired options here, e.g., headless mode
# chrome_options.add_argument('--headless')

# Create a Service object with the path to the ChromeDriver
service = Service(ChromeDriverManager().install())

# Initialize the WebDriver with the Service object and options
driver = webdriver.Chrome(service=service, options=chrome_options)

# Your automation code here

driver.quit()


In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import pandas as pd
import time
import re

In [4]:

def setup_driver():
    """Set up and return a configured Chrome WebDriver."""
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in headless mode (no UI)
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--disable-notifications")
    
    # Initialize the Chrome driver
    driver = webdriver.Chrome(options=chrome_options)
    return driver


In [5]:

def get_shop_details(stores):
    """Visit each shop page and extract detailed information."""
    driver = setup_driver()
    detailed_data = []
    # stores = pd.read_csv("sunway_pyramid_shops.csv")

    i = 0  # Counter for testing, remove later
    
    for _, store in stores.iterrows():

        shop_url = store.get("url", "")
        if not shop_url:
            continue  # Skip if URL is missing

        try:
            driver.get(shop_url)
            
            # Wait for the shop page to load
            WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, ".logocontainer-shopview"))
            )
            
            # Extract shop details
            try:
                logo_element = driver.find_element(By.CSS_SELECTOR, ".logocontainer-shopview img")
                logo = logo_element.get_attribute("src")
            except:
                logo = "N/A"
            
            # Extract shop category
            try:
                category = driver.find_element(By.CSS_SELECTOR, ".__rounded-pill").text.strip()
            except:
                category = "N/A"
                
            # Extract shop description
            try:
                description = driver.find_element(By.CSS_SELECTOR, ".font-normal.long-shop-text-scroll").text.strip()
            except:
                description = "N/A"
                
            # Extract location
            try:
                location = driver.find_element(By.XPATH, "//a[@href='#mapZone']").text.strip()
            except:
                location = "N/A"

            detailed_data.append({
                "name": store["name"],
                "img":store["img"],
                "url": shop_url,
                "logo": logo,
                "category": category,
                "description": description,
                "location": location
            })
            
        except Exception as e:
            print(f"Error scraping details from {shop_url}: {e}")
    
    driver.quit()
    
    # Create DataFrame and save to CSV
    if detailed_data:
        detailed_df = pd.DataFrame(detailed_data)
        detailed_df.to_csv("sunway_pyramid_shop_details.csv", index=False)
        return detailed_df
    
    return None



In [14]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

def extract_texts_from_element(driver, locator):
    """
    Robust method to extract texts from an element
    
    Args:
        driver (webdriver): Selenium WebDriver instance
        locator (tuple): Selenium locator strategy (By, value)
    
    Returns:
        list: Unique text contents of spans
    """
    try:
        # Wait for the element to be present
        parent_div = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(locator)
        )
        
        # Verbose logging
        print("Element found successfully!")
        
        # Find all span elements
        spans = parent_div.find_elements(By.TAG_NAME, 'span')
        
        # Print number of spans found
        print(f"Number of spans found: {len(spans)}")
        
        # Extract text from spans, filtering out empty and duplicate texts
        texts = []
        for span in spans:
            try:
                text = span.text.strip()
                if text and text not in texts:
                    texts.append(text)
                    print(f"Extracted text: {text}")
            except Exception as span_error:
                print(f"Error extracting text from a span: {span_error}")
        
        return texts
    
    except TimeoutException:
        print("Timeout: Element not found within the specified time.")
        return []
    
    except NoSuchElementException:
        print("Element not found on the page.")
        return []
    
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return []

def main():
    # Setup WebDriver (Chrome in this example)
    driver = webdriver.Chrome()
    
    try:
        # Navigate to the page
        driver.get("YOUR_URL_HERE")
        
        # Multiple locator strategies to try
        locator_strategies = [
            (By.CLASS_NAME, 'W4Efsd'),  # Try class name
            (By.XPATH, '//div[contains(@class, "W4Efsd")]'),  # Try XPath
            (By.CSS_SELECTOR, 'div.W4Efsd')  # Try CSS Selector
        ]
        
        # Try different locator strategies
        for locator in locator_strategies:
            print(f"\nTrying locator: {locator}")
            texts = extract_texts_from_element(driver, locator)
            
            if texts:
                print("\nSuccessfully extracted texts:")
                print(texts)
                break
        else:
            print("Could not find the element using any of the provided strategies.")
    
    except Exception as e:
        print(f"An error occurred in main function: {e}")
    
    finally:
        # Close the browser
        driver.quit()

In [55]:
def extract_texts_from_element(driver, element):
    """
    Extract text from all span elements within a given element
    
    Args:
        driver (webdriver): Selenium WebDriver instance
        element (WebElement): Parent element containing spans
    
    Returns:
        list: Unique text contents of spans
    """
    # Find all span elements within the parent element
    spans = element.find_elements(By.TAG_NAME, 'span')
    
    # Extract text from spans, filtering out empty and duplicate texts
    texts = []
    for span in spans:
        text = span.text.strip()
        if text and text not in texts:
            texts.append(text)
    
    return texts

    
async def scrape_sunway_pyramid_directory():
    """Scrape the Sunway Pyramid mall directory."""
    url = "https://www.google.com/maps/place/Othaim+Mall/@24.6856632,44.1387095,7z/data=!4m11!1m2!2m1!1sothaim+mall!3m7!1s0x3e2f06e87cc9db33:0x1ba11d2574bec9d3!8m2!3d24.6856632!4d46.7754282!10e3!15sCgtvdGhhaW0gbWFsbCIDiAEBWg0iC290aGFpbSBtYWxskgEPc2hvcHBpbmdfY2VudGVy4AEA!16s%2Fg%2F11c1qb1ptg?entry=ttu&g_ep=EgoyMDI1MDIyNi4xIKXMDSoJLDEwMjExNDUzSAFQAw%3D%3D"
    driver = setup_driver()
    
    try:
        print("Accessing the Sunway Pyramid directory...")
        driver.get(url)
        
        # Wait for the page to load
        WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".lXJj5c"))
        )

        # Allow some time for initial content to load
        i=0
        # Click "Load More" until it disappears
        while True:
            try:
                i=i+1
                # Find all buttons with class ".btn-outline-danger"
                buttons = driver.find_elements(By.CSS_SELECTOR, ".lXJj5c")
                # Click only the button that contains "Load More"
                load_more_clicked = False
                for button in buttons:
                    if "" in button.text:
                        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)  # Scroll into view
                        time.sleep(1)  # Allow time for scrolling
                        # button.click()
                        time.sleep(2)  # Wait for new items to load
                        load_more_clicked = True
                        break  # Stop looking for other buttons once clicked
                
                # If no button was clicked, exit loop
                if not load_more_clicked or i==2:
                    print("No more 'Load More' button. All items loaded.")
                    break  

            except Exception as e:
                print(f"Error clicking 'Load More': {e}")
                break  # Exit loop if an error occurs


        # Extract all shop elements
        shop_data = []
        par = driver.find_element(By.XPATH, "//*[contains(@class, 'm6QErb') and contains(@class, 'DxyBCb') and contains(@class, 'kA9KIf') and contains(@class, 'dS8AEf') and contains(@class, 'XiKgde')]")
        cards = par.find_elements(By.CSS_SELECTOR, ".lI9IFe")
        print(cards)
        for card in cards:
            try:
                # Extract shop name
                title = card.find_element(By.CSS_SELECTOR, '.fontHeadlineSmall ').text.strip()
                print(title)
                rating =  card.find_element(By.CSS_SELECTOR, '.ZkP5Je').get_attribute("aria-label")
                
                 # Try to find and extract texts
                locator_strategies = [
                    (By.CLASS_NAME, 'W4Efsd'),
                    (By.XPATH, '//div[contains(@class, "W4Efsd")][2]'),
                    (By.CSS_SELECTOR, 'div.W4Efsd:nth-of-type(2)')
                ]
                
                for locator in locator_strategies:
                    print(f"\nTrying locator: {locator}")
                    
                    # Find the element
                    try:
                        element = driver.find_element(*locator)
                        print("Element found successfully!")
                        
                        # Find spans within the element
                        spans = element.find_elements(By.TAG_NAME, 'span')
                        print(f"Number of spans found: {len(spans)}")
                        
                        # Extract and print texts
                        texts = [span.text.strip() for span in spans if span.text.strip()]
                        print("Extracted texts:")
                        for text in texts:
                            print(text)
                        
                        return texts
                    
                    except Exception as e:
                        print(f"Error with locator {locator}: {e}")
                
                print("Could not find the element using any strategy.")
            

                # img_url = card.find_element(By.TAG_NAME, 'img').get_attribute("src")

                # Append to list
                shop_data.append({"name": title})
            except Exception as e:
                print(f"Error processing card: {e}")

        # Convert to DataFrame and save as CSV
        df = pd.DataFrame(shop_data)
        # df.to_csv("sunway_pyramid_shops.csv", index=False)
        print(f"Successfully scraped {len(shop_data)} shops from Sunway Pyramid directory.")

        return df

    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        driver.quit()


In [56]:

# Scrape the main directory
shops_df = await scrape_sunway_pyramid_directory()

Accessing the Sunway Pyramid directory...
No more 'Load More' button. All items loaded.
[<selenium.webdriver.remote.webelement.WebElement (session="22a3408a8c9c2b8d8b139e2f2a8cbf4c", element="f.91B93C6B0FBCA1F50A4E91789A072CD8.d.C13C35B655FD4AE46E331F7C623B9B91.e.67")>, <selenium.webdriver.remote.webelement.WebElement (session="22a3408a8c9c2b8d8b139e2f2a8cbf4c", element="f.91B93C6B0FBCA1F50A4E91789A072CD8.d.C13C35B655FD4AE46E331F7C623B9B91.e.68")>, <selenium.webdriver.remote.webelement.WebElement (session="22a3408a8c9c2b8d8b139e2f2a8cbf4c", element="f.91B93C6B0FBCA1F50A4E91789A072CD8.d.C13C35B655FD4AE46E331F7C623B9B91.e.69")>, <selenium.webdriver.remote.webelement.WebElement (session="22a3408a8c9c2b8d8b139e2f2a8cbf4c", element="f.91B93C6B0FBCA1F50A4E91789A072CD8.d.C13C35B655FD4AE46E331F7C623B9B91.e.70")>, <selenium.webdriver.remote.webelement.WebElement (session="22a3408a8c9c2b8d8b139e2f2a8cbf4c", element="f.91B93C6B0FBCA1F50A4E91789A072CD8.d.C13C35B655FD4AE46E331F7C623B9B91.e.71")>, <

In [22]:
# shops_df

In [42]:
get_shop_details(shops_df)

Unnamed: 0,name,img,url,logo,category,description,location
0,1 MOBILE STORE,https://www.sunwaypyramid.com/static/shops/2eb...,https://www.sunwaypyramid.com/directory/1-mobi...,https://www.sunwaypyramid.com/static/shops/a2c...,Digital Lifestyle,1 Mobile Store (also known as One Specialist M...,F1.39 & F1.39A (First Floor)
1,1 Specialist Mobile,https://www.sunwaypyramid.com/static/shops/273...,https://www.sunwaypyramid.com/directory/1-spec...,https://www.sunwaypyramid.com/static/shops/ed2...,Digital Lifestyle,Retailing of gadget accessories and related pr...,F1.38A (First Floor)
2,100 Vision,https://www.sunwaypyramid.com/static/shops/66e...,https://www.sunwaypyramid.com/directory/100-vi...,https://www.sunwaypyramid.com/static/shops/315...,Eyewear,"Retailing of eyewear, sunglasses and related p...",F1.101 (First Floor)
3,108 Matcha Saro,https://www.sunwaypyramid.com/static/shops/d82...,https://www.sunwaypyramid.com/directory/108-ma...,https://www.sunwaypyramid.com/static/shops/630...,Food & Beverages,108 Matcha Saro celebrates the rich tradition ...,GF.122C (Ground Floor)
4,30DEGREES,https://www.sunwaypyramid.com/static/shops/1f8...,https://www.sunwaypyramid.com/directory/30degrees,https://www.sunwaypyramid.com/static/shops/fb8...,Fashion,"Retailing of ladies sports fashion, lingerie a...",LG1.02 (Lower Ground 1 Floor)
...,...,...,...,...,...,...,...
817,iORA,https://www.sunwaypyramid.com/static/shops/03a...,https://www.sunwaypyramid.com/directory/iora,https://www.sunwaypyramid.com/static/shops/07e...,Fashion,The heart of the business is well-made apparel...,"LG1.06, 07 & 10 (Lower Ground 1 Floor)"
818,llao llao,https://www.sunwaypyramid.com/static/shops/14c...,https://www.sunwaypyramid.com/directory/llao-llao,https://www.sunwaypyramid.com/static/shops/96d...,Food & Beverages,Retailing of frozen yogurt and related products,F1.49 (First Floor)
819,myEureka Snack Bar,https://www.sunwaypyramid.com/static/shops/5bc...,https://www.sunwaypyramid.com/directory/myeure...,https://www.sunwaypyramid.com/static/shops/97c...,Food & Beverages,Retailing of assorted flavoured popcorn.,F1.100D (First Floor)
820,nononsense coffee,https://www.sunwaypyramid.com/static/shops/bc9...,https://www.sunwaypyramid.com/directory/nonons...,https://www.sunwaypyramid.com/static/shops/5c5...,Food & Beverages,"Instilled with the elements of an Otaku Room, ...",LG1.33 - 37 (inside Atmos) (Lower Ground 1 Floor)


In [34]:
# get_shop_details()

Unnamed: 0,name,url,logo,category,description,location
0,1 MOBILE STORE,https://www.sunwaypyramid.com/directory/1-mobi...,https://www.sunwaypyramid.com/static/shops/a2c...,Digital Lifestyle,1 Mobile Store (also known as One Specialist M...,F1.39 & F1.39A (First Floor)
1,1 Specialist Mobile,https://www.sunwaypyramid.com/directory/1-spec...,https://www.sunwaypyramid.com/static/shops/ed2...,Digital Lifestyle,Retailing of gadget accessories and related pr...,F1.38A (First Floor)
2,100 Vision,https://www.sunwaypyramid.com/directory/100-vi...,https://www.sunwaypyramid.com/static/shops/315...,Eyewear,"Retailing of eyewear, sunglasses and related p...",F1.101 (First Floor)
3,108 Matcha Saro,https://www.sunwaypyramid.com/directory/108-ma...,https://www.sunwaypyramid.com/static/shops/630...,Food & Beverages,108 Matcha Saro celebrates the rich tradition ...,GF.122C (Ground Floor)
4,30DEGREES,https://www.sunwaypyramid.com/directory/30degrees,https://www.sunwaypyramid.com/static/shops/fb8...,Fashion,"Retailing of ladies sports fashion, lingerie a...",LG1.02 (Lower Ground 1 Floor)
...,...,...,...,...,...,...
817,iORA,https://www.sunwaypyramid.com/directory/iora,https://www.sunwaypyramid.com/static/shops/07e...,Fashion,The heart of the business is well-made apparel...,"LG1.06, 07 & 10 (Lower Ground 1 Floor)"
818,llao llao,https://www.sunwaypyramid.com/directory/llao-llao,https://www.sunwaypyramid.com/static/shops/96d...,Food & Beverages,Retailing of frozen yogurt and related products,F1.49 (First Floor)
819,myEureka Snack Bar,https://www.sunwaypyramid.com/directory/myeure...,https://www.sunwaypyramid.com/static/shops/97c...,Food & Beverages,Retailing of assorted flavoured popcorn.,F1.100D (First Floor)
820,nononsense coffee,https://www.sunwaypyramid.com/directory/nonons...,https://www.sunwaypyramid.com/static/shops/5c5...,Food & Beverages,"Instilled with the elements of an Otaku Room, ...",LG1.33 - 37 (inside Atmos) (Lower Ground 1 Floor)
