## imports

In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


## getting the links for each laptop

In [16]:
# Set up Selenium WebDriver
driver = webdriver.Chrome()
driver.get("https://www.jarir.com/sa-en/computers-peripherals/laptops.html")

# Set to scroll until no new products load
SCROLL_PAUSE_TIME = 2
product_links = set()
last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    # Scroll down to bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)  # Wait for page to load

    # Extract product links
    products = driver.find_elements(By.CSS_SELECTOR, "a.product-tile__link")
    for product in products:
        link = product.get_attribute("href")
        if link:
            product_links.add(link)

    # Calculate new scroll height and compare with last scroll height
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break  # Exit loop if no new products are loaded
    last_height = new_height

# Close the driver
driver.quit()


In [17]:
# Convert product links set to a DataFrame
product_links_df = pd.DataFrame(list(product_links), columns=["Product Links"])

In [18]:
# Save the DataFrame to a CSV file
product_links_df.to_csv("product_links.csv", index=False)

In [19]:
# Print confirmation
print(f"Saved {len(product_links)} product links to 'product_links.csv'")

Saved 205 product links to 'product_links.csv'


## using the links to get the info

In [28]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

# Set up Selenium WebDriver
driver = webdriver.Chrome()

# Load the CSV file containing the product links
product_links_df = pd.read_csv("product_links.csv")
product_links = product_links_df["Product Links"].tolist()#[6:10]

# Initialize an empty list to store product details
product_data = []

# Iterate through each product link
for link in product_links:
    driver.get(link)
    time.sleep(2)  # Wait for the page to load

    # Handle cookie consent overlay
    try:
        # Wait for the "Accept all" button to be visible and clickable
        accept_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, ".cmpboxbtnyes"))
        )
        # Click the "Accept all" button to close the cookie consent overlay
        accept_button.click()
        time.sleep(2)  # Wait for the overlay to disappear
    except Exception as e:
        print(f"No cookie overlay found for {link}: {e}")

    # Try to locate and click the "Show more" button inside the specification div
    try:
        show_more_buttons = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, '//div[@data-card="specification"]//a[contains(@class, "card__show")]/span[text()="Show more"]'))
        )
        if len(show_more_buttons) > 0:
            # Click the first "Show more" button found in the specification div
            show_more_buttons[0].click()
            print("Clicked the 'Show more' button in specification section")
            # Wait for the content to load after clicking the first button
            WebDriverWait(driver, 10).until(
                EC.invisibility_of_element_located((By.XPATH, '//div[@data-card="specification"]//a[contains(@class, "card__show")]/span[text()="Show more"]'))
            )
            print("Specification content loaded")
        else:
            print(f"No 'Show more' button found in specification section for {link}")
    except Exception as e:
        print(f"Error clicking the 'Show more' button for {link}: {e}")

    # Extract the product title
    try:
        product_title = driver.find_element(By.CSS_SELECTOR, "h2.product-title__title").text.strip()
    except Exception as e:
        product_title = "Not available"
        print(f"Error extracting product title for {link}: {e}")

    # Extract the product price
    try:
        product_price = driver.find_element(By.CSS_SELECTOR, ".price_alignment .price__currency + span").text.strip()
    except Exception as e:
        product_price = "Not available"
        print(f"Error extracting product price for {link}: {e}")

    # Scroll to the bottom of the page after clicking the "Show more" button
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)  # Wait for the page to fully load and new content to appear

    # Find the product specification table
    rows = driver.find_elements(By.CSS_SELECTOR, "table.table--info.table--bordered-bottom .table__row")

    # Dictionary to hold the details for each product
    product_info = {
        "Product Title": product_title,
        "Product Price": product_price
    }

    for row in rows:
        # Extract the column name and value
        try:
            column_name = row.find_element(By.CSS_SELECTOR, "th.table__item").text.strip()
            value = row.find_element(By.CSS_SELECTOR, "td.table__item").text.strip()
            product_info[column_name] = value
        except Exception as e:
            print(f"Skipping a row due to missing data: {e}")

    # Append the product info to the product_data list
    product_data.append(product_info)

# Convert the list of dictionaries to a DataFrame
product_details_df = pd.DataFrame(product_data)

# Save the DataFrame to a CSV file
product_details_df.to_csv("product_details.csv", index=False)

# Print confirmation
print(f"Saved product details to 'product_details.csv'")

# Close the driver
driver.quit()


No cookie overlay found for https://www.jarir.com/sa-en/huawei-laptops-638525.html: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF71A8D2775+28773]
	(No symbol) [0x00007FF71A83AFB0]
	(No symbol) [0x00007FF71A6D552A]
	(No symbol) [0x00007FF71A728EAE]
	(No symbol) [0x00007FF71A72919C]
	(No symbol) [0x00007FF71A7723F7]
	(No symbol) [0x00007FF71A74EFDF]
	(No symbol) [0x00007FF71A76F1A4]
	(No symbol) [0x00007FF71A74ED43]
	(No symbol) [0x00007FF71A71A548]
	(No symbol) [0x00007FF71A71B6B1]
	GetHandleVerifier [0x00007FF71ABFF45D+3358029]
	GetHandleVerifier [0x00007FF71AC1430D+3443709]
	GetHandleVerifier [0x00007FF71AC083FD+3394797]
	GetHandleVerifier [0x00007FF71A99929B+842635]
	(No symbol) [0x00007FF71A84654F]
	(No symbol) [0x00007FF71A841FA4]
	(No symbol) [0x00007FF71A84213D]
	(No symbol) [0x00007FF71A831629]
	BaseThreadInitThunk [0x00007FFB8E2D257D+29]
	RtlUserThreadStart [0x00007FFB8F9AAF08+40]

Clicked the 'Show more' button in specification section
Specification content loaded
No coo

JavascriptException: Message: javascript error: Cannot read properties of null (reading 'scrollHeight')
  (Session info: chrome=131.0.6778.69)
Stacktrace:
	GetHandleVerifier [0x00007FF71A8D2775+28773]
	(No symbol) [0x00007FF71A83AFB0]
	(No symbol) [0x00007FF71A6D552A]
	(No symbol) [0x00007FF71A6DC09F]
	(No symbol) [0x00007FF71A6DF0CF]
	(No symbol) [0x00007FF71A770460]
	(No symbol) [0x00007FF71A74EF9A]
	(No symbol) [0x00007FF71A76F1A4]
	(No symbol) [0x00007FF71A74ED43]
	(No symbol) [0x00007FF71A71A548]
	(No symbol) [0x00007FF71A71B6B1]
	GetHandleVerifier [0x00007FF71ABFF45D+3358029]
	GetHandleVerifier [0x00007FF71AC1430D+3443709]
	GetHandleVerifier [0x00007FF71AC083FD+3394797]
	GetHandleVerifier [0x00007FF71A99929B+842635]
	(No symbol) [0x00007FF71A84654F]
	(No symbol) [0x00007FF71A841FA4]
	(No symbol) [0x00007FF71A84213D]
	(No symbol) [0x00007FF71A831629]
	BaseThreadInitThunk [0x00007FFB8E2D257D+29]
	RtlUserThreadStart [0x00007FFB8F9AAF08+40]


In [32]:
product_details_df.head()

Unnamed: 0,Product Title,Product Price,Warranty Period (Months),Accesorises warranty,Warranted by,SKU,Manufacturer Number,processor type,NPU (Neural Processing Unit),AI (Artificial Intelligence),...,fingerprint,battery type,width,height/length,depth,weight,card reader,Shipping Weight (kg),Renewed Grade,Renewed Remarks
0,Huawei MateBook 14 Laptop,4299.0,24,12 Months,Manufacturer,638525,53014ASK,Intel Core Ultra 5 125H,Intel AI Boost,Yes,...,Yes,Lithium Polymer (Li-Po),312.60 mm ( 12.31 in ),226.80 mm ( 8.93 in ),15.90 mm ( .63 in ),1.31 kg ( 2.89 lb ),,,,
1,Apple MacBook Air 15 M3 Laptop,5999.0,24,12 Months,Manufacturer,646475,MC9D4ABA,M3 8-core CPU,16-core Neural Engine,Yes,...,Yes,Lithium Polymer (Li-Po),34.04 cm ( 13.40 in ),23.76 cm ( 9.35 in ),1.15 cm ( .45 in ),1.51 kg ( 3.33 lb ),,,,
2,Apple MacBook Air 15 M3 Laptop,5999.0,24,12 Months,Manufacturer,632673,MRYR3ABA,M3 8-core CPU,16-core Neural Engine,Yes,...,Yes,Lithium Polymer (Li-Po),34.04 cm ( 13.40 in ),23.76 cm ( 9.35 in ),1.15 cm ( .45 in ),1.51 kg ( 3.33 lb ),,,,
3,Asus Laptop,,24,12 Months,Manufacturer,642879,UM3406OLEDR7W,AMD Ryzen 7-8840HS,AMD XDNA up to 16 TOPS,Yes,...,Yes,Lithium Ion (Li-Ion) - 4 Cell,32.24 cm ( 12.69 in ),22.01 cm ( 8.67 in ),1.49 cm ( .59 in ),1.20 kg ( 2.65 lb ),Yes,,,
4,Microsoft Surface Laptop 7 Laptop,,24,Not Applicable,JARIR,639206,ZHI00014,Snapdragon X Elite 12 Core,Qualcomm Hexagon NPU 45 TOPS,Yes,...,,Lithium Ion (Li-Ion) - 6 Cell,32.90 cm ( 12.95 in ),23.90 cm ( 9.41 in ),1.80 cm ( .71 in ),1.60 kg ( 3.53 lb ),,,,


In [30]:
product_details_df.columns

Index(['Product Title', 'Product Price', 'Warranty Period (Months)',
       'Accesorises warranty', 'Warranted by', 'SKU', 'Manufacturer Number',
       'processor type', 'NPU (Neural Processing Unit)',
       'AI (Artificial Intelligence)', 'display type', 'graphics card',
       'display size', 'operating system', 'product type',
       'model/chipset number', 'model series', 'generation/release', 'color',
       'operating system architecture', 'processor speed', 'capacity', 'RAM',
       'connectivity technology', 'ports', 'touch display',
       'display resolution', 'cam resolution', 'multimedia features',
       'special features', 'fingerprint', 'battery type', 'width',
       'height/length', 'depth', 'weight', 'card reader',
       'Shipping Weight (kg)', 'Renewed Grade', 'Renewed Remarks'],
      dtype='object')

In [31]:
product_details_df.shape

(205, 40)