# This script scrape products on framedirect.com/eyeglasses with basic details


In [35]:
import csv
import json
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

# Step 1 - Configuration and Data Fetching

In [36]:
# Setup Selenium and WebDriver
print("Setting up webdriver...")
chrome_option = Options()
chrome_option.add_argument('--headless')
chrome_option.add_argument('--disable-gpu')
chrome_option.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.6778.265 Safari/537.36"
)
print("done setting up..")

Setting up webdriver...
done setting up..


In [37]:
# Install the chrome driver (This is a one time thing)
print("Installing Chrome WD")
service = Service(ChromeDriverManager().install())
print("Final Setup")
driver = webdriver.Chrome(service=service, options=chrome_option)
print("Done")

Installing Chrome WD
Final Setup
Done


In [38]:
# Make connection and get URL content
url = "https://www.framesdirect.com/eyeglasses/"
print(f"Visting {url} page")
driver.get(url)



Visting https://www.framesdirect.com/eyeglasses/ page


In [39]:
# Further instruction: wait for JS to load the files
try:
    print("Waiting for product tiles to load")
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.ID, 'product-list-container'))
    )
    print("Done...Proceed to parse the data")
except (TimeoutError, Exception) as e:
    print(f"Error waiting for {url}: {e}")
    driver.quit()
    print("Closed")

Waiting for product tiles to load
Done...Proceed to parse the data


In [40]:
# Get page source and parse using BeautifulSoup
content = driver.page_source
page = BeautifulSoup(content, 'html.parser')

from pprint import pprint
pprint(page)

<html class="" lang="en-US" xml:lang="en-US" xmlns="https://www.w3.org/1999/xhtml" xmlns:fb="https://www.facebook.com/2008/fbml" xmlns:og="https://ogp.me/ns#"><head id="ctl00_Head1"><script async="" src="https://dc.oracleinfinity.io/z7uzysa9yz/wtid.js?callback=ORA.analytics.dcsRef.dcsGetIdCallback" type="text/javascript"></script><script src="https://dkpklk99llpj0.cloudfront.net/wexl4mta_1742824213734.js"></script><link href="https://f.monetate.net" rel="dns-prefetch"/><link href="https://sb.monetate.net" rel="dns-prefetch"/><script async="" src="https://product-initjs.prod.rfksrv.com/js/reflektion.js" type="text/javascript"></script><script src="https://am.freshrelevance.com/v/?w=wexl4mta"></script>
<iframe role="presentation" src="javascript:void(0)" style="width: 0px; height: 0px; border: 0px; display: none;" title=""></iframe><script async="" src="https://www.googletagmanager.com/gtag/destination?id=G-7EGSC4TDV5&amp;cx=c&amp;gtm=4e5931" type="text/javascript"></script><script async

In [None]:
# Temporary storage for the extracted data
glasses_data = []

# Locate all product holders and extract the data for each product.
product_holders = page.find_all("div", class_='prod-holder')
print(f"Found {len(product_holders)} products")

for holder in product_holders:
    product_info = holder.find("div", class_='catalog-container')
    

    if product_info:
        brand_tag = product_info.find('div', class_='catalog-name')
        brand = brand_tag.text if brand_tag else None # product brand

    else:
        brand =None
    # Product Name

    # productname= holder.find('div', class_='prod-title prod-model')

    #if productname:
    name_tag =holder.find('div', class_ = 'product_name')
    name = name_tag.text if name_tag else None


    discount_tag = holder.find('div', class_='frame-discount')
    discount = discount_tag.text if discount_tag else None

    # for price
    price_cnt = holder.find('div', class_='prod-price-wrap')
    if price_cnt:
        # Retail Price
        retail_price_tag = price_cnt.find('div', class_='prod-catalog-retail-price')
        retail_price = retail_price_tag.text if retail_price_tag else None
        # Discounted Price
        discounted_price_tag = price_cnt.find('div', class_='prod-aslowas')
        discounted_price = discounted_price_tag.text if discounted_price_tag else None
    else:
        retail_price = discounted_price = None
        # Automatically applies missing value, if the product info is not available.
    
    

    # Assignment: Add the category
            
    data = {
        'Brand': brand,
        'Product_Name': name,
        'Retail_Price': retail_price,
        'Discounted_Price': discounted_price,
        'Discount': discount
    }
    # Append data to the list
    glasses_data.append(data)

Found 25 products


In [45]:
# Save to CSV file
column_name = glasses_data[0].keys() # get the column names
with open('framedirect_data.csv', mode='w', newline='', encoding='utf-8') as csv_file: # open up the file with context manager
    dict_writer = csv.DictWriter(csv_file, fieldnames=column_name)
    dict_writer.writeheader()
    dict_writer.writerows(glasses_data)
print(f"Saved {len(glasses_data)} records to CSV")

Saved 25 records to CSV


In [46]:
# Save to JSON file
with open("framedirect_data.json", mode='w') as json_file:
    json.dump(glasses_data, json_file, indent=4)
print(f"Saved {len(glasses_data)} records to JSON")

# close the browser
driver.quit()
print("End of Web Extraction")

Saved 25 records to JSON
End of Web Extraction
