<a href="https://colab.research.google.com/github/dhahbimohamed/PC-Parts-Recommender-System/blob/main/Notebooks/Ebtihal_benChabeen_scrape_gpus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install playwright beautifulsoup4 pandas nest_asyncio
!playwright install
!python pangoly_cpu_scraper.py

Collecting playwright
  Downloading playwright-1.55.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting pyee<14,>=13 (from playwright)
  Downloading pyee-13.0.0-py3-none-any.whl.metadata (2.9 kB)
Downloading playwright-1.55.0-py3-none-manylinux1_x86_64.whl (45.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.9/45.9 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyee-13.0.0-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.55.0 pyee-13.0.0
Downloading Chromium 140.0.7339.16 (playwright build v1187)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1187/chromium-linux.zip[22m
[1G173.7 MiB [] 0% 0.0s[0K[1G173.7 MiB [] 0% 33.4s[0K[1G173.7 MiB [] 0% 15.8s[0K[1G173.7 MiB [] 0% 8.0s[0K[1G173.7 MiB [] 1% 5.5s[0K[1G173.7 MiB [] 1% 4.7s[0K[1G173.7 MiB [] 2% 3.7s[0K[1G173.7 MiB [] 3% 3.2s[0K[1G173.7 MiB [] 4% 3.0s[0K[1G173.7 MiB [] 5% 2.9s

In [5]:
import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from urllib.parse import urljoin
import random
import nest_asyncio # Import nest_asyncio

nest_asyncio.apply() # Apply nest_asyncio


# ---------------------- CONFIGURATION ----------------------
# Site root; relative hrefs found while scraping are joined onto it with urljoin().
BASE_URL = "https://pangoly.com"
# First listing page the crawl starts from.
SEARCH_URL = "https://pangoly.com/en/browse/vga"

# Extra HTTP headers handed to Playwright: attached to the browser context and,
# together with a User-Agent, to every product detail page.
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}

# Pool of User-Agent strings; one is drawn at random for the browser context
# and again for each product detail page.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0",
]


# ---------------------- GPU SPEC EXTRACTOR ----------------------
def extract_specs_from_product_page(soup):
    """Gather every GPU specification available on a parsed product page.

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        dict: Values read from the specifications table, overlaid with the
        values derived from the page title.
    """
    # The table is the primary source; title-derived fields are merged last,
    # so they take precedence if both sources ever produce the same key.
    return {
        **extract_from_specs_table(soup),
        **extract_from_title_header(soup),
    }


def extract_from_specs_table(soup):
    """Read the two-column specifications table into a flat dict.

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        dict: Output field name -> cell text for every recognised row. Empty
        when the page has no ``table.table-bordered`` element.
    """
    # Ordered rules: (text the label must contain, text it must NOT contain,
    # output field). Only the first rule that accepts a label is applied.
    field_rules = (
        ('model', None, 'Model'),
        ('release date', None, 'Release Date'),
        ('gpu interface', None, 'Interface'),
        ('gpu chipset', None, 'GPU Chipset'),
        ('gpu memory size', None, 'Memory Size'),
        ('gpu memory type', None, 'Memory Type'),
        ('gpu clock rate', 'boost', 'Base Clock'),
        ('gpu boost clock rate', None, 'Boost Clock'),
        ('tdp', None, 'TDP'),
        ('length', None, 'Length'),
        ('cooling type', None, 'Cooling Type'),
        ('power connectors', None, 'Power Connectors'),
        ('sli support', None, 'SLI Support'),
        ('crossfire support', None, 'CrossFire Support'),
        ('displayport', 'mini', 'DisplayPort'),
        ('hdmi', 'mini', 'HDMI'),
        ('g-sync support', None, 'G-SYNC Support'),
        ('freesync support', None, 'FreeSync Support'),
        ('expansion slots required', None, 'Expansion Slots'),
    )

    collected = {}

    table = soup.find('table', class_='table-bordered')
    if not table:
        return collected

    for table_row in table.find_all('tr'):
        columns = table_row.find_all('td')
        # Only plain "label | value" rows carry a specification.
        if len(columns) != 2:
            continue

        label = columns[0].get_text(strip=True).lower()
        text = columns[1].get_text(strip=True)

        for wanted, forbidden, field in field_rules:
            if wanted in label and (forbidden is None or forbidden not in label):
                collected[field] = text
                break

    return collected


def extract_from_title_header(soup):
    """Derive board brand, GPU vendor and GPU series from the page title.

    The title is taken from the first ``<h2>`` element and matched
    case-insensitively.

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        dict: Any of the keys ``'Brand'``, ``'GPU Brand'`` and ``'GPU Series'``
        that could be detected. Empty when the page has no ``<h2>``.
    """
    specs = {}

    title = soup.find('h2')
    if not title:
        return specs

    title_text = title.get_text().lower()

    # Board partner: the first brand (in list order) found in the title wins.
    brands = ['asus', 'msi', 'gigabyte', 'sapphire', 'powercolor', 'xfx', 'zotac',
              'evga', 'pny', 'inno3d', 'galax', 'palit', 'gainward']
    for brand in brands:
        if brand in title_text:
            specs['Brand'] = brand.title()
            break

    # GPU vendor. NVIDIA is tested first because 'rx' is a substring of
    # nothing NVIDIA-specific once 'rtx'/'gtx' titles have been claimed here.
    if any(word in title_text for word in ('nvidia', 'geforce', 'rtx', 'gtx')):
        specs['GPU Brand'] = 'NVIDIA'
    elif any(word in title_text for word in ('amd', 'radeon', 'rx')):
        specs['GPU Brand'] = 'AMD'
    elif 'intel' in title_text or 'arc' in title_text:
        specs['GPU Series' if False else 'GPU Brand'] = 'Intel'

    # GPU series. Each family lists the models that are checked first, in
    # priority order, followed by a regex fallback so that models missing
    # from the list (e.g. RTX 2060, RX 6800, Arc B580) still get a series.
    series_families = (
        ('rtx',
         ('5090', '5080', '5070', '5060', '5050',
          '4090', '4080', '4070', '4060',
          '3090', '3080', '3070', '3060'),
         r'\brtx\s*(\d{4})', 'RTX'),
        ('rx', ('7900', '7800', '7700', '7600'), r'\brx\s*(\d{3,4})', 'RX'),
        ('arc', ('a770', 'a750', 'a580'), r'\barc\s*([ab]\d{3})', 'Arc'),
    )
    for keyword, known_models, fallback_pattern, label in series_families:
        if keyword not in title_text:
            continue

        model = next(
            (m for m in known_models if f'{keyword} {m}' in title_text),
            None,
        )
        if model is None:
            fallback = re.search(fallback_pattern, title_text)
            model = fallback.group(1) if fallback else None

        if model:
            specs['GPU Series'] = f'{label} {model.upper()}'
        # Only the first family present in the title is considered.
        break

    return specs


# ---------------------- PRICE AND AVAILABILITY EXTRACTOR ----------------------
def extract_price_pangoly(soup):
    """Return the first dollar amount shown in the page's price elements.

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        str: The matched amount including the ``$`` sign (thousands
        separators kept), or ``"Price not found"`` when nothing matches.
    """
    dollar_amount = re.compile(r'\$(\d+(?:,\d+)*(?:\.\d{2})?)')

    # Lazily walk the candidate elements, selector by selector, in priority order.
    candidate_texts = (
        node.get_text(strip=True)
        for css in ('.amprice span', '.price span', '.table-prices strong')
        for node in soup.select(css)
    )

    for text in candidate_texts:
        found = dollar_amount.search(text)
        if found:
            return found.group(0)

    return "Price not found"


def extract_availability_pangoly(soup):
    """Return the stock status shown on a pangoly product page.

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        str: ``'In Stock'`` or ``'Out of Stock'`` from the first label whose
        text is exactly one of those, else ``"Availability not specified"``.
    """
    recognised = ('In Stock', 'Out of Stock')

    # Label texts in selector priority order, produced on demand.
    label_texts = (
        node.get_text(strip=True)
        for css in ('.label-success', '.label-danger', '.table-prices .label')
        for node in soup.select(css)
    )

    return next(
        (text for text in label_texts if text in recognised),
        "Availability not specified",
    )


def extract_rating_reviews_pangoly(soup):
    """Extract the star rating and review count from a product page.

    The rating is inferred from the longest run of filled stars (``★``)
    found anywhere in the page text, capped at five. Ratings of one and two
    stars are reported as well, instead of falling back to "No rating".

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        tuple[str, str]: ``(rating, reviews)`` where ``rating`` is ``"N/5"``
        or ``"No rating"`` and ``reviews`` is the number found in a
        ``"(N reviews)"`` snippet, or ``"0"`` when there is none.
    """
    page_text = soup.get_text()

    rating = "No rating"
    # Test the longest run first so that "★★★★★" is not reported as 4/5.
    # NOTE(review): the whole page text is searched, so a filled star used
    # outside the rating widget would also be counted - confirm on the site.
    for stars in range(5, 0, -1):
        if '★' * stars in page_text:
            rating = f"{stars}/5"
            break

    review_match = re.search(r'\((\d+)\s*reviews?\)', page_text)
    reviews = review_match.group(1) if review_match else "0"

    return rating, reviews


def extract_seller_prices(soup):
    """Collect one record per seller offer listed in the prices table.

    Args:
        soup: Parsed HTML of a pangoly product page.

    Returns:
        list[dict]: One dict per offer with the keys ``'seller'``,
        ``'seller_product_url'``, ``'availability'``, ``'price'`` (digits
        only, or ``'N/A'``) and ``'shipping'``. Empty when the page has no
        ``table.table-prices`` element.
    """
    offers = []

    table = soup.find('table', class_='table-prices')
    if not table:
        return offers

    def absolute(href):
        # Only site-relative paths are rewritten; anything else is kept as found.
        if href and href.startswith('/'):
            return urljoin(BASE_URL, href)
        return href

    # The first row is the table header.
    for table_row in table.find_all('tr')[1:]:
        # Informational rows are not offers.
        if 'offer-row' in table_row.get('class', []):
            continue

        columns = table_row.find_all('td')
        if len(columns) < 5:
            continue

        logo = columns[0].find('img')
        merchant_anchor = columns[0].find('a')
        stock_label = columns[1].find('span', class_='label')
        price_anchor = columns[2].find('a')

        merchant_url = absolute(merchant_anchor.get('href', '') if merchant_anchor else '')
        price_url = absolute(price_anchor.get('href', '') if price_anchor else '')

        if price_anchor:
            raw_price = price_anchor.get_text(strip=True)
        else:
            raw_price = columns[2].get_text(strip=True)
        amount = re.search(r'\$(\d+(?:,\d+)*(?:\.\d{2})?)', raw_price)

        offers.append({
            'seller': logo.get('alt', 'Unknown') if logo else 'Unknown',
            # Prefer the link on the seller logo; fall back to the price link.
            'seller_product_url': merchant_url or price_url,
            'availability': stock_label.get_text(strip=True) if stock_label else 'Unknown',
            'price': amount.group(1).replace(',', '') if amount else 'N/A',
            'shipping': columns[3].get_text(strip=True),
        })

    return offers


# ---------------------- UTILITY FUNCTIONS ----------------------
async def random_delay(min_seconds=1, max_seconds=3):
    """Pause the current coroutine for a random duration.

    Args:
        min_seconds: Lower bound of the pause, in seconds.
        max_seconds: Upper bound of the pause, in seconds.
    """
    # Uniformly distributed pause so requests are not evenly spaced.
    await asyncio.sleep(random.uniform(min_seconds, max_seconds))


async def scrape_product_page(context, product_url, product_name):
    """Scrape one pangoly product page and return its merged data.

    A fresh page is opened in ``context`` for the product and is always
    closed again, whether scraping succeeds or fails.

    Args:
        context: Playwright browser context used to open the detail page.
        product_url: URL of the product page to load.
        product_name: Name from the listing; used only in progress messages.

    Returns:
        dict: The extracted specs plus the keys ``"Price"``, ``"Rating"``,
        ``"Reviews"``, ``"Availability"`` and ``"Sellers_Data"``. An empty
        dict when no specs could be extracted or when any step raised.
    """
    print(f"🔍 Scraping: {product_name[:70]}...")

    # Stays None until the page really exists, so cleanup never has to touch
    # an unbound name when opening the page itself fails.
    detail_page = None
    try:
        detail_page = await context.new_page()

        # Per-page headers with a randomly chosen User-Agent.
        await detail_page.set_extra_http_headers({
            'User-Agent': random.choice(USER_AGENTS),
            **HEADERS
        })

        await detail_page.goto(product_url, wait_until="domcontentloaded", timeout=30000)
        await random_delay(1, 2)

        try:
            await detail_page.wait_for_selector('.table-bordered, .product-info, h2', timeout=10000)
        except Exception:
            # Best effort: if none of the expected elements shows up in time,
            # parse whatever has been rendered. Only Exception is caught so
            # task cancellation and KeyboardInterrupt still propagate.
            pass

        detail_html = await detail_page.content()
        detail_soup = BeautifulSoup(detail_html, "html.parser")

        specs = extract_specs_from_product_page(detail_soup)
        price = extract_price_pangoly(detail_soup)
        rating, reviews = extract_rating_reviews_pangoly(detail_soup)
        availability = extract_availability_pangoly(detail_soup)
        # Per-seller offers, including the links to the seller pages.
        sellers_data = extract_seller_prices(detail_soup)

        if not specs:
            print(f"⚠️ Limited specs: {product_name[:50]}...")
            return {}

        print(f"✅ Success: {product_name[:50]}...")
        print(f"   🎯 Brand: {specs.get('Brand', 'N/A')}, GPU: {specs.get('GPU Chipset', 'N/A')}, Memory: {specs.get('Memory Size', 'N/A')}")
        return {
            **specs,
            "Price": price,
            "Rating": rating,
            "Reviews": reviews,
            "Availability": availability,
            "Sellers_Data": sellers_data
        }

    except Exception as e:
        print(f"❌ Error scraping product page: {str(e)[:100]}...")
        return {}

    finally:
        if detail_page is not None:
            try:
                await detail_page.close()
            except Exception:
                # The page may already be gone (e.g. the browser crashed);
                # there is nothing left to release in that case.
                pass


def get_next_page_url(soup, current_url, page_number):
    """Work out the URL of the listing page that follows ``page_number``.

    Three strategies are tried in order:

    1. a pagination link whose text is the next page number,
    2. a "next" style pagination link,
    3. a URL built by setting/adding the ``page`` query parameter.

    Args:
        soup: Parsed HTML of the current listing page.
        current_url: URL of the current listing page.
        page_number: Number of the current listing page.

    Returns:
        str | None: URL of the next page, or ``None`` when the constructed
        URL would be identical to ``current_url``.
    """
    target_page = page_number + 1
    print(f"🔍 Looking for page {target_page}...")

    # Strategy 1: numbered pagination links.
    for selector in ('.pagination .page-link', '.pagination a', 'a[rel="next"]'):
        for link in soup.select(selector):
            link_text = link.get_text(strip=True)
            href = link.get('href')
            # Pagination items without a usable href (e.g. the current page
            # marker) are skipped instead of raising KeyError.
            if not href or not link_text.isdecimal():
                continue
            if int(link_text) == target_page:
                next_url = urljoin(BASE_URL, href)
                print(f"✅ Found next page via page number {target_page}: {next_url}")
                return next_url

    # Strategy 2: a dedicated "next" link.
    for selector in ('.pagination .next a', 'a[rel="next"]', '.page-item:last-child a'):
        next_link = soup.select_one(selector)
        if next_link and next_link.get('href'):
            next_url = urljoin(BASE_URL, next_link['href'])
            print(f"✅ Found next page: {next_url}")
            return next_url

    # Strategy 3: build the URL by hand.
    if "page=" in current_url:
        next_url = re.sub(r'page=(\d+)', f'page={target_page}', current_url)
    elif "?page" in current_url:
        # "?page" without "=<digits>" cannot be rewritten; leaving the URL
        # unchanged makes the check below report that there is no next page.
        next_url = current_url
    elif "?" in current_url:
        next_url = f"{current_url}&page={target_page}"
    else:
        next_url = f"{current_url}?page={target_page}"

    if next_url != current_url:
        print(f"🔄 Trying constructed URL: {next_url}")
        return next_url

    print("❌ No next page found")
    return None


# ---------------------- MAIN SCRAPER FUNCTION ----------------------
async def scrape_pangoly_gpus_detailed():
    """Crawl the pangoly GPU listing and export specs and seller prices.

    Every listing page (up to ``max_pages``) is loaded, each product's detail
    page is scraped once, and the results are written to two CSV files:
    ``pangoly_gpu_specifications.csv`` (one row per product) and
    ``pangoly_gpu_prices.csv`` (one row per seller offer; only written when
    at least one offer was collected).

    Returns:
        tuple: ``(specs_df, prices_df)`` as pandas DataFrames. ``prices_df``
        is ``None`` when no seller offers were collected; both values are
        ``None`` when no product could be scraped at all.
    """
    all_data = []
    prices_data = []  # Seller offers, exported separately from the specs
    max_pages = 50
    max_page_attempts = 3  # Consecutive failures tolerated for one listing page
    processed_urls = set()

    async with async_playwright() as p:
        # Headless so the scraper also runs where no display is available.
        browser = await p.chromium.launch(
            headless=True,
            args=[
                '--disable-blink-features=AutomationControlled',
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-gpu',
                '--single-process',
            ]
        )

        context = await browser.new_context(
            user_agent=random.choice(USER_AGENTS),
            viewport={"width": 1920, "height": 1080},
            extra_http_headers=HEADERS
        )

        page = await context.new_page()
        page.set_default_timeout(30000)

        current_url = SEARCH_URL
        page_number = 1
        total_product_count = 0
        failed_attempts = 0

        while page_number <= max_pages:
            print(f"\n📄 Loading page {page_number}: {current_url}")

            try:
                await page.goto(current_url, wait_until="networkidle", timeout=30000)
                await random_delay(2, 4)

                html = await page.content()
                soup = BeautifulSoup(html, "html.parser")

                # Debug aid: shows whether the expected page was served.
                page_title = await page.title()
                print(f"📝 Page title: {page_title}")

                # The first selector that matches anything defines the product list.
                products = []
                product_selectors = [
                    '.productItem',
                    '.productItemBase',
                    '.col-xxs .productItem'
                ]

                for selector in product_selectors:
                    found_products = soup.select(selector)
                    if found_products:
                        products = found_products
                        print(f"✅ Found {len(products)} products with selector: {selector}")
                        break

                if not products:
                    print("❌ No products found with any selector")
                    if page_number == 1:
                        print("💥 No products on first page, stopping")
                    else:
                        print("🏁 No more products")
                    break

                print(f"🔍 Processing {len(products)} products on page {page_number}...")

                page_product_count = 0
                for i, item in enumerate(products):
                    try:
                        product_id = item.get('data-product', '')

                        name_element = item.select_one('.productItemLink header, .productItemLink')
                        if not name_element:
                            continue

                        name = name_element.get_text(strip=True)
                        if not name:
                            continue

                        link_element = item.select_one('.productItemLink')
                        if not link_element:
                            continue

                        product_path = link_element.get('href')
                        if not product_path:
                            continue

                        product_url = urljoin(BASE_URL, product_path)

                        # Each product page is visited at most once per run.
                        if product_url in processed_urls:
                            print(f"⏭️  Skipping duplicate: {name[:50]}...")
                            continue

                        processed_urls.add(product_url)

                        # Price as shown on the listing card.
                        price_element = item.select_one('.amprice span, .price span')
                        list_price = price_element.get_text(strip=True) if price_element else "Price not found"

                        print(f"📦 [{page_number}.{i+1}] Processing: {name[:60]}...")

                        detailed_data = await scrape_product_page(context, product_url, name)

                        if detailed_data:
                            # Seller offers go to their own table, not the specs row.
                            sellers_data = detailed_data.pop('Sellers_Data', [])

                            for seller_info in sellers_data:
                                prices_data.append({
                                    "Product_URL": product_url,
                                    "Product_Name": name,
                                    "Seller": seller_info['seller'],
                                    "Seller_Product_URL": seller_info['seller_product_url'],
                                    "Availability": seller_info['availability'],
                                    "Price": seller_info['price'],
                                    "Shipping": seller_info['shipping'],
                                    "Currency": "USD"
                                })

                            product_data = {
                                "Name": name,
                                "URL": product_url,
                                "Product_ID": product_id,
                                "List_Price": list_price,
                                "Page": page_number,
                                **detailed_data
                            }

                            all_data.append(product_data)
                            page_product_count += 1
                            total_product_count += 1
                            print(f"✅ [{len(all_data)}] Added: {name[:60]}...")

                            # Pause between product pages.
                            await random_delay(1, 2)
                        else:
                            print(f"❌ Failed to get detailed specs: {name[:50]}...")

                    except Exception as e:
                        print(f"❌ Error processing product {i+1}: {str(e)[:100]}...")
                        continue

                print(f"📊 Added {page_product_count} products from page {page_number} (Total: {total_product_count})")

                if page_number < max_pages:
                    next_url = get_next_page_url(soup, current_url, page_number)

                    if next_url and next_url != current_url:
                        current_url = next_url
                        page_number += 1
                        failed_attempts = 0
                        print(f"➡️  Moving to page {page_number}")
                        await random_delay(2, 4)
                    else:
                        print(f"🏁 No more pages found after page {page_number}")
                        break
                else:
                    print(f"🎯 Reached maximum page limit ({max_pages})")
                    break

            except Exception as e:
                print(f"❌ Error on page {page_number}: {e}")
                failed_attempts += 1
                # The first page failing means the site is unreachable. Later
                # pages are retried at the SAME url and page number: bumping
                # the number while reloading the same url would mislabel the
                # rows and make the next-page lookup skip a page.
                if page_number == 1 or failed_attempts >= max_page_attempts:
                    break
                print(f"🔁 Retrying page {page_number} (attempt {failed_attempts + 1}/{max_page_attempts})")
                await random_delay(2, 4)
                continue

        await browser.close()

    if not all_data:
        print("❌ No data collected")
        return None, None

    # ---- Specifications CSV ----
    specs_df = pd.DataFrame(all_data)
    specs_df = specs_df.drop_duplicates(subset=['URL'])

    specs_columns = [
        'Name', 'Brand', 'GPU Brand', 'GPU Series', 'GPU Chipset', 'Model',
        'Memory Size', 'Memory Type', 'Base Clock', 'Boost Clock', 'TDP',
        'Interface', 'Length', 'Cooling Type', 'Power Connectors',
        'SLI Support', 'CrossFire Support', 'DisplayPort', 'HDMI',
        'G-SYNC Support', 'FreeSync Support', 'Expansion Slots',
        'Release Date', 'Price', 'Rating', 'Reviews', 'Availability',
        'List_Price', 'URL', 'Product_ID', 'Page'
    ]

    # Columns no product provided are added empty so the layout is stable.
    for col in specs_columns:
        if col not in specs_df.columns:
            specs_df[col] = None

    specs_df = specs_df[specs_columns]

    specs_filename = "pangoly_gpu_specifications.csv"
    specs_df.to_csv(specs_filename, index=False, encoding='utf-8')

    # ---- Prices CSV (only when at least one seller offer was collected) ----
    prices_filename = "pangoly_gpu_prices.csv"
    prices_df = None
    if prices_data:
        prices_df = pd.DataFrame(prices_data)
        prices_df = prices_df.drop_duplicates()
        prices_df.to_csv(prices_filename, index=False, encoding='utf-8')

    print(f"\n🎉 PANGOLY GPU SCRAPING COMPLETE!")
    print(f"📊 Total unique products: {len(specs_df)}")
    print(f"💰 Total price records: {len(prices_data)}")
    print(f"💾 Files created:")
    print(f"   - {specs_filename}")
    if prices_df is not None:
        print(f"   - {prices_filename}")

    # specs_df has at least one row here and every listed column exists.
    print(f"\n📈 SUMMARY:")
    print(f"   Brands: {specs_df['Brand'].value_counts().to_dict()}")
    print(f"   GPU Brands: {specs_df['GPU Brand'].value_counts().to_dict()}")
    print(f"   Memory Sizes: {specs_df['Memory Size'].value_counts().head(10).to_dict()}")
    print(f"   GPU Series: {specs_df['GPU Series'].value_counts().head(10).to_dict()}")

    return specs_df, prices_df


# ---------------------- SIMPLIFIED RUN FUNCTION ----------------------
async def main():
    """Run the scraper end to end and print a short report.

    Prints a start banner, awaits the full scrape, then reports the elapsed
    time, the number of products and price records, and up to three sample
    seller URLs.
    """
    for banner_line in (
        "🚀 STARTING PANGOLY GPU SCRAPER",
        "🎯 Strategy: Extract GPU specs and prices from pangoly.com",
        "📁 Output: Two CSV files (specifications + prices)",
        "🔗 Features: Includes actual seller product URLs",
        "⏰ This will take time as we visit each product page...",
    ):
        print(banner_line)

    started_at = time.time()
    specs_result, prices_result = await scrape_pangoly_gpus_detailed()
    elapsed_minutes = (time.time() - started_at) / 60

    if specs_result is None:
        print(f"\n❌ Failed to collect data")
        return

    # The prices table is optional; treat a missing one as zero records.
    price_count = 0 if prices_result is None else len(prices_result)

    print(f"\n✅ SUCCESS!")
    print(f"⏱️  Total time: {elapsed_minutes:.1f} minutes")
    print(f"📦 Total products: {len(specs_result)}")
    print(f"💰 Price records: {price_count}")

    if price_count > 0:
        print(f"\n🔗 Sample seller URLs collected:")
        for rank, offer in enumerate(prices_result.head(3).itertuples(), start=1):
            print(f"   {rank}. {offer.Seller}: {offer.Seller_Product_URL[:80]}...")


# ---------------------- RUN ----------------------
if __name__ == "__main__":
    # Entry point for running this cell in a notebook (Jupyter/Colab).
    # NOTE: a bare top-level `await` is only accepted by notebook/IPython
    # cells; in a plain .py script this line is a SyntaxError and the
    # script-style call `asyncio.run(main())` has to be used instead.
    await main() # Notebook-only: awaits main() directly at cell top level

🚀 STARTING PANGOLY GPU SCRAPER
🎯 Strategy: Extract GPU specs and prices from pangoly.com
📁 Output: Two CSV files (specifications + prices)
🔗 Features: Includes actual seller product URLs
⏰ This will take time as we visit each product page...

📄 Loading page 1: https://pangoly.com/en/browse/vga
📝 Page title: Graphics Card finder: Pick graphics cards | Pangoly
✅ Found 14 products with selector: .productItem
🔍 Processing 14 products on page 1...
📦 [1.1] Processing: PNY GeForce RTX 5050 Dual Fan53rd / 101...
🔍 Scraping: PNY GeForce RTX 5050 Dual Fan53rd / 101...


  def __init__(


✅ Success: PNY GeForce RTX 5050 Dual Fan53rd / 101...
   🎯 Brand: Pny, GPU: GeForce RTX 5050, Memory: 8 GB
✅ [1] Added: PNY GeForce RTX 5050 Dual Fan53rd / 101...
📦 [1.2] Processing: Zotac GeForce RTX 5050 Solo53rd / 101...
🔍 Scraping: Zotac GeForce RTX 5050 Solo53rd / 101...
✅ Success: Zotac GeForce RTX 5050 Solo53rd / 101...
   🎯 Brand: Zotac, GPU: GeForce RTX 5050, Memory: 8 GB
✅ [2] Added: Zotac GeForce RTX 5050 Solo53rd / 101...
📦 [1.3] Processing: Zotac GeForce RTX 5050 Twin Edge OC53rd / 101...
🔍 Scraping: Zotac GeForce RTX 5050 Twin Edge OC53rd / 101...
✅ Success: Zotac GeForce RTX 5050 Twin Edge OC53rd / 101...
   🎯 Brand: Zotac, GPU: GeForce RTX 5050, Memory: 8 GB
✅ [3] Added: Zotac GeForce RTX 5050 Twin Edge OC53rd / 101...
📦 [1.4] Processing: Gigabyte GeForce RTX 5050 OC Low Profile53rd / 101...
🔍 Scraping: Gigabyte GeForce RTX 5050 OC Low Profile53rd / 101...
✅ Success: Gigabyte GeForce RTX 5050 OC Low Profile53rd / 101...
   🎯 Brand: Gigabyte, GPU: GeForce RTX 5050, Memor