# In [25]:
import time  # For time delay between requests
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def _fetch_page(url, headers, timeout):
    """Download *url* and return ``(soup, final_url)``, or ``None`` on failure.

    A failure is either a transport-level error raised by ``requests``
    (timeout, connection error, ...) or any HTTP status other than 200.
    Both are reported with ``print`` rather than raised.
    """
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the webpage: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return None

    # response.url is the URL after redirects; relative links on the page
    # must be resolved against it, not against the URL we asked for.
    return BeautifulSoup(response.content, 'html.parser'), response.url


def _text_or_na(node, selector):
    """Return the stripped text of the first *selector* match under *node*, or 'N/A'."""
    match = node.select_one(selector)
    return match.get_text(strip=True) if match else 'N/A'


def _absolute_url(base_url, ref):
    """Resolve *ref* against *base_url*, returning 'N/A' when *ref* is empty.

    Protocol-relative references (``//host/path``) are always given the
    ``https:`` scheme; already-absolute references are returned unchanged.
    """
    if not ref:
        return 'N/A'
    if ref.startswith('//'):
        return 'https:' + ref
    return urljoin(base_url, ref)


def _parse_product(product, page_url):
    """Build one output row (a dict) from a product link element."""
    image_tag = product.select_one('.productHeroContainer_dVvdX img')
    return {
        'Title': _text_or_na(product, '.productDescription_sryaw'),
        'Original Price': _text_or_na(product, '.originalPrice_jEWt1 .price__B9LP'),
        # .get() instead of [...] so a missing attribute yields 'N/A', not KeyError.
        'Link': _absolute_url(page_url, product.get('href')),
        'Image URL': _absolute_url(page_url, image_tag.get('src') if image_tag else None),
    }


def scrape_store(url, *, timeout=10, delay=1):
    """Scrape a product listing page, following its "next page" links.

    Every element matching ``.productLink_KM4PI`` on each page becomes one
    row. Fields that cannot be found in the markup are filled with 'N/A'.
    The CSS class names are hard-coded to the markup this script was written
    against (NOTE(review): they look auto-generated and may change).

    Args:
        url: URL of the first listing page.
        timeout: Seconds to wait for each HTTP request before giving up.
        delay: Seconds to sleep between successive page requests.

    Returns:
        A ``pandas.DataFrame`` with columns ``Title``, ``Original Price``,
        ``Link`` and ``Image URL`` (possibly with zero rows), or ``None`` if
        the first page could not be retrieved. If a later page fails, the
        rows collected so far are returned.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    columns = ['Title', 'Original Price', 'Link', 'Image URL']

    records = []
    visited = set()  # URLs already requested; guards against pagination cycles
    next_url = url
    is_first_page = True

    while next_url and next_url not in visited:
        if not is_first_page:
            # Add a small delay to avoid overloading the server
            time.sleep(delay)
        visited.add(next_url)

        page = _fetch_page(next_url, headers, timeout)
        if page is None:
            if is_first_page:
                return None
            break  # keep what was collected from earlier pages
        is_first_page = False

        soup, page_url = page
        visited.add(page_url)
        records.extend(
            _parse_product(product, page_url)
            for product in soup.select('.productLink_KM4PI')
        )

        # Resolve the next-page link against the current page so absolute,
        # root-relative and query-only hrefs are all handled.
        next_link = soup.select_one('.pagination-next a')
        href = next_link.get('href') if next_link else None
        next_url = urljoin(page_url, href) if href else None

    # Passing columns keeps the header row even when nothing was scraped.
    return pd.DataFrame(records, columns=columns)

# Example run: scrape one category listing and save the rows as CSV.
url = 'https://www.asos.com/women/holiday/holiday-dresses/cat/?cid=51513'  # Swap in the listing URL to scrape
output_file = 'asos_output.csv'

df = scrape_store(url)

# scrape_store returns None when the first page could not be retrieved.
if df is None:
    print("No data scraped.")
else:
    df.to_csv(output_file, index=False)
    print(f"Scraped {len(df)} products from {url} and saved to {output_file}")


# Output: Scraped 72 products from https://www.asos.com/women/holiday/holiday-dresses/cat/?cid=51513 and saved to asos_output.csv
