In [1]:
# HCS Workshop 2, Web Scraping
# Scrape the bag sections of Net-a-Porter (https://www.net-a-porter.com)

In [2]:
# Author: Addison Zhang

In [3]:
# Import the packages used for fetching pages, parsing HTML, and tabulating results
import pandas as pd 
import requests
from bs4 import BeautifulSoup

In [4]:
def scrap_netaporter_bag(t, max_pages=None, timeout=30):
    """Scrape the listing pages of one Net-a-Porter bag category.

    Pages are requested in order: the bare category URL first, then
    ``?pageNumber=2``, ``?pageNumber=3``, ... Scraping stops at the first
    response whose status is not 200, at the first page that has no product
    grid or no product links, or once ``max_pages`` pages have been read.

    Parameters
    ----------
    t : str
        Category slug interpolated into the listing URL
        (``https://www.net-a-porter.com/en-us/shop/bags/{t}``).
    max_pages : int or None, optional
        Upper bound on the number of pages fetched; ``None`` means no bound.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot
        hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per product with columns ``name``, ``price`` (float),
        ``brand`` and ``link`` (absolute URL). Empty if nothing was found.

    Raises
    ------
    ValueError
        If a scraped price cannot be converted to ``float`` after removing
        ``,`` and ``$``.
    """
    site_root = "https://www.net-a-porter.com"
    category_url = f"{site_root}/en-us/shop/bags/{t}"
    grid_class = "ProductGrid50 ProductListWithLoadMore50__listingGrid"
    card_class = "ProductItem24__skeletonContainer"

    def _itemprop_text(card, prop):
        # Text of the <span itemprop=prop> inside a product card, or None.
        span = card.find("span", {"itemprop": prop})
        return span.get_text().strip() if span is not None else None

    records = []
    page_number = 1
    while max_pages is None or page_number <= max_pages:
        # Build each URL from an explicit counter. Editing the last character
        # of the previous URL breaks as soon as the page number has two digits.
        if page_number == 1:
            page_url = category_url
        else:
            page_url = f"{category_url}?pageNumber={page_number}"

        page = requests.get(page_url, timeout=timeout)
        if page.status_code != 200:
            break

        soup = BeautifulSoup(page.content, 'html.parser')
        grid = soup.find("div", {"class": grid_class})
        if grid is None:
            # A 200 response without a product grid: nothing left to read.
            break

        found_on_page = 0
        for anchor in grid.find_all('a'):
            card = anchor.find(class_=card_class)
            href = anchor.get('href')
            if card is None or href is None:
                # Not a product link; skipping keeps every record complete.
                continue

            price = _itemprop_text(card, "price")
            if price is not None:
                price = price.replace(',', '').replace('$', '')

            records.append({
                'name': _itemprop_text(card, "name"),
                'price': price,
                'brand': _itemprop_text(card, "brand"),
                'link': site_root + href,
            })
            found_on_page += 1

        if found_on_page == 0:
            # Guards against looping forever on pages that keep answering 200.
            break
        page_number += 1

    products_df = pd.DataFrame(records, columns=['name', 'price', 'brand', 'link'])
    products_df['price'] = products_df['price'].astype('float')

    return products_df

In [5]:
# Candidate category slugs; each is the last path segment of a bags listing URL.
bag_types = [
    "shoulder-bags",
    "tote-bags",
    "mini-bags",
    "backpacks",
    "clutch-bags",
    "evening-bags",
    "luggage-and-travel",
    "belt-bags",
    "bucket-bags",
    "top-handle",
    "bag-trends",
]

# Scrape a single category; the trailing bare expression renders the frame.
bag_type = "top-handle"
results = scrap_netaporter_bag(bag_type)
results

Unnamed: 0,name,price,brand,link
0,Uptown small textured-leather tote,2290.0,SAINT LAURENT,https://www.net-a-porter.com/en-us/shop/produc...
1,Puzzle mini textured-leather shoulder bag,1950.0,Loewe,https://www.net-a-porter.com/en-us/shop/produc...
2,Ai Messenger small convertible quilted leather...,1990.0,Akris,https://www.net-a-porter.com/en-us/shop/produc...
3,Hourglass small textured-leather tote,1850.0,Balenciaga,https://www.net-a-porter.com/en-us/shop/produc...
4,Eyes small leather-trimmed beaded tote,795.0,Anya Hindmarch,https://www.net-a-porter.com/en-us/shop/produc...
...,...,...,...,...
202,Leather tote,2890.0,Fendi,https://www.net-a-porter.com/en-us/shop/produc...
203,Bon Bon velvet bucket bag,995.0,Jimmy Choo,https://www.net-a-porter.com/en-us/shop/produc...
204,Flat Circle suede tote,990.0,The Row,https://www.net-a-porter.com/en-us/shop/produc...
205,Metro medium canvas and leather tote,3390.0,TOM FORD,https://www.net-a-porter.com/en-us/shop/produc...


In [6]:
# Most expensive first; rows with equal price are ordered by brand ascending.
results.sort_values(
    by=['price', 'brand'],
    ascending=[False, True],
)

Unnamed: 0,name,price,brand,link
182,Cabas large intrecciato leather tote,7500.0,Bottega Veneta,https://www.net-a-porter.com/en-us/shop/produc...
173,Cabas medium intrecciato leather tote,7000.0,Bottega Veneta,https://www.net-a-porter.com/en-us/shop/produc...
190,Cabas medium intrecciato leather tote,7000.0,Bottega Veneta,https://www.net-a-porter.com/en-us/shop/produc...
200,Leather-trimmed python tote,4600.0,Fendi,https://www.net-a-porter.com/en-us/shop/produc...
148,Garance medium studded leather tote,4360.0,Alaïa,https://www.net-a-porter.com/en-us/shop/produc...
...,...,...,...,...
50,+ Palorosa small striped woven tote,290.0,Eres,https://www.net-a-porter.com/en-us/shop/produc...
43,Play Mini croc-effect leather tote,290.0,Gu_de,https://www.net-a-porter.com/en-us/shop/produc...
68,Bobby mini buckled leather tote,270.0,BOYY,https://www.net-a-porter.com/en-us/shop/produc...
57,Eos rattan tote,220.0,Cult Gaia,https://www.net-a-porter.com/en-us/shop/produc...
