# Web Scraping - Shopee Seller Products

---

For an introduction to Selenium, see [**this guide**](https://www.scrapingbee.com/blog/selenium-python/).

## Import modules

In [None]:
# Write the packages installed in the kernel's environment to requirements.txt
%pip freeze > requirements.txt

In [1]:
# Module for file management
import os
# Module for regular expression
import re
# Module for timing
import time
# Module for data manipulation
import pandas as pd
# Modules for web scraping
from bs4 import BeautifulSoup
# URL encoding
from requests.utils import requote_uri
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

## Load the Chromedriver

Read how to download the webdriver for Chrome [**here**](https://chromedriver.chromium.org/downloads).

**Options**

In [2]:
# Chrome launch options: start maximized and request a 2560x1440 window
options = webdriver.ChromeOptions()
for argument in ('--start-maximized', 'window-size=2560,1440'):
    options.add_argument(argument)

**Chromedriver path**

In [3]:
# Location of the chromedriver binary, relative to the working directory
DRIVER_PATH = '../bin/chromedriver'
# Selenium 4 receives the driver location through a Service object; the
# `executable_path` keyword was deprecated in 4.0 and removed in later
# releases. Older releases reject `service=` with a TypeError, so fall back
# to the legacy keyword there.
try:
    driver = webdriver.Chrome(service = Service(DRIVER_PATH), options = options)
except TypeError:
    driver = webdriver.Chrome(executable_path = DRIVER_PATH, options = options)

## Core Procedure

**URL and Query**

In [10]:
# Seller username used as the shop path in the URL
seller = 'compshop88'
# Page number to request, kept as a string for URL formatting
page = '0'
# URL template with placeholders for the seller and the page (sortBy=pop)
url = 'https://shopee.co.id/{seller}?page={page}&sortBy=pop'
# Fill both placeholders to obtain the address that is actually visited
url_query = url.format(**{'seller': seller, 'page': page})

In [11]:
# Display the formatted URL (template filled with seller and page)
url_query

'https://shopee.co.id/compshop88?page=0&sortBy=pop'

In [12]:
# Root directory: the process's current working directory when this cell runs
dir_path = os.getcwd()

**Access website**

In [13]:
# Navigate the browser to the seller's listing page
driver.get(url_query)
# NOTE(review): the 20 s implicit wait is set after navigation; presumably it
# governs later element lookups rather than this page load — confirm.
driver.implicitly_wait(20)

In [14]:
# Read the document's scrollHeight from the page via JavaScript and display it
scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')
scroll_height

4219

In [15]:
# Scroll down slowly, 200 px per second, until the bottom of the document.
# The height is re-read after every step so that content which extends the
# page while scrolling is also reached; on a page whose height never changes
# this visits exactly the same offsets as a single up-front measurement.
# NOTE: a page that grows without bound would keep this loop running.
scroll_position = 0
scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')
while scroll_position < scroll_height:
    driver.execute_script('window.scrollTo(0, {});'.format(scroll_position))
    time.sleep(1)
    scroll_position += 200
    scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')

**List all products and their characteristics**

In [16]:
# Container element(s) holding the shop's search results.
# find_elements(By...) replaces find_elements_by_class_name, which newer
# Selenium releases no longer provide.
product_list = driver.find_elements(By.CLASS_NAME, 'shop-search-result-view')

In [17]:
# Inspect the first matched result container (a WebElement)
product_list[0]

<selenium.webdriver.remote.webelement.WebElement (session="6368255b5147e59651176dc48296ee99", element="2a5b55f7-963c-407a-a8b5-4b8d87905871")>

In [18]:
# Product URLs: the href of every anchor inside the first result container
link = product_list[0].find_elements(By.TAG_NAME, 'a')
product_link = [elem.get_attribute('href') for elem in link]
product_link

['https://shopee.co.id/PACKING-KAYU-J-T-PENGIRIMAN-LUAR-JAWA-LUAR-PULAU-i.225418842.3634048048',
 'https://shopee.co.id/Laptop-Murah-Lenovo-Flex-5-14-Ryzen-3-4300-4GB-128ssd-W10-14.0FHD-i.225418842.6167653246',
 'https://shopee.co.id/Laptop-Murah-HP-14-AMD-A4-4GB-128GB-(64-64)-W10-14.0-Black-Silver-i.225418842.7678624915',
 'https://shopee.co.id/Asus-X441BA-GA441T-GA442T-GA443T-GA444T-A4-9125-4GB-1TB-W10-14.0-DVD-i.225418842.6734876824',
 'https://shopee.co.id/Laptop-Murah-Asus-E410MA-BV451TS-BV452TS-BV453TS-N4020-4GB-512ssd-W10-OHS-14.0-Blit-NumPad-i.225418842.4168823182',
 'https://shopee.co.id/PALING-LAKUUU-PAKET-PERLINDUNGAN-LAYAR-DAN-COVER-ATAS-LAPTOP-i.225418842.6847414277',
 'https://shopee.co.id/Laptop-Asus-Vivobook-Flip-TP401MA-BZ221TS-2in1-Touch-N4020-4GB-256ssd-W10-OHS-14.0-PEN-i.225418842.6563094357',
 'https://shopee.co.id/Laptop-Murah-HP-14-Ryzen-3-3250-4GB-256ssd-Vega3-W10-14.0-i.225418842.5564441548',
 'https://shopee.co.id/Laptop-Murah-Lenovo-Ideapad-Slim-5-14-EVID-Ryz

In [20]:
# Product names: visible text of every 'PFM7lj' element in the first result container
product = product_list[0].find_elements(By.CLASS_NAME, 'PFM7lj')
product_name = [elem.text for elem in product]
product_name

['PACKING KAYU J&T - PENGIRIMAN LUAR JAWA / LUAR PULAU',
 'Laptop Murah Lenovo Flex 5 14 Ryzen 3 4300 4GB 128ssd W10 14.0FHD',
 'Laptop Murah HP 14 AMD A4 4GB 128GB (64+64) W10 14.0 Black / Silver',
 'Asus X441BA GA441T / GA442T / GA443T / /GA444T A4 9125 4GB 1TB W10 14.0 DVD',
 'Laptop Murah Asus E410MA BV451TS / BV452TS / BV453TS N4020 4GB 512ssd W10+OHS 14.0 Blit NumPad',
 'PALING LAKUUU - PAKET PERLINDUNGAN LAYAR DAN COVER ATAS LAPTOP',
 'Laptop Asus Vivobook Flip TP401MA BZ221TS 2in1 Touch N4020 4GB 256ssd W10+OHS 14.0 PEN',
 'Laptop Murah HP 14 Ryzen 3 3250 4GB 256ssd Vega3 W10 14.0',
 'Laptop Murah Lenovo Ideapad Slim 5 14 EVID Ryzen 7 4800 8GB 512ssd W10+OHS 14.0FHD IPS 2Yr ADP BLUE',
 'Laptop Murah ZYREX Sky 232 N4020 4GB 64+256ssd 11.6FHD W10 BT (Garansi Dragon)',
 'Flashdisk Murah Sandisk Cruzer Blade 16GB 100% Original Garansi Resmi 5 tahun',
 'Tablet Handphone HP Murah MAXTRON GENIO TAB 3GB 32GB / 64GB / 160GB 8inch',
 'Laptop Murah Dell Latitude 13 7389 2in1 Touch i5 7300

In [74]:
# Product categories: text of every '_3yjqG-' element on the page
category = driver.find_elements(By.CLASS_NAME, '_3yjqG-')
# The first match is deliberately skipped, exactly as the original index loop
# did. NOTE(review): presumably it is a heading rather than a category — verify.
categoryList = [elem.text for elem in category[1:]]

In [91]:
# Show the category list serialised as a single string
str(categoryList)

"['Laptop Konsumer', 'Laptop Gaming', 'Ultrabook', '2-in-1', 'Storage Eksternal', 'Aksesoris Komputer', 'Desktop', 'Mouse & Keyboards', 'Komponen Komputer', 'Gaming', 'Tas Pria', 'Handphone & Aksesoris']"

In [23]:
# Product prices: raw text of every '_32hnQt' element (a single price or a
# 'low - high' range, as seen in the output)
price = product_list[0].find_elements(By.CLASS_NAME, '_32hnQt')
product_price = [elem.text for elem in price]
product_price

['Rp100.000',
 'Rp6.999.000 - Rp7.699.000',
 'Rp4.449.000 - Rp4.649.000',
 'Rp4.849.000 - Rp5.049.000',
 'Rp5.549.000 - Rp5.749.000',
 'Rp100.000 - Rp200.000',
 'Rp6.549.000 - Rp6.749.000',
 'Rp6.349.000 - Rp6.549.000',
 'Rp10.299.000 - Rp10.499.000',
 'Rp3.599.000 - Rp3.799.000',
 'Rp41.999',
 'Rp1.149.000',
 'Rp8.699.000 - Rp8.899.000',
 'Rp4.849.000 - Rp5.049.000',
 'Rp4.649.000 - Rp4.849.000',
 'Rp10.399.000 - Rp10.599.000',
 'Rp3.999.000',
 'Rp7.999.000 - Rp8.199.000',
 'Rp13.999.000 - Rp14.199.000',
 'Rp199.000',
 'Rp23.499.000 - Rp23.699.000',
 'Rp11.699.000 - Rp11.899.000',
 'Rp10.999.000 - Rp11.199.000',
 'Rp2.799.000 - Rp2.999.000',
 'Rp7.289.000 - Rp7.489.000',
 'Rp7.999.000 - Rp8.199.000',
 'Rp650.000',
 'Rp390.000',
 'Rp250.000',
 'Rp10.179.000 - Rp10.379.000']

In [26]:
# Rating elements ('_3dC36C') inside the first result container
rating = product_list[0].find_elements(By.CLASS_NAME, '_3dC36C')

In [27]:
# Number of rating elements found
len(rating)

30

In [28]:
# Count the star-wrapper elements inside the first rating element
rating_elem = len(rating[0].find_elements(By.CLASS_NAME, 'shopee-rating-stars__star-wrapper'))
rating_elem

5

In [29]:
# Rating: number of star-wrapper elements inside each rating element.
# NOTE(review): the recorded output is only ever 5 or 0, so this looks like a
# count of rendered star slots rather than the product's actual score — verify.
rating = product_list[0].find_elements(By.CLASS_NAME, '_3dC36C')
product_rating = [
    len(elem.find_elements(By.CLASS_NAME, 'shopee-rating-stars__star-wrapper'))
    for elem in rating
]
product_rating

[5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 0,
 0,
 5,
 5,
 5,
 5,
 5,
 5,
 5]

In [30]:
# Number of collected rating values
len(product_rating)

30

In [31]:
# Units sold per product: raw text of every 'go5yPW' element (e.g.
# '33 Terjual'; the output shows an empty string for some products)
sold = product_list[0].find_elements(By.CLASS_NAME, 'go5yPW')
product_sold = [elem.text for elem in sold]
product_sold

['33 Terjual',
 '27 Terjual',
 '19 Terjual',
 '10 Terjual',
 '10 Terjual',
 '7 Terjual',
 '7 Terjual',
 '5 Terjual',
 '6 Terjual',
 '14 Terjual',
 '7 Terjual',
 '3 Terjual',
 '4 Terjual',
 '4 Terjual',
 '3 Terjual',
 '5 Terjual',
 '3 Terjual',
 '3 Terjual',
 '2 Terjual',
 '3 Terjual',
 '2 Terjual',
 '',
 '',
 '2 Terjual',
 '2 Terjual',
 '1 Terjual',
 '1 Terjual',
 '2 Terjual',
 '4 Terjual',
 '2 Terjual']

In [33]:
# Seller location shown on each product card: text of every '_2CWevj' element
place = product_list[0].find_elements(By.CLASS_NAME, '_2CWevj')
product_place = [elem.text for elem in place]
product_place

['KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA',
 'KOTA JAKARTA UTARA']

In [34]:
# Open a single product's detail page (hard-coded URL); the following cells
# read per-product details from this page
driver.get('https://shopee.co.id/PACKING-KAYU-J-T-PENGIRIMAN-LUAR-JAWA-LUAR-PULAU-i.225418842.3634048048')

In [36]:
# Text of the second 'OitLRu' element on the product page
# NOTE(review): presumably the number of reviews — confirm against the page.
driver.find_elements(By.CLASS_NAME, 'OitLRu')[1].text

'21'

In [39]:
# Number of favourites: text of the second '_39mrb_' element (e.g. 'Favorit (14)')
driver.find_elements(By.CLASS_NAME, '_39mrb_')[1].text

'Favorit (14)'

In [59]:
# Product specification: map each row's label to its value.
# The rows are looked up once and reused, instead of re-querying the browser
# for the whole row list twice per iteration.
spec_rows = driver.find_elements(By.CLASS_NAME, 'aPKXeO')
length_element = len(spec_rows)
specification = {}
for row in spec_rows:
    # Label: first 'SFJkS3' element in the row; value: first <div> in the row
    key = row.find_elements(By.CLASS_NAME, 'SFJkS3')[0].text
    value = row.find_elements(By.TAG_NAME, 'div')[0].text
    specification[key] = value

In [85]:
# Show the specification dict serialised to a string, wrapped in a one-item list
[str(specification)]

["{'Kategori': 'Shopee\\nSerba Serbi', 'Merek': 'Tidak Ada Merek', 'Stok': '100', 'Dikirim Dari': 'KOTA JAKARTA UTARA - PADEMANGAN, DKI JAKARTA, ID'}"]

In [63]:
# Product description: visible text of the '_3yZnxJ' element
driver.find_element(By.CLASS_NAME, '_3yZnxJ').text

'Deskripsi PACKING KAYU - PENGIRIMAN LUAR JAWA / LUAR PULAU\n\nPENGIRIMAN KHUSUS J&T, SELAIN J&T TIDAK BISA 100%\n\nUNTUK UKURAN SEMUA LAPTOP'

**Next page**

In [65]:
# Return to the seller's listing page
driver.get(url_query)
# NOTE(review): the 20 s implicit wait is set after navigation; presumably it
# governs later element lookups rather than this page load — confirm.
driver.implicitly_wait(20)

In [66]:
# Read the document's scrollHeight from the page via JavaScript and display it
scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')
scroll_height

4219

In [67]:
# Scroll down slowly, 200 px per second, until the bottom of the document.
# The height is re-read after every step so that content which extends the
# page while scrolling is also reached; on a page whose height never changes
# this visits exactly the same offsets as a single up-front measurement.
# NOTE: a page that grows without bound would keep this loop running.
scroll_position = 0
scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')
while scroll_position < scroll_height:
    driver.execute_script('window.scrollTo(0, {});'.format(scroll_position))
    time.sleep(1)
    scroll_position += 200
    scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')

In [68]:
# Maximum page: text of the pagination control's total-pages element
driver.find_element(By.CLASS_NAME, 'shopee-mini-page-controller__total').text

'18'

In [76]:
# Scratch data for a list-concatenation check: a three-letter list and an empty list
i = list('ABC')
l = list()

In [80]:
# Second scratch list; shares the letter 'A' with the first one
j = list('DEA')

In [81]:
# Concatenation keeps order and duplicates ('A' appears twice in the result)
l + i + j

['A', 'B', 'C', 'D', 'E', 'A']

In [94]:
# Scratch check: collect the inner index of a 3 x 5 nested iteration into one flat list
l = []
index_pairs = ((outer, inner) for outer in range(3) for inner in range(5))
for i, j in index_pairs:
    l.append(j)

In [95]:
# Display the flat list built by the nested loop
l

[0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]