# Import Modules

In [7]:
# Modules for web scraping
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
# URL encoding
from requests.utils import requote_uri
# Module for data manipulation
import pandas as pd
# Module for regular expression
import re
# Module for file management
import os
# Module for timing
import time

# Web Scraping

**Chromedriver path**

In [9]:
# Chrome launch options: every argument below is registered on the options object
options = webdriver.ChromeOptions()
for chrome_arg in ("--start-maximized", 'window-size=2560,1440'):
    options.add_argument(chrome_arg)

In [11]:
# Location of the chromedriver binary. It can be overridden with the
# CHROMEDRIVER_PATH environment variable so the notebook is not tied to one
# machine's directory layout; the previous hard-coded location stays the default.
DRIVER_PATH = os.environ.get(
    'CHROMEDRIVER_PATH',
    '/home/audhi/github/Web-Scraping-Using-Python-and-R/Python/chromedriver',
)
# Start Chrome through that driver binary, using the `options` object
driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)

---

### How to get the reviews from a seller

**URL and Query**

In [12]:
# Site root and search endpoint
main_url = 'https://shopee.co.id'
url = 'https://shopee.co.id/search?keyword='
# Keyword that is appended to the search endpoint
seller = 'headset'
# Trim surrounding whitespace, lower-case and URL-encode the keyword.
# strip() is called without arguments on purpose: strip('') removes nothing.
query_encode = requote_uri(seller.strip().lower())
url_query = url + query_encode
# Current working directory of the kernel
dir_path = os.getcwd()

**Keyword**

In [13]:
# Show the assembled search URL
url_query

'https://shopee.co.id/search?keyword=headset'

**Access website**

In [14]:
# Load the search result page for the keyword
driver.get(url_query)
# NOTE(review): per the Selenium docs implicitly_wait sets the session-wide
# element-lookup timeout (here 20 s); it presumably does not pause at this
# point -- confirm that this is the intended way to wait for the page.
driver.implicitly_wait(20)

In [16]:
# Ask the browser for the scrollHeight of the document element
height_script = 'return document.documentElement.scrollHeight;'
scroll_height = driver.execute_script(height_script)
scroll_height

3877

In [17]:
# Scroll down in steps of 200 with a 1.5 s pause after each step
# (presumably so that lazily loaded content has time to render -- TODO confirm).
scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')
# The loop variable is `scroll_y`, not `iter`, so the built-in iter() is not
# shadowed in the kernel namespace after this cell has run.
for scroll_y in range(0, scroll_height, 200):
    driver.execute_script('window.scrollTo(0, {});'.format(scroll_y))
    time.sleep(1.5)

In [18]:
# Parse the HTML that the browser currently holds for the page
rendered_html = driver.page_source
soup = BeautifulSoup(rendered_html, 'html.parser')

**Scrape seller identity - name and followers**

In [19]:
# Text of the first element matching each seller-card class
nickname = soup.find(class_='shopee-search-user-item__nickname').get_text()
username = soup.find(class_='shopee-search-user-item__username').get_text()
follower_following = soup.find(class_='shopee-search-user-item__follow-count').get_text()

In [20]:
# One value per line: nickname, username, follower/following text
print(nickname, username, follower_following, sep='\n')

SoundMagic Indonesia Official 
soundmagic.indonesia
1,6RB pengikut | 1 mengikuti


**Scrape number of products, star rating, and service satisfaction**

In [21]:
# First element carrying the seller statistics class
elem_seller = soup.find(class_='shopee-search-user-item__statistics')

In [22]:
# Primary text of every statistic inside the seller statistics element.
# Iterating a Tag directly also yields bare text nodes (e.g. whitespace between
# tags), which cannot be searched with find('span', attrs=...); the loop is
# therefore restricted to the direct child tags.
elem_list = []
for elem in elem_seller.find_all(True, recursive=False):
    stat_seller = elem.find('span',attrs={'class':'shopee-search-user-seller-info-item__primary-text'}).text
    elem_list.append(stat_seller)

In [23]:
# Show the collected statistic texts
elem_list

['39', '4.6', '100%', 'hitungan jam']

In [24]:
# href of the first anchor carrying the shop-info class
shop_anchor = soup.find('a', class_='shopee-search-user-item__shop-info')
sub_query = shop_anchor['href']

**Go to the seller's homepage**

In [25]:
# Open the shop page of the seller scraped from the search results. The address
# is built from the site root and the scraped username instead of a hard-coded
# shop, so a different search keyword leads to the seller that was actually found.
seller_url = '{}/{}'.format(main_url, username.strip())
driver.get(seller_url)
driver.implicitly_wait(20)

**Scrape the seller identity on their homepage**

In [26]:
# Every element on the page that carries the seller-info-list class
seller_info_class = 'section-seller-overview-horizontal__seller-info-list'
identity_seller = driver.find_elements_by_class_name(seller_info_class)

In [27]:
# Inspect the matched elements
identity_seller

[<selenium.webdriver.remote.webelement.WebElement (session="dbdc4abb6ff69b60a7b3f4ed4daa94ae", element="d9c3c8f4-f7c5-4e58-b1c0-34254bf9d422")>]

In [28]:
# Value elements located inside the first matched info list
first_info_list = identity_seller[0]
identity_seller_list = first_info_list.find_elements_by_class_name('section-seller-overview__item-text-value')

In [29]:
# Text of every value element, in page order (blank entries are kept here)
identity_list = [value_elem.text for value_elem in identity_seller_list]
identity_list

['39',
 '',
 '1',
 '',
 '100% (Hitungan Jam)',
 '',
 '1,6RB',
 '',
 '4.6 (645 Penilaian)',
 '',
 '24 Bulan Lalu',
 '']

In [30]:
# Drop the empty-string entries and keep the rest in their original order
identity_list_fix = list(filter(lambda text: text != '', identity_list))
identity_list_fix

['39',
 '1',
 '100% (Hitungan Jam)',
 '1,6RB',
 '4.6 (645 Penilaian)',
 '24 Bulan Lalu']

In [31]:
# Every element carrying the navigation-bar item class
navbar_item_class = 'navbar-with-more-menu__item'
navbar = driver.find_elements_by_class_name(navbar_item_class)

In [32]:
# Number of navigation-bar items found
len(navbar)

4

**Look at all products**

In [33]:
# href attribute of every navigation-bar item
list_link = [nav_item.get_attribute('href') for nav_item in navbar]
list_link

['https://shopee.co.id/soundmagic.indonesia',
 'https://shopee.co.id/shop/79849470/search',
 'https://shopee.co.id/shop/79849470/search?shopCollection=11153701',
 'https://shopee.co.id/shop/79849470/search?shopCollection=11153623']

In [34]:
# Open the second navigation link (index 1 of list_link)
driver.get(list_link[1])
# NOTE(review): per the Selenium docs implicitly_wait sets the session-wide
# element-lookup timeout; it presumably does not pause here -- confirm intent.
driver.implicitly_wait(20)

In [35]:
# Scroll down in steps of 200 with a 1.5 s pause after each step
# (presumably so that lazily loaded content has time to render -- TODO confirm).
scroll_height = driver.execute_script('return document.documentElement.scrollHeight;')
# The loop variable is `scroll_y`, not `iter`, so the built-in iter() is not
# shadowed in the kernel namespace after this cell has run.
for scroll_y in range(0, scroll_height, 200):
    driver.execute_script('window.scrollTo(0, {});'.format(scroll_y))
    time.sleep(1.5)

**List all products and their characteristics**

In [36]:
# Every element carrying the shop search-result view class
result_view_class = 'shop-search-result-view'
product_list = driver.find_elements_by_class_name(result_view_class)

In [37]:
# Inspect the first matched result view
product_list[0]

<selenium.webdriver.remote.webelement.WebElement (session="dbdc4abb6ff69b60a7b3f4ed4daa94ae", element="6ad81e45-5a77-4d8d-bbf5-357a294fbcf1")>

In [38]:
# URL: href of every anchor tag inside the first result view
link = product_list[0].find_elements_by_tag_name('a')
product_link = [anchor.get_attribute('href') for anchor in link]
product_link

['https://shopee.co.id/SoundMagic-ES18S-Green-In-Earphone-WIth-Mic-Garansi-Resmi-1-Tahun-i.79849470.1540892606',
 'https://shopee.co.id/SoundMAGIC-ES18-Orange-In-Earphone-Non-Mic-Garansi-Resmi-1-tahun-i.79849470.1347518477',
 'https://shopee.co.id/SoundMagic-E11C-In-Earphone-With-Mic-Garansi-Resmi-1-Tahun-(HOT-PRODUCT-Paling-Laris)-i.79849470.1540922945',
 'https://shopee.co.id/SoundMAGIC-P11S-Black-Garansi-Resmi-1-Tahun-i.79849470.1347363500',
 'https://shopee.co.id/SoundMAGIC-ES18-In-Earphone-Non-Mic-Green-Garansi-Resmi-1-tahun-i.79849470.1335845309',
 'https://shopee.co.id/SoundMagic-ES19S-In-Earphone-With-Mic-White-(NEW-PRODUCT)-Garansi-Resmi-1-Tahun-i.79849470.1566440074',
 'https://shopee.co.id/SoundMAGIC-E10C-In-Earphone-Silver-Garansi-Resmi-1-Tahun-(Yang-Paling-Banyak-dapat-penghargaan)-i.79849470.1347580205',
 'https://shopee.co.id/SoundMAGIC-ES18-Black-In-Earphone-Non-Mic-Garansi-Resmi-1-tahun-i.79849470.1347501038',
 'https://shopee.co.id/SoundMAGIC-ES18-Red-In-Earphone-Non-

In [39]:
# Product name: text of every element with the name class in the first result view
product = product_list[0].find_elements_by_class_name('O6wiAW')
product_name = [name_node.text for name_node in product]
product_name

['SoundMagic ES18S Green In Earphone WIth Mic Garansi Resmi 1 Tahun',
 'SoundMAGIC ES18 Orange In Earphone Non Mic Garansi Resmi 1 tahun',
 'SoundMagic E11C In Earphone With Mic Garansi Resmi 1 Tahun (HOT PRODUCT Paling Laris)',
 'SoundMAGIC P11S Black Garansi Resmi 1 Tahun',
 'SoundMAGIC ES18 In Earphone Non Mic Green Garansi Resmi 1 tahun',
 'SoundMagic ES19S In Earphone With Mic White (NEW PRODUCT) Garansi Resmi 1 Tahun',
 'SoundMAGIC E10C In Earphone Silver Garansi Resmi 1 Tahun (Yang Paling Banyak dapat penghargaan)',
 'SoundMAGIC ES18 Black In Earphone Non Mic Garansi Resmi 1 tahun',
 'SoundMAGIC ES18 Red In Earphone Non Mic Garansi Resmi 1 tahun',
 'SoundMAGIC E10 in Earphone Blue Garansi Resmi 1 tahun',
 'SoundMagic Bluetooth Earphone In Ear Isolating - ES20BT - Black (NEW PRODUCT) Garansi Resmi 1 Tahun',
 'SoundMAGIC E10C In Earphone Red Garansi Resmi 1 Tahun (paling Banyak Mendapatkan Penghargaan)',
 'SoundMAGIC E10BT Bluetooth Earphones with Smartphone Controls & Mic Garansi

In [40]:
# Product price: text of every element with the price class in the first result view.
# NOTE(review): one entry in the captured output holds two prices separated by a
# newline -- presumably original and discounted price; split before numeric use.
price = product_list[0].find_elements_by_class_name('_2lBkmX')
product_price = [price_node.text for price_node in price]
product_price

['Rp175.000',
 'Rp149.000',
 'Rp635.000',
 'Rp250.000',
 'Rp149.000',
 'Rp225.000',
 'Rp525.000',
 'Rp149.000',
 'Rp149.000',
 'Rp435.000',
 'Rp395.000',
 'Rp525.000',
 'Rp585.000\nRp365.000',
 'Rp435.000',
 'Rp435.000',
 'Rp700.000',
 'Rp525.000',
 'Rp250.000',
 'Rp626.500',
 'Rp750.000',
 'Rp700.000',
 'Rp525.000',
 'Rp895.000',
 'Rp247.500',
 'Rp365.000',
 'Rp94.500']

In [41]:
# Every element with the rating class inside the first result view
first_result_view = product_list[0]
rating = first_result_view.find_elements_by_class_name('_2bRB2L')

In [42]:
# Number of rating elements found
len(rating)

26

In [43]:
# Count the star-wrapper elements inside the first rating element
first_rating_stars = rating[0].find_elements_by_class_name('shopee-rating-stars__star-wrapper')
rating_elem = len(first_rating_stars)
rating_elem

5

In [44]:
# Rating: number of star-wrapper elements found inside each rating element.
# NOTE(review): the captured output only contains 5 and 0, so this looks like a
# "has a rating" marker rather than the score itself -- confirm before analysis.
rating = product_list[0].find_elements_by_class_name('_2bRB2L')
product_rating = [
    len(rating_box.find_elements_by_class_name('shopee-rating-stars__star-wrapper'))
    for rating_box in rating
]
product_rating

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0, 0, 0, 5, 5, 5, 5, 0, 0, 0, 0]

In [45]:
# Number of collected rating values
len(product_rating)

26

In [46]:
# Units sold: text of every element with the sold class in the first result view
# (the text can be an empty string, as seen in the captured output)
sold = product_list[0].find_elements_by_class_name('_18SLBt')
product_sold = [sold_node.text for sold_node in sold]
product_sold

['81 Terjual',
 '65 Terjual',
 '79 Terjual',
 '14 Terjual',
 '33 Terjual',
 '14 Terjual',
 '34 Terjual',
 '16 Terjual',
 '6 Terjual',
 '2 Terjual',
 '1 Terjual',
 '27 Terjual',
 '8 Terjual',
 '6 Terjual',
 '5 Terjual',
 '',
 '1 Terjual',
 '',
 '1 Terjual',
 '3 Terjual',
 '2 Terjual',
 '3 Terjual',
 '',
 '',
 '',
 '']

In [47]:
# Place: text of every element with the place class in the first result view
place = product_list[0].find_elements_by_class_name('_3amru2')
product_place = [place_node.text for place_node in place]
product_place

['KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT',
 'KOTA JAKARTA PUSAT']

**Create dataframe**

In [57]:
# Show the cleaned seller values again; they are read by index when the columns are built
identity_list_fix

['39',
 '1',
 '100% (Hitungan Jam)',
 '1,6RB',
 '4.6 (645 Penilaian)',
 '24 Bulan Lalu']

In [58]:
# Repeat every seller-level value once per scraped product link
n_rows = len(product_link)
nickname_seller = [nickname] * n_rows
username_seller = [username] * n_rows
# Values below are picked from identity_list_fix by position
num_product = [identity_list_fix[0]] * n_rows
following = [identity_list_fix[1]] * n_rows
chat_service = [identity_list_fix[2]] * n_rows
follower = [identity_list_fix[3]] * n_rows
rating = [identity_list_fix[4]] * n_rows
sale_age = [identity_list_fix[5]] * n_rows

In [60]:
# (column name, values) pairs in the column order of the final table
seller_columns = [
    ('nickname_seller', nickname_seller),
    ('username_seller', username_seller),
    ('follower', follower),
    ('following', following),
    ('num_product', num_product),
    ('rating', rating),
    ('chat_service', chat_service),
    ('sale_age', sale_age),
    ('product_name', product_name),
    ('product_price', product_price),
    ('product_rating', product_rating),
    ('product_sold', product_sold),
    ('product_place', product_place),
    ('product_link', product_link),
]
# Each list becomes a named Series; the Series are placed side by side as columns
data_seller = pd.concat(
    [pd.Series(values, name=column) for column, values in seller_columns],
    axis=1,
)

In [61]:
# Display the assembled table
data_seller

Unnamed: 0,nickname_seller,username_seller,follower,following,num_product,rating,chat_service,sale_age,product_name,product_price,product_rating,product_sold,product_place,product_link
0,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMagic ES18S Green In Earphone WIth Mic Ga...,Rp175.000,5,81 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMagic-ES18S-Green-In...
1,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC ES18 Orange In Earphone Non Mic Gar...,Rp149.000,5,65 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-ES18-Orange-In...
2,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMagic E11C In Earphone With Mic Garansi R...,Rp635.000,5,79 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMagic-E11C-In-Earpho...
3,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC P11S Black Garansi Resmi 1 Tahun,Rp250.000,5,14 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-P11S-Black-Gar...
4,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC ES18 In Earphone Non Mic Green Gara...,Rp149.000,5,33 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-ES18-In-Earpho...
5,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMagic ES19S In Earphone With Mic White (N...,Rp225.000,5,14 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMagic-ES19S-In-Earph...
6,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC E10C In Earphone Silver Garansi Res...,Rp525.000,5,34 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-E10C-In-Earpho...
7,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC ES18 Black In Earphone Non Mic Gara...,Rp149.000,5,16 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-ES18-Black-In-...
8,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC ES18 Red In Earphone Non Mic Garans...,Rp149.000,5,6 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-ES18-Red-In-Ea...
9,SoundMagic Indonesia Official,soundmagic.indonesia,"1,6RB",1,39,4.6 (645 Penilaian),100% (Hitungan Jam),24 Bulan Lalu,SoundMAGIC E10 in Earphone Blue Garansi Resmi ...,Rp435.000,5,2 Terjual,KOTA JAKARTA PUSAT,https://shopee.co.id/SoundMAGIC-E10-in-Earphon...
