In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
import time
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import locale
import numpy as np
import openpyxl
# Switch every locale category to 'IND' so that `locale.atof` (used by the
# scraping cells further down) parses price strings with that locale's
# thousands/decimal separators.
# NOTE(review): 'IND' looks like a Windows-style locale name (Indonesian) —
# confirm it resolves on the machine that runs this notebook.
locale.setlocale(locale.LC_ALL, 'IND')
# Snapshot of the active locale's numeric/monetary conventions
# (not referenced again in the visible code).
conv = locale.localeconv()


In [7]:
from pathlib import Path
from copy import copy
from typing import Union, Optional
import numpy as np
import pandas as pd
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter


def copy_excel_cell_range(
        src_ws: "openpyxl.worksheet.worksheet.Worksheet",
        min_row: Optional[int] = None,
        max_row: Optional[int] = None,
        min_col: Optional[int] = None,
        max_col: Optional[int] = None,
        tgt_ws: "Optional[openpyxl.worksheet.worksheet.Worksheet]" = None,
        tgt_min_row: int = 1,
        tgt_min_col: int = 1,
        with_style: bool = True
) -> "openpyxl.worksheet.worksheet.Worksheet":
    """
    Copy a rectangular cell range from one worksheet to another.

    The source window spans [min_row]..[max_row] and [min_col]..[max_col]
    of [src_ws]; its top-left cell is written to ([tgt_min_row],
    [tgt_min_col]) of [tgt_ws] and every other cell keeps its position
    relative to that corner.

    @param src_ws:  source worksheet
    @param min_row: smallest row index in the source worksheet (1-based index).
                    None means "start at row 1".
    @param max_row: largest row index in the source worksheet (1-based index).
                    None is passed through to `iter_rows` unchanged.
    @param min_col: smallest column index in the source worksheet (1-based index).
                    None means "start at column 1".
    @param max_col: largest column index in the source worksheet (1-based index).
                    None is passed through to `iter_rows` unchanged.
    @param tgt_ws:  target worksheet.
                    If None, then the copy will be done to the same (source) worksheet.
    @param tgt_min_row: target row index of the top-left cell (1-based index)
    @param tgt_min_col: target column index of the top-left cell (1-based index)
    @param with_style:  whether to copy cell style. Default: True

    @return: target worksheet object
    """
    if tgt_ws is None:
        tgt_ws = src_ws

    # Shift that maps the top-left cell of the source window onto the
    # requested target cell. Previously the source start was ignored, so a
    # window beginning at row 5 was written at row `5 + tgt_min_row - 1`
    # instead of `tgt_min_row`. An omitted min_row/min_col is treated as 1,
    # which keeps the result unchanged for callers that never passed them.
    row_shift = tgt_min_row - (min_row or 1)
    col_shift = tgt_min_col - (min_col or 1)

    # https://stackoverflow.com/a/34838233/5741205
    for row in src_ws.iter_rows(min_row=min_row, max_row=max_row,
                                min_col=min_col, max_col=max_col):
        for cell in row:
            tgt_cell = tgt_ws.cell(
                row=cell.row + row_shift,
                column=cell.col_idx + col_shift,
                value=cell.value
            )
            if with_style and cell.has_style:
                # Copy each style component separately; the objects are
                # copied so the target cell does not share them with the
                # source cell.
                tgt_cell.font = copy(cell.font)
                tgt_cell.border = copy(cell.border)
                tgt_cell.fill = copy(cell.fill)
                tgt_cell.number_format = copy(cell.number_format)
                tgt_cell.protection = copy(cell.protection)
                tgt_cell.alignment = copy(cell.alignment)
    return tgt_ws


def append_df_to_excel(
        filename: Union[str, Path],
        df: pd.DataFrame,
        sheet_name: str = 'Sheet1',
        startrow: Optional[int] = None,
        max_col_width: int = 30,
        autofilter: bool = False,
        fmt_int: str = "#,##0",
        fmt_float: str = "#,##0.00",
        fmt_date: str = "yyyy-mm-dd",
        fmt_datetime: str = "yyyy-mm-dd hh:mm",
        truncate_sheet: bool = False,
        storage_options: Optional[dict] = None,
        **to_excel_kwargs
) -> None:
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    How it works: when [sheet_name] already exists, pandas writes [df] to a
    freshly generated temporary sheet; the rows of that sheet are then copied
    below the existing data of [sheet_name] and the temporary sheet is removed.

    @param filename: File path (Example: '/path/to/file.xlsx').
                     The workbook is always stored with an ".xlsx" suffix;
                     any other suffix is replaced.
    @param df: DataFrame to save to workbook
    @param sheet_name: Name of sheet which will contain DataFrame.
                       (default: 'Sheet1')
    @param startrow: 0-based row offset in an already existing [sheet_name]
                     at which the new rows are placed.
                     Per default (startrow=None) the rows are written right
                     after the last used row, or at the top of the sheet when
                     [truncate_sheet] is set. Not used when the sheet (or the
                     file) is newly created.
    @param max_col_width: maximum column width in Excel. Default: 30
    @param autofilter: boolean - whether add Excel autofilter or not. Default: False
    @param fmt_int: Excel format for integer numbers
    @param fmt_float: Excel format for float numbers
    @param fmt_date: Excel format for dates
    @param fmt_datetime: Excel format for datetime's
    @param truncate_sheet: truncate (remove and recreate) [sheet_name]
                           before writing DataFrame to Excel file
    @param storage_options: dict, optional
        Extra options that make sense for a particular storage connection, e.g. host, port,
        username, password, etc., if using a URL that will be parsed by fsspec, e.g.,
        starting "s3://", "gcs://".
    @param to_excel_kwargs: arguments which will be passed to `DataFrame.to_excel()`
                            [can be a dictionary]. An `engine` entry is ignored.
    @return: None

    Usage examples:

    >>> append_df_to_excel('/tmp/test.xlsx', df, autofilter=True,
                           freeze_panes=(1,0))

    >>> append_df_to_excel('/tmp/test.xlsx', df, header=None, index=False)

    >>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
                           index=False)

    >>> append_df_to_excel('/tmp/test.xlsx', df, sheet_name='Sheet2',
                           index=False, startrow=25)

    >>> append_df_to_excel('/tmp/test.xlsx', df, index=False,
                           fmt_datetime="dd.mm.yyyy hh:mm")

    (c) [MaxU](https://stackoverflow.com/users/5741205/maxu?tab=profile)
    """
    def set_column_format(ws, column_letter, fmt):
        for cell in ws[column_letter]:
            cell.number_format = fmt

    # Normalise the path once so that the existence check, the writer and the
    # final save all refer to the same ".xlsx" file.
    filename = Path(filename).with_suffix(".xlsx")
    file_exists = filename.is_file()

    # if the DF will be written using `index=True`, then `first_col = 2`, else `first_col = 1`
    first_col = int(to_excel_kwargs.get("index", True)) + 1
    # the engine is fixed to openpyxl below; ignore [engine] if it was passed
    to_excel_kwargs.pop('engine', None)

    sheet_exists = False
    written_name = sheet_name

    with pd.ExcelWriter(
        filename,
        engine="openpyxl",
        mode="a" if file_exists else "w",
        if_sheet_exists="new" if file_exists else None,
        date_format=fmt_date,
        datetime_format=fmt_datetime,
        storage_options=storage_options
    ) as writer:
        # In append mode the writer has already loaded the existing workbook,
        # so `writer.book` is used directly. Assigning to `writer.book` /
        # `writer.sheets` is not possible on recent pandas versions, where
        # both are read-only.
        book = writer.book
        names_before = list(book.sheetnames)
        sheet_exists = sheet_name in names_before

        if sheet_exists:
            if truncate_sheet:
                # replace [sheet_name] by an empty sheet at the same position
                idx = names_before.index(sheet_name)
                book.remove(book.worksheets[idx])
                book.create_sheet(sheet_name, idx)
                # the sheet is empty now, so new rows start at the top unless
                # the caller asked for a specific row
                if startrow is None:
                    startrow = 0
            elif startrow is None:
                # continue right after the last used row
                startrow = book[sheet_name].max_row

        # write out the DataFrame to an ExcelWriter
        df.to_excel(writer, sheet_name=sheet_name, **to_excel_kwargs)

        # Find the sheet pandas actually wrote to: [sheet_name] itself when it
        # is new, otherwise the temporary sheet generated because of
        # `if_sheet_exists="new"`. Looking it up through the workbook avoids
        # relying on what `writer.sheets[sheet_name]` refers to.
        added = [name for name in book.sheetnames if name not in names_before]
        if added:
            written_name = added[0]
        worksheet = book[written_name]

        if autofilter:
            worksheet.auto_filter.ref = worksheet.dimensions

        for xl_col_no, dtyp in enumerate(df.dtypes, first_col):
            col_no = xl_col_no - first_col
            # str() keeps this working for non-string column labels
            width = max(df.iloc[:, col_no].astype(str).str.len().max(),
                        len(str(df.columns[col_no])) + 6)
            width = min(max_col_width, width)
            column_letter = get_column_letter(xl_col_no)
            worksheet.column_dimensions[column_letter].width = width
            # pandas' dtype predicates also accept extension dtypes, for
            # which `np.issubdtype` raises a TypeError
            if pd.api.types.is_integer_dtype(dtyp):
                set_column_format(worksheet, column_letter, fmt_int)
            elif pd.api.types.is_float_dtype(dtyp):
                set_column_format(worksheet, column_letter, fmt_float)

    if sheet_exists and written_name != sheet_name:
        # move (append) rows from the temporary worksheet to [sheet_name]
        wb = load_workbook(filename)
        copy_excel_cell_range(
            src_ws=wb[written_name],
            tgt_ws=wb[sheet_name],
            tgt_min_row=startrow + 1,
            with_style=True
        )
        # remove the temporary (generated by Pandas) worksheet
        del wb[written_name]
        wb.save(filename)
        wb.close()

In [8]:

## SEPATU NIKE BOLA
# Scrape the listing page, compute the selling price and create the workbook
# with the first sheet ('NIKE').
url = "https:/*******sepatu-bola-nike"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(18):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
produk.to_excel(r'F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx', sheet_name='NIKE', index=False)

                                          nama_produk      harga  harga_jual
0   SEPATU BOLA - NIKE PHANTOM GX ELITE FG SPECIAL...  5500000.0     5968000
1   SEPATU BOLA - NIKE PHANTOM GX ELITE FUSION FG ...  5000000.0     5425000
2   SEPATU BOLA - NIKE TIEMPO LEGEND 9 ELITE ITALY...  5500000.0     5968000
3   SEPATU BOLA - NIKE ZOOM SUPERFLY 9 ELITE SG-PR...  3969000.0     4306000
4   SEPATU BOLA - NIKE ZOOM VAPOR 15 ELITE AG-PRO ...  3699000.0     4013000
..                                                ...        ...         ...
75  SEPATU BOLA - NIKE PHANTOM GT2 ELITE DF FG ORI...  2588000.0     2808000
76  SEPATU BOLA - NIKE TIEMPO LEGEND 9 CLUB FG/MG ...   558000.0      603000
77  SEPATU BOLA - NIKE PHANTOM GT2 ACADEMY FG/MG O...   978000.0     1056000
78  SEPATU BOLA - NIKE TIEMPO LEGEND 9 ELITE ITALY...  5500000.0     5968000
79  SEPATU BOLA - NIKE PHANTOM GT2 ELITE FG ORIGIN...  2878000.0     3123000

[80 rows x 3 columns]
                                          nama_produk

In [9]:
## SEPATU NIKE futsal
# Scrape the listing page, compute the selling price and append the rows
# (without a header row) to the 'NIKE' sheet.
url = "https://********/sepatu-futsal-nike?sort=9"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(18):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)

# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='NIKE', header=None, index=False)

print('check data path')

                                          nama_produk      harga  harga_jual
0   SEPATU FUTSAL - NIKE TIEMPO LEGEND 9 CLUB IC O...   551800.0      596000
1   SEPATU FUTSAL - NIKE VAPOR 14 ACADEMY TF ORIGI...   778000.0      840000
2   SEPATU FUTSAL - NIKE TIEMPO LEGEND 9 ACADEMY I...   788000.0      851000
3   SEPATU FUTSAL - NIKE TIEMPO LEGEND 9 ACADEMY I...   798000.0      862000
4   SEPATU FUTSAL - NIKE ZOOM VAPOR 15 ACADEMY IC ...   973800.0     1052000
5   SEPATU FUTSAL - NIKE PHANTOM GT2 ACADEMY IC OR...   973800.0     1052000
6   SEPATU FUTSAL - NIKE ZOOM VAPOR 15 ACADEMY IC ...   973800.0     1052000
7   SEPATU FUTSAL - NIKE ZOOM VAPOR 15 ACADEMY TF ...   978000.0     1056000
8   SEPATU FUTSAL - NIKE STREETGATO ORIGINAL DC846...   998000.0     1078000
9   SEPATU FUTSAL / CASUAL - NIKE STREETGATO ORIGI...   998000.0     1078000
10  SEPATU FUTSAL / CASUAL - NIKE STREETGATO HOLLA...   998000.0     1078000
11  SEPATU FUTSAL / CASUAL - NIKE STREETGATO BRASI...   998000.0     1078000

In [10]:
##SEPATU PUMA BOLA
# Scrape the listing page, compute the selling price and write the rows
# (with header) to the 'PUMA' sheet.

url = "https://*********/sepatu-bola-puma"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(10):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='PUMA', index=False)
print('check data path')

                                          nama_produk      harga  harga_jual
0   SEPATU BOLA - PUMA FUTURE Z 1.4 NEYMAR JR FG/A...  2798000.0     3036000
1   SEPATU BOLA - PUMA FUTURE ULTIMATE CREATIVITY ...  5000000.0     5425000
2   SEPATU BOLA - PUMA ULTRA ULTIMATE FG/AG ORIGIN...  2498000.0     2710000
3   SEPATU BOLA - PUMA ULTRA ULTIMATE FG/AG ORIGIN...  2498000.0     2710000
4   SEPATU BOLA - PUMA FUTURE ULTIMATE LOW FG/AG O...  2498000.0     2710000
5   SEPATU BOLA - PUMA FUTURE Z 3.4 FG/AG ORIGINAL...   978000.0     1056000
6   SEPATU BOLA - PUMA KING PRO 21 FG ORIGINAL 106...   998000.0     1078000
7   SEPATU BOLA - PUMA ULTRA PLAY FG/AG ORIGINAL 1...   709800.0      767000
8   SEPATU BOLA - PUMA ULTRA MATCH FG/AG ORIGINAL ...  1099800.0     1193000
9   SEPATU BOLA - PUMA FUTURE PLAY FG/AG ORIGINAL ...   709800.0      767000
10  SEPATU BOLA - PUMA FUTURE MATCH FG/AG ORIGINAL...  1099800.0     1193000
11  SEPATU BOLA - PUMA FUTURE MATCH + LL FG/AG ORI...  1262800.0     1370000

In [11]:
##SEPATU PUMA futsal
# Scrape the listing page, compute the selling price and append the rows
# (without a header row) to the 'PUMA' sheet.

url = "https://***********/sepatu-futsal-puma?sort=9"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(10):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)

# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='PUMA', header=None, index=False)

print('check data path')

                                          nama_produk      harga  harga_jual
0   SEPATU FUTSAL - PUMA MONARCH II TT ORIGINAL 10...   698000.0      754000
1   SEPATU FUTSAL - PUMA ULTRA PLAY IT ORIGINAL 10...   709800.0      767000
2   SEPATU FUTSAL - PUMA ULTRA PLAY TT ORIGINAL 10...   718000.0      775000
3   SEPATU FUTSAL - PUMA KING 21 IT ORIGINAL 10669602   788800.0      852000
4   SEPATU FUTSAL - PUMA KING 21 IT ORIGINAL 10669601   788800.0      852000
5   SEPATU FUTSAL - PUMA ULTRA MATCH TT ORIGINAL 1...   898000.0      970000
6   SEPATU FUTSAL - PUMA ULTRA MATCH IT ORIGINAL 1...  1025800.0     1113000
7   SEPATU FUTSAL - PUMA ULTRA MATCH IT ORIGINAL 1...  1025800.0     1113000
8   SEPATU FUTSAL - PUMA ULTRA MATCH IT ORIGINAL 1...  1099800.0     1193000
9   SEPATU FUTSAL - PUMA KING TOP TT TURF ORIGINAL...  1318000.0     1430000
10  SEPATU FUTSAL - PUMA ULTRA ULTIMATE COURT ORIG...  1498000.0     1625000
11  SEPATU FUTSAL - PUMA FUTURE Z 1.4 PRO CAGE ORI...  1698000.0     1842000

In [12]:
##SEPATU SPECS BOLA
# Scrape the listing page, compute the selling price and write the rows
# (with header) to the 'SPECS' sheet.

url = "https://**************/sepatu-bola-specs"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(18):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='SPECS', index=False)

                                          nama_produk     harga  harga_jual
0   SEPATU BOLA - SPECS REACTO BLITZ 2 ELITE FG OR...  670800.0      724000
1   SEPATU BOLA - SPECS THUNDERBEAST FG ORIGINAL 1...  291800.0      315000
2   SEPATU BOLA - SPECS ACCELERATOR LIGHTSPEED EVO...  473800.0      512000
3   SEPATU BOLA - SPECS ACCELERATOR ARROW FG ORIGI...  338800.0      366000
4   SEPATU BOLA - SPECS THUNDERBEAST FG ORIGINAL 1...  291800.0      315000
..                                                ...       ...         ...
56  SEPATU BOLA - SPECS SPEEDBLAZE FG ORIGINAL 101923  499800.0      540000
57  SEPATU BOLA - SPECS LIGHTSPEED OMEGA FG ORIGIN...  549800.0      594000
58        SEPATU BOLA - SPECS DIME FG ORIGINAL 101561  399800.0      432000
59       SEPATU BOLA - SPECS AZURA FG ORIGINAL 101915  399800.0      432000
60  SEPATU BOLA - SPECS HYPERSONIC FG ORIGINAL 101905  339500.0      367000

[61 rows x 3 columns]


In [13]:
##SEPATU specs futsal
# Scrape the listing page, compute the selling price and append the rows
# (without a header row) to the 'SPECS' sheet.

url = "https://************/sepatu-futsal-specs?sort=9"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(18):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='SPECS', header=None, index=False)
print('check data path')

                                          nama_produk     harga  harga_jual
0   SEPATU FUTSAL - SPECS THUNDERBEAST IN ORIGINAL...  291800.0      315000
1   SEPATU FUTSAL - SPECS ACCELERATOR ELEVATION ZE...  291800.0      315000
2   SEPATU FUTSAL - SPECS ACCELERATOR ELEVATION ZE...  295800.0      319000
3    SEPATU FUTSAL - SPECS GRIFFIN IN ORIGINAL 401672  315800.0      341000
4    SEPATU FUTSAL - SPECS GRIFFIN IN ORIGINAL 401674  315800.0      341000
5   SEPATU FUTSAL - SPECS ACCELERATOR ARROW IN ORI...  338800.0      366000
6   SEPATU FUTSAL - SPECS ACCELERATOR ARROW IN ORI...  343800.0      371000
7   SEPATU FUTSAL - SPECS ACCELERATOR ARROW IN ORI...  343800.0      371000
8   SEPATU FUTSAL - SPECS ACCELERATOR ALPHA NERVE ...  359800.0      389000
9   SEPATU FUTSAL - SPECS ACCELERATOR ALPHA NERVE ...  359800.0      389000
10  SEPATU FUTSAL - SPECS METASALA SANSIRO ORIGINA...  370800.0      400000
11  SEPATU FUTSAL - SPECS METASALA SANSIRO ORIGINA...  370800.0      400000
12  SEPATU F

In [14]:

##SEPATU ADIDAS BOLA
# Scrape the listing page, compute the selling price and write the rows
# (with header) to the 'ADIDAS' sheet.

url = "https:/**************/sepatu-bola-adidas"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(18):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='ADIDAS', index=False)

                                          nama_produk      harga  harga_jual
0   SEPATU BOLA - ADIDAS PREDATOR ACCURACY .1 L AG...  2922800.0     3171000
1   SEPATU BOLA - ADIDAS X SPEEDPORTAL .1 SG ORIGI...  3500000.0     3798000
2   SEPATU BOLA - ADIDAS PREDATOR ACCURACY .3 L FG...   948000.0     1024000
3   SEPATU BOLA - ADIDAS PREDATOR ACCURACY .3 FG O...  1105800.0     1200000
4   SEPATU BOLA - ADIDAS PREDATOR ACCURACY .3 FG O...  1105800.0     1200000
..                                                ...        ...         ...
75  SEPATU BOLA, SOCCER, BOOTS - ADIDAS COPA MUNDI...  3000000.0     3255000
76  SEPATU BOLA - ADIDAS X SPEEDPORTAL .3 LL FG CL...  1198000.0     1300000
77  SEPATU BOLA - ADIDAS PREDATOR ABSOLUTE FG ZIDA...  7500000.0     8138000
78  SEPATU BOLA - ADIDAS PREDATOR EDGE .3 FG ORIGI...   898000.0      970000
79  SEPATU BOLA - ADIDAS PREDATOR EDGE .1 L FG ORI...  1748000.0     1897000

[80 rows x 3 columns]


In [15]:

##SEPATU ADIDAS futsal
# Scrape the listing page, compute the selling price and append the rows
# (without a header row) to the 'ADIDAS' sheet.

url = "https:/*************************/sepatu-futsal-adidas?sort=9"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(18):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='ADIDAS', header=None, index=False)

print('check data path')

                                          nama_produk      harga  harga_jual
0   SEPATU FUTSAL - ADIDAS SUPER SALA 2 IN ORIGINA...   788000.0      851000
1   SEPATU FUTSAL - ADIDAS TOP SALA COMPETITION IN...   868000.0      937000
2   SEPATU FUTSAL - ADIDAS TOP SALA COMPETITION IN...   868000.0      937000
3   SEPATU FUTSAL - ADIDAS TOP SALA COMPETITION IN...   868000.0      937000
4   SEPATU FUTSAL - ADIDAS COPA PURE .3 TF ORIGINA...   998000.0     1078000
5   SEPATU FUTSAL - ADIDAS X SPEEDPORTAL .3 IN ORI...  1028000.0     1115000
6   SEPATU FUTSAL - ADIDAS PREDATOR ACCURACY .3 IN...  1105800.0     1200000
7   SEPATU FUTSAL - ADIDAS PREDATOR ACCURACY .3 L ...  1198000.0     1300000
8   SEPATU FUTSAL - ADIDAS X SPEEDPORTAL .3 TF ORI...  1198000.0     1300000
9   SEPATU FUTSAL - ADIDAS X SPEEDPORTAL MESSI .3 ...  1198000.0     1300000
10  SEPATU FUTSAL - ADIDAS SAMBA CLASSIC IN ORIGIN...  1198000.0     1300000
11  SEPATU FUTSAL - ADIDAS SAMBA CLASSIC IN ORIGIN...  1198000.0     1300000

In [16]:
##SEPATU MILLS BOLA
# Scrape the listing page, compute the selling price and write the rows
# (with header) to the 'MILLS' sheet.

url = "https://******************/sepatu-bola-mills"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(10):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)

# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='MILLS', index=False)

                                          nama_produk     harga  harga_jual
0     SEPATU BOLA - MILLS TROYA + FG ORIGINAL 9300109  321800.0      348000
1     SEPATU BOLA - MILLS TROYA + FG ORIGINAL 9300108  321800.0      348000
2     SEPATU BOLA - MILLS TROYA + FG ORIGINAL 9300107  321800.0      348000
3     SEPATU BOLA - MILLS HERZONE FG ORIGINAL 9301903  321800.0      348000
4     SEPATU BOLA - MILLS HERZONE FG ORIGINAL 9301902  321800.0      348000
5     SEPATU BOLA - MILLS HERZONE FG ORIGINAL 9301901  321800.0      348000
6       SEPATU BOLA - MILLS DAVOR FG ORIGINAL 9301401  278000.0      300000
7       SEPATU BOLA - MILLS DAVOR FG ORIGINAL 9301402  278000.0      300000
8       SEPATU BOLA - MILLS DAVOR FG ORIGINAL 9301403  278000.0      300000
9   SEPATU BOLA - MILLS XYCLOPS KALDERA FG ORIGINA...  508000.0      549000
10  SEPATU BOLA - MILLS T-RITON SABRE FG ORIGINAL ...  335800.0      363000
11     SEPATU BOLA - MILLS VULCAN FG ORIGINAL 9300303  298800.0      323000
12     SEPAT

In [17]:
##SEPATU MILLS futsal
# Scrape the listing page, compute the selling price and append the rows
# (without a header row) to the 'MILLS' sheet.

url = "https://***********************/sepatu-futsal-mills"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(10):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='MILLS', header=None, index=False)

print('check data path')

                                          nama_produk     harga  harga_jual
0   SEPATU FUTSAL - MILLS TROYA + IN ORIGINAL 9400109  295800.0      319000
1   SEPATU FUTSAL - MILLS TROYA + IN ORIGINAL 9400108  295800.0      319000
2   SEPATU FUTSAL - MILLS TROYA + IN ORIGINAL 9400107  295800.0      319000
3     SEPATU FUTSAL - MILLS DAVOR IN ORIGINAL 9401402  278000.0      300000
4     SEPATU FUTSAL - MILLS DAVOR IN ORIGINAL 9401401  278000.0      300000
5   SEPATU FUTSAL - MILLS XYCLOPS BLAST IN ORIGINA...  379800.0      410000
6   SEPATU FUTSAL - MILLS XYCLOPS BLAST IN ORIGINA...  379800.0      410000
7   SEPATU FUTSAL - MILLS XYCLOPS BLAST IN ORIGINA...  379800.0      410000
8   SEPATU FUTSAL - MILLS XYCLOPS KALDERA IN ORIGI...  508000.0      549000
9   SEPATU FUTSAL - MILLS XYCLOPS KALDERA IN ORIGI...  508000.0      549000
10   SEPATU FUTSAL - MILLS VULCAN IN ORIGINAL 9400304  338800.0      366000
11   SEPATU FUTSAL - MILLS MATERA IN ORIGINAL 9401002  338800.0      366000
12   SEPATU 

In [18]:
##SEPATU MIZUNO BOLA
# Scrape the listing page, compute the selling price and write the rows
# (with header) to the 'MIZUNO' sheet.
import locale
# re-apply the 'IND' locale in this cell so that `locale.atof` below parses
# prices with that locale's separators
locale.setlocale(locale.LC_ALL, 'IND')
conv = locale.localeconv()


url = "https://*************************sepatu-bola-mizuno"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # wait until the page's root container is present before reading the DOM
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#zeus-root")))
    time.sleep(2)

    # scroll down step by step, pausing after each step, before the page
    # source is captured
    for _ in range(10):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # quit() ends the whole browser session (close() only closes the window)
    # and runs even if loading the page failed
    driver.quit()

produk = []
for item in soup.findAll('div', class_="css-1sn1xa2"):
    nama_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    harga_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if nama_tag is None or harga_tag is None:
        # card without a name or price element: skip it instead of failing on `.text`
        continue
    produk.append((nama_tag.text, harga_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk', 'harga'])
# drop the leading "Rp" and parse the number with the active locale
produk['harga'] = produk.harga.map(lambda x: locale.atof(x.strip('Rp.')))
# markup: 8% below 1,000,000 and 8.5% from 1,000,000 upwards
markup = np.where(produk['harga'] < 1000000, 0.080, 0.085)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# round the selling price to the nearest 1,000
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# raw string: the backslashes of the Windows path must not be read as escapes
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk, sheet_name='MIZUNO', index=False)
print('check data path')


                                          nama_produk      harga  harga_jual
0   SEPATU BOLA - MIZUNO MORELIA NEO III PRO MIX S...  1358000.0     1473000
1   SEPATU BOLA - MIZUNO ALPHA SELECT FG ORIGINAL ...  1038000.0     1126000
2   SEPATU BOLA - MIZUNO MONARCIDA NEO II PRO FG O...  1278000.0     1387000
3   SEPATU BOLA - MIZUNO MORELIA NEO III BETA JAPA...  3999800.0     4340000
4   SEPATU BOLA - MIZUNO MORELIA NEO III JAPAN FG ...  3399800.0     3689000
5   SEPATU BOLA - MIZUNO ALPHA JAPAN FG ORIGINAL P...  3599800.0     3906000
6   SEPATU BOLA - MIZUNO MORELIA NEO III PRO FG OR...  1278000.0     1387000
7   SEPATU BOLA - MIZUNO MORELIA NEO III B ELITE F...  1998000.0     2168000
8   SEPATU BOLA - MIZUNO MORELIA NEO III ELITE FG ...  1678000.0     1821000
9   SEPATU BOLA - MIZUNO ALPHA JAPAN AG ORIGINAL P...  3599800.0     3906000
10  SEPATU BOLA - MIZUNO MORELIA NEO III JAPAN MIX...  3699800.0     4014000
11  SEPATU BOLA - MIZUNO ALPHA JAPAN MIX SG ORIGIN...  3899800.0     4231000

In [19]:
##SEPATU MIZUNO FUTSAL
# Scrape the Mizuno futsal-shoe listing page, derive a marked-up selling
# price for every product and append the rows to the 'MIZUNO' sheet.
import locale
# locale.atof() below parses the scraped price text using this locale's
# number format.
locale.setlocale(locale.LC_ALL, 'IND')
conv = locale.localeconv()


url = "https://*************************/sepatu-futsal-mizuno"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # Wait up to 5 s for the page's root element, then pause 2 s more.
    WebDriverWait(driver,5).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#zeus-root")))
    time.sleep(2)

    # Scroll down in 250 px steps; presumably this lets lazily loaded product
    # cards render before the HTML is captured -- TODO confirm.
    for i in range(10):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # The HTML is captured (or an error occurred): end the WebDriver session.
    # quit() shuts the session down; close() would only close the window.
    driver.quit()

# Collect (name, price-text) for every product card.
produk = []
for item in soup.find_all('div', class_="css-1sn1xa2"):
    name_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    price_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if name_tag is None or price_tag is None:
        # Card without a name or price element: skip it rather than abort
        # the whole run with an AttributeError.
        continue
    produk.append((name_tag.text, price_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk','harga'])
# Drop the leading "Rp"/"." characters and parse the number with the locale.
produk['harga'] = produk['harga'].map(lambda x: locale.atof(x.strip('Rp.')))
# 8% markup below 1,000,000 and 8.5% from 1,000,000 upward. A price of
# exactly 1,000,000 gets 8.5%, matching the previous pair of overlapping
# <= / >= assignments where the second one won.
markup = np.where(produk['harga'] >= 1000000, 0.085, 0.080)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# Round the selling price to the nearest 1000.
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# Raw string: the backslashes are Windows path separators, not escapes.
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
# header=None is forwarded to append_df_to_excel (defined earlier in this
# notebook); presumably it suppresses a second header row -- TODO confirm.
append_df_to_excel(path, produk,sheet_name='MIZUNO',header = None, index = False)
print('check data path')


                                          nama_produk      harga  harga_jual
0   SEPATU FUTSAL - MIZUNO MORELIA SALA CLASSIC TF...   878000.0      948000
1   SEPATU FUTSAL - MIZUNO MORELIA SALA CLASSIC IN...   878000.0      948000
2   SEPATU FUTSAL - MIZUNO MORELIA SALA CLASSIC IN...   878000.0      948000
3   SEPATU FUTSAL - MIZUNO MONARCIDA NEO SALA PRO ...   958000.0     1035000
4   SEPATU FUTSAL - MIZUNO MORELIA SALA JAPAN TF O...  2499800.0     2712000
5   SEPATU FUTSAL - MIZUNO MORELIA SALA ELITE IN O...  1518000.0     1647000
6   SEPATU FUTSAL - MIZUNO MORELIA IN ORIGINAL Q1G...  1438000.0     1560000
7   SEPATU FUTSAL - MIZUNO ALPHA ELITE AS TF ORIGI...  1918000.0     2081000
8   SEPATU FUTSAL - MIZUNO MORELIA SALA ELITE IN O...  1438000.0     1560000
9   SEPATU FUTSAL - MIZUNO MORELIA NEO SALA BETA J...  2699800.0     2929000
10  SEPATU FUTSAL - MIZUNO MORELIA SALA CLASSIC TF...   878000.0      948000
11  SEPATU FUTSAL - MIZUNO REBULA SALA SELECT IN O...   699800.0      756000

In [20]:
##aksesoris BOLA ALL(kaoskaki)
# Scrape the football-socks listing page, apply a flat 20% markup and store
# the table in the 'AKSESORIS' sheet.
# NOTE: locale.atof() below relies on the locale set in an earlier cell.

url = "https://******************/kaos-kaki-sepak-bola?sort=9"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # Wait up to 5 s for the page's root element, then pause 2 s more.
    WebDriverWait(driver,5).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#zeus-root")))
    time.sleep(2)

    # Scroll down in 250 px steps; presumably this lets lazily loaded product
    # cards render before the HTML is captured -- TODO confirm.
    for i in range(17):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # The HTML is captured (or an error occurred): end the WebDriver session.
    # quit() shuts the session down; close() would only close the window.
    driver.quit()

# Collect (name, price-text) for every product card.
produk = []
for item in soup.find_all('div', class_="css-1sn1xa2"):
    name_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    price_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if name_tag is None or price_tag is None:
        # Card without a name or price element: skip it rather than abort
        # the whole run with an AttributeError.
        continue
    produk.append((name_tag.text, price_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk','harga'])
# Drop the leading "Rp"/"." characters and parse the number with the locale.
produk['harga'] = produk['harga'].map(lambda x: locale.atof(x.strip('Rp.')))
# Accessories get the same 20% markup at every price level, so no
# price-band split is needed.
produk['harga_jual'] = produk['harga'] + (produk['harga'] * 0.2)
# Round the selling price to the nearest 1000.
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)
# Raw string: the backslashes are Windows path separators, not escapes.
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
append_df_to_excel(path, produk,sheet_name='AKSESORIS', index = False)

print('check data path')

                                          nama_produk     harga  harga_jual
0   KAOS KAKI JUNIOR - SEPAK BOLA 1 PASANG LOB ORA...   20000.0       24000
1   KAOS KAKI SEPAK BOLA 1 PASANG LOB PUTIH MERAH ...   20000.0       24000
2   KAOS KAKI SEPAK BOLA 1 PASANG SEVENSTAR NAVY S...   20000.0       24000
3      KAOS KAKI - SEVENSTARS ORIGINAL ART#SEVENSTARS   20000.0       24000
4   KAOS KAKI - SEPAK BOLA 1 PASANG EURO RED ORIGI...   25000.0       30000
..                                                ...       ...         ...
75  KAOS KAKI - LOTTO ENERGIA FB SOCKS ORIGINAL AL...   50000.0       60000
76  KAOS KAKI ANTI SLIP - WILDCAT SOCCER MAROON OR...   57900.0       69000
77  KAOS KAKI - NIKE ACADEMY KNEE HIGH ORIGINAL SX...  128000.0      154000
78  KAOS KAKI - ADIDAS MILANO 16 SOCK ORIGINAL AJ5...  140000.0      168000
79    KAOS KAKI - ADIDAS MILANO SOCK ORIGINAL #AJ5905  140000.0      168000

[80 rows x 3 columns]
check data path


In [21]:
##aksesoris BOLA ALL (deker)
# Scrape the "deker" search-result page, derive a marked-up selling price
# for every product and append the rows to the 'AKSESORIS' sheet.
# NOTE: locale.atof() below relies on the locale set in an earlier cell.

url = "https://*****************/product?q=deker&sort=9"
driver = webdriver.Chrome()
try:
    driver.get(url)

    # Wait up to 5 s for the page's root element, then pause 2 s more.
    WebDriverWait(driver,5).until(EC.presence_of_element_located((By.CSS_SELECTOR,"#zeus-root")))
    time.sleep(2)

    # Scroll down in 250 px steps; presumably this lets lazily loaded product
    # cards render before the HTML is captured -- TODO confirm.
    for i in range(3):
        driver.execute_script("window.scrollBy(0,250)")
        time.sleep(1)

    soup = BeautifulSoup(driver.page_source, "html.parser")
finally:
    # The HTML is captured (or an error occurred): end the WebDriver session.
    # quit() shuts the session down; close() would only close the window.
    driver.quit()

# Collect (name, price-text) for every product card.
produk = []
for item in soup.find_all('div', class_="css-1sn1xa2"):
    name_tag = item.find('div', class_="prd_link-product-name css-3um8ox")
    price_tag = item.find('div', class_="prd_link-product-price css-1ksb19c")
    if name_tag is None or price_tag is None:
        # Card without a name or price element: skip it rather than abort
        # the whole run with an AttributeError.
        continue
    produk.append((name_tag.text, price_tag.text))

produk = pd.DataFrame(produk, columns=['nama_produk','harga'])
# Drop the leading "Rp"/"." characters and parse the number with the locale.
produk['harga'] = produk['harga'].map(lambda x: locale.atof(x.strip('Rp.')))
# 8% markup below 1,000,000 and 8.5% from 1,000,000 upward. A price of
# exactly 1,000,000 gets 8.5%, matching the previous pair of overlapping
# <= / >= assignments where the second one won.
markup = np.where(produk['harga'] >= 1000000, 0.085, 0.080)
produk['harga_jual'] = produk['harga'] + (produk['harga'] * markup)
# Round the selling price to the nearest 1000.
produk['harga_jual'] = (np.round(produk.harga_jual / 1000) * 1000).astype(int)

print(produk)


# Raw string: the backslashes are Windows path separators, not escapes.
path = r"F:\Marsa\Project Data\AAAsepatu\produk_doubleu.xlsx"
# header=None is forwarded to append_df_to_excel (defined earlier in this
# notebook); presumably it suppresses a second header row -- TODO confirm.
append_df_to_excel(path, produk,sheet_name='AKSESORIS',header = None, index = False)

print('check data path')



                                          nama_produk     harga  harga_jual
0           WRIST BAND / DEKER TANGAN LILIT SPORT PAD   50000.0       54000
1   DEKER PELINDUNG SHINGUARD - SPECS OPTIMUS 2 OR...   58000.0       63000
2   DEKER PELINDUNG SHINGUARD - SPECS OPTIMUS 2 OR...   58000.0       63000
3   DEKER PELINDUNG SHINGUARD - SPECS OPTIMUS 2 OR...   58000.0       63000
4   DEKER LUTUT - LP SUPPORT KNEE SUPPORT ORIGINAL...   80000.0       86000
..                                                ...       ...         ...
75  DEKER PELINDUNG SHINGUARD - PUMA ULTRA FLEX SL...  398000.0      430000
76  DEKER PELINDUNG SHINGUARD - NIKE MERCURIAL LIT...  398000.0      430000
77  DEKER PELINDUNG SHINGUARD - NIKE MERCURIAL LIT...  398000.0      430000
78  DEKER PELINDUNG SHINGUARD - NIKE MERCURIAL LIT...  398000.0      430000
79  DEKER PELINDUNG SHINGUARD - NIKE MERCURIAL LIT...  398000.0      430000

[80 rows x 3 columns]
check data path
