<h3>StyleKorean Web Store Scrpaing Script</h3>

In [42]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import time

# Initialize Chrome WebDriver with options
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
driver = webdriver.Chrome(options=options)

# Credentials for login
username = "park01"
password = "park01"

# Initialize a list to store product details
product_details = []

try:
    # Step 1: Navigate to the login page
    driver.get("https://wholesale.stylekorean.com/Member/SignIn")
    time.sleep(3)

    # Step 2: Enter username and password
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'user_id')))
    driver.find_element(By.ID, 'user_id').send_keys(username)
    driver.find_element(By.ID, 'pwd').send_keys(password)
    driver.find_element(By.CLASS_NAME, 'Btn_Login').click()

    # Step 3: Wait for login to complete
    WebDriverWait(driver, 10).until(EC.url_contains("https://wholesale.stylekorean.com/"))

    # Step 4: Redirect to the best-seller products page
    driver.get("https://wholesale.stylekorean.com/Product/BestProduct")
    time.sleep(3)

    # Step 5: Find the total number of pages
    pagination = driver.find_elements(By.CLASS_NAME, 'page-link')
    total_pages = max([int(btn.get_attribute('page')) for btn in pagination if btn.get_attribute('page') and btn.get_attribute('page').isdigit()], default=1)

    # Step 6: Loop through pages
    for page in range(1, total_pages + 1):
        print(f"Scraping page {page}...")

        # Navigate to the current page
        driver.get(f"https://wholesale.stylekorean.com/Product/BestProduct?page={page}")
        time.sleep(3)

        # Step 7: Locate product list
        products = driver.find_elements(By.CLASS_NAME, 'Img_Product')
        if not products:
            print(f"No products found on page {page}. Skipping.")
            continue

        for i in range(len(products)):
            try:
                # Re-locate the product elements
                products = driver.find_elements(By.CLASS_NAME, 'Img_Product')

                if i >= len(products):  # Safety check for index out of range
                    print(f"Skipping index {i}, exceeds product list length.")
                    continue

                product_image = products[i]

                # Ensure the element is clickable
                WebDriverWait(driver, 10).until(EC.element_to_be_clickable(product_image))

                # Scroll to the element to make it clickable
                driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", product_image)

                # Use ActionChains to click on the element
                ActionChains(driver).move_to_element(product_image).click().perform()

                time.sleep(3)  # Allow page to load

                # Step 8: Scrape product details
                html = driver.page_source
                soup = BeautifulSoup(html, 'html.parser')

                # Extract the product title
                title_element = soup.find('div', class_='item_name item_list')
                title = title_element.text.strip() if title_element else 'Not specified'

                # Extract the product main image URL
                main_image_element = soup.find('div', class_='item_video')
                main_image_url = main_image_element.find('img')['src'] if main_image_element and main_image_element.find('img') else 'Not specified'

                # Extract product description
                description_section = soup.find('div', class_='description')
                description_parts = description_section.find_all('div', class_='pro_infor_content_style') if description_section else []
                description = " ".join([part.text.strip() for part in description_parts if part.text.strip()]) if description_parts else 'Not specified'

                # Extract other product images (inside the description div)
                other_images = [img['src'] for img in description_section.find_all('img') if img.get('src')] if description_section else []

                # Extract other existing product details (retail_price, etc.)
                detail_section = soup.find('div', class_='detail_contents')

                # Initialize details with default values
                retail_price = discounted_price = discount_rate = stock = stock_quantity = pieces_per_box = brand_name = barcode = 'Not specified'

                if detail_section:
                    retail_price_element = detail_section.find('div', class_='price')
                    retail_price = retail_price_element.find('span', class_='sale').text.strip() if retail_price_element else 'Not specified'

                    discounted_price_element = detail_section.find_all('div', class_='price')[1] if len(detail_section.find_all('div', 'price')) > 1 else None
                    discounted_price = discounted_price_element.find('span', class_='sale').text.strip() if discounted_price_element else 'Not specified'

                    discount_rate_element = detail_section.find('span', class_='rate')
                    discount_rate = discount_rate_element.text.strip() if discount_rate_element else 'Not specified'

                    stock_element = detail_section.find('span', class_='stock')
                    stock = stock_element.text.strip() if stock_element else 'Not specified'

                    stock_quantity_element = detail_section.find('span', class_='number')
                    stock_quantity = stock_quantity_element.text.strip() if stock_quantity_element else 'Not specified'

                    pieces_per_box_element = detail_section.find('span', class_='perbox')
                    pieces_per_box = pieces_per_box_element.text.strip() if pieces_per_box_element else 'Not specified'

                    item_contents = soup.find('div', class_='item_conents')
                    brand_name_element = item_contents.find('span') if item_contents else None
                    brand_name = brand_name_element.text.strip() if brand_name_element else 'Not specified'

                    barcode_element = detail_section.find('span', text=lambda x: x and x.isdigit() and len(x) >= 8)
                    barcode = barcode_element.text.strip() if barcode_element else 'Not specified'

                # Store the details
                product_details.append({
                    'title': title,
                    'main_image_url': main_image_url,
                    'description': description,
                    'product_images': other_images,
                    'retail_price': retail_price,
                    'discounted_price': discounted_price,
                    'discount_rate': discount_rate,
                    'stock': stock,
                    'stock_quantity': stock_quantity,
                    'pieces_per_box': pieces_per_box,
                    'brand_name': brand_name,
                    'barcode': barcode,
                })

                # Step 9: Go back to the product list page
                driver.back()
                time.sleep(3)

            except Exception as e:
                print(f"Error clicking product {i+1} on page {page}: {e}")

finally:
    # Close the WebDriver
    driver.quit()

# Print the product details
print(f"Scraped {len(product_details)} product details:")
for product in product_details:
    print(product)

Scraping page 1...


  barcode_element = detail_section.find('span', text=lambda x: x and x.isdigit() and len(x) >= 8)


Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraped 220 product details:
{'title': 'Relief Sun : Rice + Probiotics 50ml', 'main_image_url': 'https://wmg.stylekorean.com/JSMSM03-SR_thumb.jpg?v20250128', 'description': "*SPECIAL FREE GIFT* Relief Sun 10ml -\xa0Relief Sun is an organic sunscreen that applies gently on the skin and by also including skin calming ingredients, it allows sensitive skin types to use it with ease as well. - Lightweight and creamy type organic sunscreen that's comfortable on skin. Even if you apply a large amount several times, it is not sticky and gives a moist finish like that of a light moisturizing cream. - Demonstrating natural skin is possible without a heavy feeling even after applying several layers as the formula is moist and does not leave white cast. -\xa0Lightweight cream texture:similar to a light moistu

In [43]:
len(product_details)

220

<h3>Export Scrpaped Products in Excel File Script</h3>

In [None]:
import pandas as pd

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(product_details)

# Save the DataFrame to an Excel file
output_file = 'scraped_products.xlsx'
df.to_excel(output_file, index=False)

print(f"Scraped data has been exported to '{output_file}'.")


<h3>Map Above Products Scrapped Data Exported Excel File into Shopify Template CSV File</h3>

In [38]:
import pandas as pd

# Load the Shopify CSV template
shopify_template = pd.read_csv('Test1 - template to Farhan.csv')  # Replace with your file path
print("Shopify template loaded successfully.")

# Load the scraped data from the Excel file
scraped_data = pd.read_excel('scraped_products.xlsx')  # Replace with your file path
print("Scraped data loaded successfully.")

# Prepare a list to store transformed data rows
transformed_data = []

# Start with Variant SKU number BE01 and increase for each row
variant_sku_index = 1

# Process each row from the scraped data
for index, row in scraped_data.iterrows():
    title = row['title']
    handle = title.lower().replace(' ', '-')  # Handle: title in lowercase with hyphens
    main_image_url = row['main_image_url']
    product_images_str = row['product_images']
    
    # Debugging: Check if the product_images string is parsed correctly
    print(f"Processing product: {title}")
    print(f"Main image URL: {main_image_url}")
    print(f"Product images string: {product_images_str}")
    
    try:
        # Handle the case where the product_images_str is a string list
        if pd.isna(product_images_str) or product_images_str.strip() == '':
            product_images = []
        else:
            # Strip unwanted characters and convert string to list
            product_images = eval(product_images_str)
            
            # Ensure the product_images is indeed a list of URLs
            if not isinstance(product_images, list):
                product_images = [product_images_str]  # If it's not a valid list, treat as a single URL
    except Exception as e:
        # If any error occurs, handle it gracefully
        print(f"Error in parsing product images: {e}, using empty list.")
        product_images = []

    description = row['description']
    retail_price = row['retail_price']
    discounted_price = row['discounted_price']
    vendor = row['brand_name']
    barcode = row['barcode']
    
    # Combine the main image and additional product images
    all_images = [main_image_url] + product_images
    
    # Debugging: Show the list of all images being processed
    print(f"All images for product: {all_images}")
    
    # For each image in the product images
    for image_position, image_url in enumerate(all_images, start=1):
        # Create a copy of the Shopify template for each row
        new_row = shopify_template.iloc[0].copy()  # Copy the first row as a base template
        
        # Set values for the required columns
        new_row['Handle'] = handle
        new_row['Title'] = title if image_position == 1 else ''
        new_row['Body (HTML)'] = f"<p>{description}</p>" if image_position == 1 else ''
        new_row['Vendor'] = vendor if image_position == 1 else ''
        new_row['Tags'] = handle if image_position == 1 else ''
        new_row['Published'] = 'TRUE'
        new_row['Variant SKU'] = f"BE{str(variant_sku_index).zfill(2)}"
        new_row['Variant Grams'] = 0
        new_row['Variant Inventory Tracker'] = 'shopify'
        new_row['Variant Inventory Policy'] = 'deny'
        new_row['Variant Fulfillment Service'] = 'manual'
        new_row['Variant Price'] = retail_price.replace('KRW', '').strip() if image_position == 1 else ''
        new_row['Variant Compare At Price'] = discounted_price.replace('USD', '').strip() if image_position == 1 else ''
        new_row['Variant Requires Shipping'] = 'TRUE'
        new_row['Variant Taxable'] = 'TRUE'
        new_row['Variant Barcode'] = barcode if image_position == 1 else ''
        new_row['Image Src'] = image_url
        new_row['Image Position'] = image_position
        new_row['Gift Card'] = 'FALSE'
        new_row['Variant Weight Unit'] = 'KG'
        new_row['Included / Hong Kong SAR'] = 'TRUE'
        new_row['Included / Canada & Rest World'] = 'TRUE'
        new_row['Included / International 1'] = 'TRUE'
        new_row['Included / International 2'] = 'TRUE'
        new_row['Included / í•´ì™¸'] = 'TRUE'
        new_row['Status'] = 'active'
        
        # Append the new row to the transformed data list
        transformed_data.append(new_row)
        
        # Increment the variant SKU index
        variant_sku_index += 1

# Create a DataFrame from the transformed data
df_transformed = pd.DataFrame(transformed_data)

# Save the transformed data to a new CSV file, aligned with the Shopify template
df_transformed.to_csv('updated_shopify_template.csv', index=False)

# Print the first few rows to verify the output
print("Transformation complete. Here's the first few rows:")
print(df_transformed.head())


Shopify template loaded successfully.
Scraped data loaded successfully.
Processing product: Relief Sun : Rice + Probiotics 50ml
Main image URL: https://wmg.stylekorean.com/JSMSM03-SR_thumb.jpg?v20250128
Product images string: ['https://d2c3d01lcpw2ui.cloudfront.net/gl/data/editor/2212/0c980badb3a10b1dd99f1d700978fa77_1671076072_5466.jpg', 'https://d2c3d01lcpw2ui.cloudfront.net/gl/data/editor/2212/0c980badb3a10b1dd99f1d700978fa77_1671076154_1816.jpg']
All images for product: ['https://wmg.stylekorean.com/JSMSM03-SR_thumb.jpg?v20250128', 'https://d2c3d01lcpw2ui.cloudfront.net/gl/data/editor/2212/0c980badb3a10b1dd99f1d700978fa77_1671076072_5466.jpg', 'https://d2c3d01lcpw2ui.cloudfront.net/gl/data/editor/2212/0c980badb3a10b1dd99f1d700978fa77_1671076154_1816.jpg']
Processing product: NIACINAMIDE 10% + TXA 4% SERUM 30ml
Main image URL: https://wmg.stylekorean.com/BR000516/ANS10-S_thumb_3.jpg?v20250128
Product images string: ['https://m.media-amazon.com/images/I/51JcS+DmKhL._SL1000_.jpg', 'ht

  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle
  new_row['Handle'] = handle


Processing product: HEARTLEAF QUERCETINOL PORE DEEP CLEANSING FOAM 150ml (RENEW)
Main image URL: https://wmg.stylekorean.com/BR000516/ANC01-FQRR_thumb.jpg?v20250128
Product images string: ['https://m.media-amazon.com/images/S/aplus-media-library-service-media/39d43ad5-a482-4896-a18b-f6565accc538.__CR0,0,970,600_PT0_SX970_V1___.jpg', 'https://m.media-amazon.com/images/I/51Yz3T9AUbL._SL1024_.jpg', 'https://m.media-amazon.com/images/I/61GCsqsXtKL._SL1024_.jpg', 'https://m.media-amazon.com/images/I/618A-zsb6sL._SL1024_.jpg', 'https://m.media-amazon.com/images/I/61X57PrjGlL._SL1024_.jpg', 'https://m.media-amazon.com/images/I/61pVKnfXfyL._SL1024_.jpg', 'https://m.media-amazon.com/images/I/51CS5yzeiwL._SL1024_.jpg']
All images for product: ['https://wmg.stylekorean.com/BR000516/ANC01-FQRR_thumb.jpg?v20250128', 'https://m.media-amazon.com/images/S/aplus-media-library-service-media/39d43ad5-a482-4896-a18b-f6565accc538.__CR0,0,970,600_PT0_SX970_V1___.jpg', 'https://m.media-amazon.com/images/I/51