In [154]:
from selenium.webdriver.chrome import webdriver, options
from selenium.webdriver.support.relative_locator import By 
from selenium.webdriver.support.select import Select 
from selenium.common.exceptions import NoSuchElementException, ElementNotInteractableException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from urllib.parse import urlsplit
import re
import os
import pandas as pd
from selenium.webdriver.support.color import Color
import time


class ProductType:
    SINGLE = 'single'
    MULTI_SIZE = 'multi-size'
    MULTI_COLOR = 'multi-color'
    MULTI_SHADE = 'multi-shade'

def safe_get_element(wd: webdriver.WebDriver, by: By, value:str):
    try:
        element = wd.find_element(by, value)
        return element
    except NoSuchElementException:
        return None

def get_variation_images(wd: webdriver.WebDriver, variation_details:dict[str, object]):
    right_arrow = wd.find_element(By.CLASS_NAME, 'athenaProductImageCarousel_rightArrow')
    for i, image in enumerate(wd.find_elements(By.CLASS_NAME, 'athenaProductImageCarousel_image')):
        if i != 0:
            if EC.staleness_of(right_arrow):
                right_arrow = wd.find_element(By.CLASS_NAME, 'athenaProductImageCarousel_rightArrow')
            wd.execute_script("arguments[0].click();", right_arrow)
        if EC.staleness_of(image):
            image = wd.find_elements(By.CLASS_NAME, 'athenaProductImageCarousel_image')[i]
        variation_details[f'product_image_{i+1}'] = image.get_attribute('src')
    return variation_details

def get_variation_misc_details(wd: webdriver.WebDriver, variation_details:dict[str, object], product_id: str):
    variation_details['variant_SKU'] = product_id
    variation_details['product_name'] = wd.find_element(By.CLASS_NAME, 'productName_title').get_attribute('textContent')
    variation_details['product_rating'] = float(wd.find_element(By.CLASS_NAME, 'productReviewStarsPresentational').get_attribute('aria-label').split(' ')[0])
    variation_details['number_of_reviews'] = int(wd.find_element(By.CLASS_NAME, 'productReviewStars_numberOfReviews').text.split(' ')[0])
    variation_details['price'] = wd.find_element(By.CLASS_NAME, 'productPrice_price').text.removeprefix('£')
    try:
        wd.find_element(By.CLASS_NAME, 'productAddToBasket-soldOut')
        variation_details['in_stock'] = 'no'
    except NoSuchElementException:
        variation_details['in_stock'] = 'yes'
    return variation_details

def get_multi_size_details(wd: webdriver.WebDriver, product_details: dict[str, object]) -> list[dict[str, object]]:
    variations = []
    ids = [button.get_attribute("data-linked-product-id") for button in wd.find_elements(By.CLASS_NAME, 'athenaProductVariations_box')]
    for product_id in ids:
        button = wd.find_element(By.CSS_SELECTOR, f"button[data-linked-product-id='{product_id}']")
        variation_details = product_details.copy()
        is_selected = safe_get_element(button, By.CLASS_NAME, 'srf-hide')
        if is_selected is None:
            old_price = get_old_price(wd)
            wd.execute_script('arguments[0].click();', button)
            WebDriverWait(wd, 10).until(EC.staleness_of(old_price))
            button = wd.find_element(By.CSS_SELECTOR, f"button[data-linked-product-id='{product_id}']")
        variation_details = get_variation_misc_details(wd, variation_details, product_id)
        variation_details['size'] = button.text
        variation_details = get_variation_images(wd, variation_details)
        variations.append(variation_details)
    return variations

def get_id_from_url(url:str):
    base_name = os.path.basename(urlsplit(url).path)
    return base_name.split('.')[0].split('-')[0].strip()

def get_old_price(wd: webdriver.WebDriver):
    try:
        return wd.find_element(By.CLASS_NAME, 'productPrice_price')
    except NoSuchElementException:
        return wd.find_element(By.CLASS_NAME, 'productPrice_fromPrice')
    
def rgb_to_hex(rgb: list):
    return '#%02x%02x%02x' % (int(rgb[0]), int(rgb[1]), int(rgb[2]))

def get_multi_color_details(wd: webdriver.WebDriver, product_details: dict[str, object], product_type: str) -> list[dict[str, object]]:
    variations = []
    drop_down_list = wd.find_element(By.CLASS_NAME, 'athenaProductVariations_dropdown')
    select = Select(drop_down_list)
    for option, id in [(x.text, x.get_attribute('value')) for x in select.options if x.text.casefold() != 'Please choose...'.casefold()]:
        variation_details = product_details.copy()
        old_price = get_old_price(wd)
        select = Select(wd.find_element(By.CLASS_NAME, 'athenaProductVariations_dropdown'))
        select.select_by_visible_text(option)
        WebDriverWait(wd, 10).until(EC.staleness_of(old_price))
        variation_details = get_variation_images(wd, variation_details)
        product_id = get_id_from_url(variation_details['product_image_1'])
        variation_details = get_variation_misc_details(wd, variation_details, product_id)
        if product_type == ProductType.MULTI_COLOR:
            variation_type = 'color'
        elif product_type == ProductType.MULTI_SHADE:
            variation_type = 'shade'
        else:
            raise ValueError(f'Invalid product type: {product_type}')
        variation_details[variation_type] = option
        color = wd.find_element(By.CSS_SELECTOR, f"span[data-value-id='{id}']").value_of_css_property('background-color')
        color = Color.from_string(color).hex
        variation_details[f'{variation_type}_hex'] = color
        variations.append(variation_details)
    return variations


def get_product_details(urls: list[str] ,browser_options: options.Options):
    df = pd.DataFrame()
    with webdriver.WebDriver(browser_options) as wd:
        for url in urls:
            wd.get(url)
            product_details = {}
            product_variations = []
            brand_element = wd.find_element(By.CLASS_NAME, 'productBrandLogo_image')
            product_details['product_url'] = url
            product_details['brand_name'] = brand_element.get_attribute('title')
            product_details['brand_logo'] = brand_element.get_attribute('src')
            product_details['primary_SKU'] = get_id_from_url(url)
            for button in wd.find_elements(By.CLASS_NAME, 'productDescription_accordionControl'):
                try:
                    if not button.text:
                        continue
                    button_id = button.get_attribute("id")
                    is_expanded = button.get_attribute('aria-expanded')
                    if is_expanded == 'false':
                        wd.execute_script("arguments[0].click();", button)
                    description_content = wd.find_element(By.ID, button_id.replace('heading', 'content')).text
                    product_details[button.text] = description_content

                except ElementNotInteractableException:
                    print(f'cannot click element with id: {button_id}')
                except Exception  as e:
                    print(f'Unexpected error occurred: {e}')
            variation_label = safe_get_element(wd, By.CLASS_NAME, 'athenaProductVariations_dropdownLabel')
            if variation_label is not None:
                variation = variation_label.text.strip()
                if variation.casefold() in color_variation_tags:
                    product_details['product_type'] = ProductType.MULTI_COLOR
                    product_variations = get_multi_color_details(wd, product_details, ProductType.MULTI_COLOR)
                elif variation.casefold() in shade_variation_tags:
                    product_details['product_type'] = ProductType.MULTI_SHADE
                    product_variations = get_multi_color_details(wd, product_details, ProductType.MULTI_SHADE)
                elif variation.casefold() in size_variation_tags:
                    product_details['product_type'] = ProductType.MULTI_SIZE
                    product_variations = get_multi_size_details(wd, product_details)
                else:
                    print(f'Unknown variant type: {variation}')
            else:
                product_details['product_type'] = ProductType.SINGLE
                product_details = get_variation_images(wd, product_details)
                product_id = get_id_from_url(product_details['product_image_1'])
                product_details = get_variation_misc_details(wd, product_details, product_id)
                product_variations = [product_details]
            df = pd.concat([df, pd.DataFrame(product_variations)], ignore_index=True)
    return df

browser_options = options.Options()
browser_options.add_argument('-disable-notifications')
browser_options.add_experimental_option("prefs", {"profile.default_content_setting_values.cookies": 2})
browser_options.add_argument('-headless')

color_variation_tags = [x.casefold() for x in ['colour:', 'color:']]
shade_variation_tags = [x.casefold() for x in ['shade:']]
size_variation_tags = [x.casefold() for x in ['size:']]

PRODUCT_LINKS = ["https://www.cultbeauty.co.uk/westman-atelier-eye-pods/13324061.html?affil=thgppc&countrySelected=Y",
                 "https://www.cultbeauty.co.uk/huda-beauty-lovefest-obsessions-eyeshadow-palette/13899183.html?affil=thgppc&countrySelected=Y",
                 'https://www.cultbeauty.co.uk/hindash-manifesto-lipstick-3.5g-various-shades/13798789.html?affil=thgppc&settingsSaved=Y&shippingcountry=GB&switchcurrency=GBP&countrySelected=Y',
                 'https://www.cultbeauty.co.uk/welleco-nourishing-protein-chocolate-refill/13314044.html?affil=thgppc&countrySelected=Y',
                 'https://www.cultbeauty.co.uk/thank-you-farmer-sun-project-water-sun-cream-spf50/13313863.html?affil=thgppc&countrySelected=Y',
                 'https://www.cultbeauty.co.uk/color-wow-travel-dream-coat-supernatural-spray-50ml/11870457.html',
                 'https://www.cultbeauty.com/huda-beauty-kayali-yum-pistachio-gelato-33-eau-de-parfum-intense-10ml/14272370.html']

df = get_product_details(PRODUCT_LINKS, browser_options)
df.head()

Unnamed: 0,product_url,brand_name,brand_logo,primary_SKU,Why It's Cult,Description,How to Use,Product Details,product_type,product_image_1,...,price,in_stock,color,color_hex,Full Ingredients List,product_image_5,product_image_6,shade,shade_hex,size
0,https://www.cultbeauty.co.uk/westman-atelier-e...,Westman Atelier,https://static.thcdn.com/design-assets/images/...,13324061,"Formulated with only the finest, skin-loving i...",A luminous eyeshadow trio in two iterations fo...,Gucci created each Eye Pod palette with one cl...,Brand:\nWestman Atelier,multi-color,https://static.thcdn.com/images/large/original...,...,80.0,no,Les Jours - Out of stock,#000000,,,,,,
1,https://www.cultbeauty.co.uk/westman-atelier-e...,Westman Atelier,https://static.thcdn.com/design-assets/images/...,13324061,"Formulated with only the finest, skin-loving i...",A luminous eyeshadow trio in two iterations fo...,Gucci created each Eye Pod palette with one cl...,Brand:\nWestman Atelier,multi-color,https://static.thcdn.com/images/large/original...,...,80.0,yes,Les Nuits,#000000,,,,,,
2,https://www.cultbeauty.co.uk/huda-beauty-lovef...,Huda Beauty,https://static.thcdn.com/design-assets/images/...,13899183,With a phenomenal fan base (45+ million Instag...,We’re feeling all kinds of groovy and ready to...,Build your base with matte shadows. Pick up th...,Brand:\nHuda Beauty,single,https://static.thcdn.com/images/large/webp//pr...,...,27.0,no,,,"Deep Brown Matte (1) – Mica, Magnesium Myrista...",https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,,,
3,https://www.cultbeauty.co.uk/hindash-manifesto...,Hindash,https://static.thcdn.com/design-assets/images/...,13798789,"Taking your beauty out of this world, Hindash’...","Sealing your look with kissable softness, the ...",Pat the lipstick all over lips for a soft tint...,Brand:\nHindash,multi-shade,https://static.thcdn.com/images/large/original...,...,26.0,yes,,,"Octyldodecanol, Polysilicone11, Synthetic Wax,...",https://static.thcdn.com/images/large/original...,,Call me Peaches,#dd8670,
4,https://www.cultbeauty.co.uk/hindash-manifesto...,Hindash,https://static.thcdn.com/design-assets/images/...,13798789,"Taking your beauty out of this world, Hindash’...","Sealing your look with kissable softness, the ...",Pat the lipstick all over lips for a soft tint...,Brand:\nHindash,multi-shade,https://static.thcdn.com/images/large/original...,...,20.8,yes,,,"Octyldodecanol, Polysilicone11, Synthetic Wax,...",https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,Rest in Ross,#ab0033,


In [153]:
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    display(df.head(100))

Unnamed: 0,product_url,brand_name,brand_logo,primary_SKU,Why It's Cult,Description,How to Use,Product Details,product_type,product_image_1,product_image_2,product_image_3,product_image_4,variant_SKU,product_name,product_rating,number_of_reviews,price,in_stock,color,color_hex,Full Ingredients List,product_image_5,product_image_6,shade,shade_hex,size
0,https://www.cultbeauty.co.uk/westman-atelier-e...,Westman Atelier,https://static.thcdn.com/design-assets/images/...,13324061,"Formulated with only the finest, skin-loving i...",A luminous eyeshadow trio in two iterations fo...,Gucci created each Eye Pod palette with one cl...,Brand:\nWestman Atelier,multi-color,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,13324062,Westman Atelier Eye Pods,4.2,15,80.00,no,Les Jours - Out of stock,#000000,,,,,,
1,https://www.cultbeauty.co.uk/westman-atelier-e...,Westman Atelier,https://static.thcdn.com/design-assets/images/...,13324061,"Formulated with only the finest, skin-loving i...",A luminous eyeshadow trio in two iterations fo...,Gucci created each Eye Pod palette with one cl...,Brand:\nWestman Atelier,multi-color,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,13324063,Westman Atelier Eye Pods,4.2,15,80.00,no,Les Nuits,#000000,,,,,,
2,https://www.cultbeauty.co.uk/huda-beauty-lovef...,Huda Beauty,https://static.thcdn.com/design-assets/images/...,13899183,,We’re feeling all kinds of groovy and ready to...,Build your base with matte shadows. Pick up th...,Brand:\nHuda Beauty,single,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,13899183,Huda Beauty Lovefest Obsessions Eyeshadow Palette,4.92,13,27.00,no,,,"Deep Brown Matte (1) – Mica, Magnesium Myrista...",https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,,,
3,https://www.cultbeauty.co.uk/hindash-manifesto...,Hindash,https://static.thcdn.com/design-assets/images/...,13798789,"Taking your beauty out of this world, Hindash’...","Sealing your look with kissable softness, the ...",Pat the lipstick all over lips for a soft tint...,Brand:\nHindash,multi-shade,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,13798791,Hindash Manifesto Lipstick 3.5g (Various Shades),4.76,21,26.00,yes,,,"Octyldodecanol, Polysilicone11, Synthetic Wax,...",https://static.thcdn.com/images/large/original...,,Call me Peaches,#dd8670,
4,https://www.cultbeauty.co.uk/hindash-manifesto...,Hindash,https://static.thcdn.com/design-assets/images/...,13798789,"Taking your beauty out of this world, Hindash’...","Sealing your look with kissable softness, the ...",Pat the lipstick all over lips for a soft tint...,Brand:\nHindash,multi-shade,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,13798790,Hindash Manifesto Lipstick 3.5g (Various Shades),4.76,21,20.80,yes,,,"Octyldodecanol, Polysilicone11, Synthetic Wax,...",https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,Rest in Ross,#ab0033,
5,https://www.cultbeauty.co.uk/welleco-nourishin...,WelleCo,https://static.thcdn.com/design-assets/images/...,13314044,Co-founded by supermodel Elle Macpherson (aka ...,"Nourishing and slimming, WelleCo’s Nourishing ...",The Nourishing Protein is a great supplement t...,Brand:\nWelleCo,multi-size,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,13314044,WelleCo Nourishing Protein Chocolate Refill,4.8,30,29.00,yes,,,"Pea Protein (26%), Brown Rice Protein (24%), C...",https://static.thcdn.com/images/large/webp//pr...,,,,300g Refill
6,https://www.cultbeauty.co.uk/thank-you-farmer-...,Thank You Farmer,https://static.thcdn.com/design-assets/images/...,13313863,"Fanatical about sun protection, South Korean w...","A beautiful moisturiser-meets-sunscreen, Thank...","After basic skincare, take a proper amount of ...",Brand:\nThank You Farmer,single,https://static.thcdn.com/images/large/webp//pr...,,,,13313863,Thank You Farmer Sun Project Water Sun Cream S...,4.59,696,20.00,yes,,,"Water, Ethylhexyl Methoxycinnamate, Homosalate...",,,,,
7,https://www.cultbeauty.co.uk/color-wow-travel-...,Color WOW,https://static.thcdn.com/design-assets/images/...,11870457,"A problem-solving range of hair care heroes, C...","An innovative must-have for protecting porous,...","Shampoo, condition, towel dry hair, divide in ...",Volume:\n50ml\nSize:\nTravel Size\nBrand:\nCol...,multi-size,https://static.thcdn.com/images/large/webp//pr...,https://static.thcdn.com/images/large/webp//pr...,,,11870457,Color Wow Travel Dream Coat Supernatural Spray...,4.35,208,12.50,yes,,,"Aqua/Water/Eau, Dipropylene Glycol, Polysilico...",,,,,50ml
8,https://www.cultbeauty.co.uk/color-wow-travel-...,Color WOW,https://static.thcdn.com/design-assets/images/...,11870457,"A problem-solving range of hair care heroes, C...","An innovative must-have for protecting porous,...","Shampoo, condition, towel dry hair, divide in ...",Volume:\n50ml\nSize:\nTravel Size\nBrand:\nCol...,multi-size,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,https://static.thcdn.com/images/large/original...,11516014,Color Wow Dream Coat Supernatural Spray 200ml,4.43,354,27.00,yes,,,"Aqua/Water/Eau, Dipropylene Glycol, Polysilico...",,,,,200ml
9,https://www.cultbeauty.com/huda-beauty-kayali-...,Huda Beauty,https://static.thcdn.com/design-assets/images/...,14272370,"Brainchild of Huda Beauty founders, Huda and M...",Joining KAYALI's ranks of intoxicatingly addic...,Spritz a couple of times on your hair or pulse...,Brand:\nHuda Beauty,multi-size,https://static.thcdn.com/images/large/webp//pr...,,,,14272370,Huda Beauty KAYALI Yum Pistachio Gelato 33 Eau...,4.19,31,28.25€,yes,,,"Sd Alcohol 40 - B/Alcohol Denat., Fragrance/Pa...",,,,,10ml
