## Required Libraries

In [1]:
# Load libraries
import os, sys
import logging
import threading
import itertools
from iteration_utilities import random_product
from tqdm.notebook import tqdm
import concurrent.futures
import time

import numpy as np
import pandas as pd
import polars as pl

import dask.bag as db
import dask.dataframe as dd
from dask import delayed

from selenium import webdriver
from selenium.webdriver.edge.options import Options
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Show all package versions
%load_ext watermark
%watermark -v -p numpy,pandas,polars,dask,selenium

Python implementation: CPython
Python version       : 3.11.11
IPython version      : 8.30.0

numpy   : 1.26.4
pandas  : 2.2.3
polars  : 1.24.0
dask    : 2025.2.0
selenium: 4.24.0



## Configuration and Credentials

In [3]:
# Configure logging
logging.basicConfig(level=logging.INFO)

# Retrieve credentials from environment variables or use defaults
GLOBUS_USER = os.getenv("GLOBUS_USER", "won103203@naver.com")
GLOBUS_PW = os.getenv("GLOBUS_PW", "goTeamC@1")

# Path to Microsoft Edge WebDriver
edge_driver_path = "C:/Users/yuhan/Downloads/edgedriver_win64/msedgedriver.exe"

# Edge options
edge_options = Options()
# Uncomment the following line to run in headless mode (recommended for automation)
# edge_options.add_argument('--headless')
edge_options.add_argument('--no-sandbox')
edge_options.add_argument('--disable-dev-shm-usage')

## Selenium Helper Functions

In [4]:
def create_edge_driver(driver_num):
    """Creates and returns a new Edge WebDriver instance with the specified options."""
    time.sleep(0.5 * (driver_num - 1))  # Stagger driver creation
    service = Service(edge_driver_path)
    driver = webdriver.Edge(service=service, options=edge_options)
    driver.implicitly_wait(5)  # Set an implicit wait
    logging.info(f"Edge WebDriver {driver_num} created.")
    return driver

def login_to_globus(driver, driver_num, user=GLOBUS_USER, password=GLOBUS_PW):
    """
    Logs in to the Glo-Bus website using the provided driver and credentials.
    Adjust the element selectors to match the actual site's login form.
    """
    try:
        driver.get("https://www.glo-bus.com/")
        
        # Wait for the login button or form to load (update the selector as needed)
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, "loginbutton"))
        )
        
        # Selectors for the username and password fields
        username_field = driver.find_element(By.ID, "acct_name")
        password_field = driver.find_element(By.ID, "passwdInput")
        username_field.clear()
        username_field.send_keys(user)
        password_field.clear()
        password_field.send_keys(password)
        
        # Click the login submit button
        driver.find_element(By.ID, 'loginbutton').click()
        
        # Wait for a post-page-change element to confirm success
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CLASS_NAME, "text-nowrap"))
        )
        logging.info(f"\tDriver {driver_num}: Login successful.")
    except Exception as e:
        logging.error(f"\tDriver {driver_num}: Error during login: %s", e)
        raise

def go_to_decision_page(driver, driver_num, subsection):
    """
    Navigates to the decision page after logging in.
    Adjust the element selectors to match the actual site's structure.
    """
    try:
        driver.get(f'https://www.glo-bus.com/users/program21/decisions/{subsection}')

        # Wait for a post-login element to confirm success
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CLASS_NAME, "asidenav"))
        )
        logging.info(f"\tDriver {driver_num}: Decision page loaded. Section: {subsection}.")
    except Exception as e:
        logging.error(f"\tDriver {driver_num}: Error navigating to decision page: %s", e)
        raise

In [5]:
def placeholder_task(driver, driver_num, params_id, iterator_chunk=None):
    """
    A placeholder Selenium task.
    This function navigates to a placeholder URL and then closes the driver.
    Replace the contents with your actual task logic.
    """
    try:
        logging.info(f"\t\tPlaceholder task executed on driver {driver_num}.")
        pq_rating = retrieve_PD_performance_stats(driver)
        # Add additional Selenium operations here.
    except Exception as e:
        logging.error("\t\tError in placeholder task: %s", e)
    finally:
        time.sleep(5)  # Pause for 5 seconds before closing the driver
        driver.quit()
        logging.info(f"Edge WebDriver {driver_num} closed.")


    # Create empty dataframe (or build your result dataframe)    
    df = pd.DataFrame({'iterator_chunk': [iterator_chunk], 'pq_rating': pq_rating})
    return df

def run_selenium_task(config):
    """
    Creates a new driver, logs in, navigates to a decision page,
    runs a placeholder task, and returns a dataframe.
    """
    driver_num = config["driver_num"]
    subsection = config["subsection"]
    iterator_chunk = config["iterator_chunk"]
    
    # Create the driver outside the lock so that each thread can do this concurrently.
    driver = create_edge_driver(driver_num)
    
    # Ensure that the login and navigation steps are done sequentially.
    # with login_lock:
    login_to_globus(driver, driver_num)
    go_to_decision_page(driver, driver_num, subsection)
    
    try:
        # Run the placeholder task and return its dataframe.
        df = placeholder_task(driver, driver_num, params_id=None, iterator_chunk=iterator_chunk)
        return df
    except Exception as e:
            logging.error("Error in task for driver %s: %s", driver_num, e)
            return pd.DataFrame()  # Return an empty dataframe in case of error
    

def run_multiple_selenium_tasks(task_configs):
    """
    Opens multiple drivers in separate threads and executes tasks concurrently.
    
    Returns a combined dataframe from all tasks.
    """
    results = []
    
    # Use ThreadPoolExecutor to collect return values easily.
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(task_configs)) as executor:
        # Submit tasks and store futures
        future_to_config = {executor.submit(run_selenium_task, config): config for config in task_configs}
        for future in concurrent.futures.as_completed(future_to_config):
            config = future_to_config[future]
            try:
                df = future.result()
                results.append(df)
            except Exception as e:
                logging.error("Task for config %s generated an exception: %s", config, e)
    
    combined_df = pd.concat(results, ignore_index=True)
    return combined_df

In [10]:
# Actual usage of the function
# task_configs = [
#     {"driver_num": 1, "subsection": "product-design", "iterator_chunk": 0},
#     {"driver_num": 2, "subsection": "acc-marketing", "iterator_chunk": 1},
#     {"driver_num": 3, "subsection": "uav-marketing", "iterator_chunk": 2},
# ]

task_configs = [
    {"driver_num": 1, "subsection": "product-design", "iterator_chunk": 0},
    {"driver_num": 2, "subsection": "product-design", "iterator_chunk": 1},
    {"driver_num": 3, "subsection": "product-design", "iterator_chunk": 2},
]

final_df = run_multiple_selenium_tasks(task_configs)
print(final_df)

INFO:root:Edge WebDriver 1 created.
INFO:root:Edge WebDriver 2 created.
INFO:root:	Driver 1: Login successful.
INFO:root:Edge WebDriver 3 created.
INFO:root:	Driver 1: Decision page loaded. Section: product-design.
INFO:root:		Placeholder task executed on driver 1.
INFO:root:	Driver 2: Login successful.
INFO:root:	Driver 3: Login successful.
INFO:root:	Driver 2: Decision page loaded. Section: product-design.
INFO:root:		Placeholder task executed on driver 2.
INFO:root:	Driver 3: Decision page loaded. Section: product-design.
INFO:root:		Placeholder task executed on driver 3.
ERROR:root:		Error in placeholder task: Message: no such element: Unable to locate element: {"method":"css selector","selector":".ac-calc-area1.perf-quality > .d-flex.align-items-center > strong"}
  (Session info: MicrosoftEdge=134.0.3124.93); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifie

Empty DataFrame
Columns: []
Index: []


In [6]:
# Create the driver outside the lock so that each thread can do this concurrently.
driver_num = 1
driver = create_edge_driver(driver_num)

# Ensure that the login and navigation steps are done sequentially.
# with login_lock:
login_to_globus(driver, driver_num)
go_to_decision_page(driver, driver_num, "product-design")

INFO:root:Edge WebDriver 1 created.
INFO:root:	Driver 1: Login successful.
INFO:root:	Driver 1: Decision page loaded. Section: product-design.


In [44]:
# total_cost_component 

acc_production_costs = driver.find_element(By.CSS_SELECTOR, "glo-dec-product-design .card.mb-2.ac-calc-area2")
acc_total_costs_comp_n_feat = acc_production_costs.find_element(By.CSS_SELECTOR, "tr:nth-child(9) td:nth-child(2) strong").text
acc_total_production_cost = acc_production_costs.find_element(By.CSS_SELECTOR, "tr:nth-child(15) td:nth-child(2) strong").text
acc_projected_unit_assembly = driver.find_element(By.CSS_SELECTOR, ".ac-calc-area3 td:nth-child(4) span").text
print(f"Total production costs: {acc_total_production_cost}")
print(f"Total costs (comp + feat): {acc_total_costs_comp_n_feat}")
print(f"Projected unit assembly: {acc_projected_unit_assembly}")

uav_production_costs = driver.find_element(By.CSS_SELECTOR, "glo-dec-product-design .card.mb-2.uav-calc-area2")
uav_total_costs_comp_n_feat = uav_production_costs.find_element(By.CSS_SELECTOR, "tr:nth-child(9) td:nth-child(2) strong").text
uav_total_production_cost = uav_production_costs.find_element(By.CSS_SELECTOR, "tr:nth-child(15) td:nth-child(2) strong").text
uav_projected_unit_assembly = driver.find_element(By.CSS_SELECTOR, ".uav-calc-area3 td:nth-child(4) span").text
print(f"Total production costs: {uav_total_production_cost}")
print(f"Total costs (comp + feat): {uav_total_costs_comp_n_feat}")
print(f"Projected unit assembly: {uav_projected_unit_assembly}")

# body > glo-app > section > div.main-panel.h-100.d-inline-block > div > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area1.perf-quality > div > table > tbody > tr > td.align-middle > div > span:nth-child(1) > strong
# body > glo-app > section > div.main-panel.h-100.d-inline-block > div > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area3 > div > table > tbody > tr > td:nth-child(4) > div > span
# body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area2 > div > table > tbody > tr:nth-child(15) > td:nth-child(2) > div > span:nth-child(2) > strong
# test = driver.find_element(By.CSS_SELECTOR, "body > glo-app > section > div.main-panel.h-100.d-inline-block > div > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area2 > div > table > tbody > tr:nth-child(9) > td:nth-child(2) > div > span:nth-child(2) > strong").text
# test
# body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area2 > div > table > tbody > tr:nth-child(9) > td:nth-child(2) > div > span:nth-child(2) > strong
# body > glo-app > section > div.main-panel.h-100.d-inline-block > div > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area2 > div > table > tbody > tr:nth-child(9) > td:nth-child(2) > div > span:nth-child(2) > strong

Total production costs: 203,041
Total costs (comp + feat): 103,083
Projected unit assembly: 779.8
Total production costs: 241,167
Total costs (comp + feat): 167,775
Projected unit assembly: 155.5


In [45]:
driver.quit()

## Product Design Option Generator

### a. Base dictionary for all product design options

In [17]:
# Define Product Design Options for Action-Capture Camera and UAV Drone
action_capture_camera_design_dict = {
    ('Image Sensor Size', 'G4-btn'): ['8mm', '9mm', '10mm', '11mm', '12mm', '13mm', '14mm'],
    ('LCD Display Screen', 'G5-btn'): ['230k', '460k', '610k', '920k', '1040k', '1230k', '2360k'],
    ('Image Quality', 'G6-btn'): ['1920×1080', '1920×1440', '2704×1520', '2704×2028', '3840×2160', '3840×2400', '4096×2160'],
    ('Number of Photo Modes', 'G7-btn'): ['4 / 3', '6 / 3', '7 / 3', '8 / 3', '10 / 4', '12 / 4', '16 / 4'],
    ('Camera Housing', 'G9-btn'): ['4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16'],
    ('Editing / Sharing Capabilities', 'G10-btn'): ['4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16'],
    ('Included Accessories', 'G11-btn'): ['6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20'],
    ('Extra Performance Features', 'G13-btn'): ['2', '3', '4', '5', '6', '7', '8', '9', '10'],
    ('Number of Action Capture Camera Models', 'G14-btn'): ['1', '2', '3', '4', '5', '6', '7'],
    ('Product R&D Expenditures', 'G16-btn'): ['0', '3000', '6000', '9000', '12000', '16000', '20000', '24000', '28000', '32000', '36000', '40000', '45000', '50000']
}

uav_drone_design_dict = {
    ('Built-In Camera', 'G22-btn'): ['No Upgrade', 'Minor Upgrade', 'Significant Upgrade', 'Major Upgrade'],
    ('GPS / WiFi / Bluetooth', 'G23-btn'): ['Basic', 'Enhanced', 'Advanced', 'Best Available'],
    ('Battery Pack', 'G24-btn'): ['8', '10', '12', '15', '18', '21', '25', '30'],
    ('Number of Rotors', 'G25-btn'): ['4', '6', '8'],
    ('Rotor Performance / Flight Controller', 'G26-btn'): ['Basic', 'Enhanced', 'Advanced', 'Best Available'],
    ('Body Frame Construction', 'G27-btn'): ['High-Strength Plastic', 'g10/FR4 Fiberglass', 'Carbon Fiber'],
    ('Obstacle Sensors', 'G28-btn'): ['Front Only', 'Front/Rear', '360° Basic', '360° Enhanced', '360° Advanced', '360° Best Available'],
    ('Camera Stabilization Device', 'G29-btn'): ['Basic', 'Enhanced', 'Advanced', 'Best Available'],
    ('Extra Performance Features', 'G31-btn'): ['2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'],
    ('Number of UAV Drone Models', 'G32-btn'): ['1', '2', '3', '4', '5', '6', '7'],
    ('Product R&D Expenditures', 'G34-btn'): ['0', '3000', '6000', '9000', '12000', '15000', '18000', '21000', '24000', '27000', '30000', '35000', '40000', '45000', '50000']
}

### b. Helper functions to generate combinations of product design options

In [None]:
def calculate_number_of_possible_options(params_dict):
    """
    Returns the number of possible options for a given parameter dictionary.
    """
    # Calculate the number of options
    return np.prod([len(v) for v in params_dict.values()])

def fix_design_choices(design_dict, fix_values_dict):
    """
    design_dict: 디자인 선택지 딕셔너리
    fix_values_dict: 고정할 선택지 딕셔너리
    design_dict에서 fix_values_dict의 key에 해당하는 선택지를 values로 고정
    새로운 선택지 딕셔너리를 반환
    """
    new_design_dict = design_dict.copy()
    for key, value in fix_values_dict.items():
        new_design_dict[key] = [value]
    return new_design_dict

# Calculate the number of design options for each product
print(f"Number of Action-Capture Camera Design Options: {calculate_number_of_possible_options(action_capture_camera_design_dict)}")
print(f"Number of UAV Drone Design Options: {calculate_number_of_possible_options(uav_drone_design_dict)}")

Number of Action-Capture Camera Design Options: 5368323870
Number of UAV Drone Design Options: 162570240


### Action Capture Camera
- 하위 3개 조건 다 제거 후 돌리기
- Camera Housing: > 13
- Editing/Sharing Capabilities > 13
- Included Accessories > 13

- 단 모델 수는 4개 이하로 제한



### UAV Drone minimum settings

- GPS / Wifi / Basic: Basic unincluded
- Rotor Performance/ Flight Controller: Basic unincluded
- Battery Pack: 15
- Number of Rotors: 8
- R&D Expenditures: > 24000

In [19]:
strategy_A_camera_year_6_dict ={
    ('Image Sensor Size', 'G4-btn'): ['11mm', '12mm', '13mm', '14mm'],
    ('LCD Display Screen', 'G5-btn'): ['920k', '1040k', '1230k', '2360k'],
    ('Image Quality', 'G6-btn'): ['2704×2028', '3840×2160', '3840×2400', '4096×2160'],
    ('Number of Photo Modes', 'G7-btn'): ['8 / 3', '10 / 4', '12 / 4', '16 / 4'],
    ('Camera Housing', 'G9-btn'): ['13', '14', '15', '16'],
    ('Editing / Sharing Capabilities', 'G10-btn'): ['13', '14', '15', '16'],
    ('Included Accessories', 'G11-btn'): ['13', '14', '15', '16', '17', '18', '19', '20'],
    ('Extra Performance Features', 'G13-btn'): ['5', '6', '7', '8', '9', '10'],
    ('Number of Action Capture Camera Models', 'G14-btn'): ['1', '2', '3', '4'],
    ('Product R&D Expenditures', 'G16-btn'): ['20000', '24000', '28000', '32000', '36000', '40000', '45000', '50000']
}

strategy_A_drone_year_6_dict = {
    ('Built-In Camera', 'G22-btn'): ['No Upgrade', 'Minor Upgrade', 'Significant Upgrade', 'Major Upgrade'],
    ('GPS / WiFi / Bluetooth', 'G23-btn'): ['Enhanced', 'Advanced', 'Best Available'],
    ('Battery Pack', 'G24-btn'): ['15', '18', '21', '25', '30'],
    ('Number of Rotors', 'G25-btn'): ['8'],
    ('Rotor Performance / Flight Controller', 'G26-btn'): ['Enhanced', 'Advanced', 'Best Available'],
    ('Body Frame Construction', 'G27-btn'): ['High-Strength Plastic', 'g10/FR4 Fiberglass', 'Carbon Fiber'],
    ('Obstacle Sensors', 'G28-btn'): ['Front Only', 'Front/Rear', '360° Basic', '360° Enhanced', '360° Advanced', '360° Best Available'],
    ('Camera Stabilization Device', 'G29-btn'): ['Enhanced', 'Advanced', 'Best Available'],
    ('Extra Performance Features', 'G31-btn'): ['5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'],
    ('Number of UAV Drone Models', 'G32-btn'): ['1', '2', '3', '4', '5', '6', '7'],
    ('Product R&D Expenditures', 'G34-btn'): ['35000', '40000', '45000', '50000']
}

# Calculate number of design options after implementing strategy A
print(f"Number of Action-Capture Camera Design Options (Strategy A): {calculate_number_of_design_options(strategy_A_camera_year_6_dict)}")
print(f"Number of UAV Drone Design Options (Strategy A): {calculate_number_of_design_options(strategy_A_drone_year_6_dict)}")

Number of Action-Capture Camera Design Options (Strategy A): 6291456
Number of UAV Drone Design Options (Strategy A): 2993760


### c. Helper functions for generating product design options
Output: iterator_chunks for lazy evaluation

In [None]:
# Define a function to split a list into chunks of a specified size
def split_list_into_chunks(lst, chunk_size):
    chunks = []
    # Iterate over the list with a step of chunk_size
    for i in range(0, len(lst), chunk_size):
        # Slice the list from index 'i' to 'i + chunk_size' and append it to chunks
        chunks.append(lst[i:i + chunk_size])
    return chunks

# Define a generator to yield chunks from an iterator
def chunk_iterator(iterator, chunk_size):
    """Yield lists of items from the iterator, each with a maximum of chunk_size elements."""
    if type(iterator) == list:
        iterator = (x for x in iterator)
    while True:
        chunk = itertools.islice(iterator, chunk_size)
        if not chunk:
            break
        yield chunk

def generate_combination_iterators(params_dict, num_combinations=None, num_chunks=5):
    """
    Generates iterators for creating combinations of parameter options based on the provided parameter dictionary.
    
    Parameters:
        params_dict (dict): Dictionary of possible parameter options.
        num_comb_thousands (int): Number of combinations in thousands (default is 1000).
        num_chunks (int): Number of chunks to divide the combinations into (default is 5).
    
    Returns:
        iterator_chunks: <generator object chunk_iterator> (lazy evaluation)
    """
    # Get the number of parameters and combinations
    num_params = len(params_dict)
    print(num_params)

    # Calculate total number of combinations
    if num_combinations == None:
        # Generate all possible parameter combinations for the given parameter dictionary
        combinations_iterator = itertools.product(*params_dict.values())
        num_combinations = calculate_number_of_possible_options(params_dict)
        print(f"Using all possible combinations: {num_combinations}")
    else:
        # Randomly sample the combinations
        random_combination_list = random_product(*params_dict.values(), repeat=num_combinations)
        combinations_iterator = iter(split_list_into_chunks(random_combination_list, num_params))
        print(f"Selecting random number of combinations: {num_combinations}")

    # Create iterators for generating combinations in chunks
    chunk_size = num_combinations // num_chunks + (1 if num_combinations % num_chunks else 0)
    
    # Divide the combinations into chunks
    iterator_chunks = chunk_iterator(combinations_iterator, chunk_size)
    print(f"Creating {num_chunks} chunks of size {chunk_size} each.")
    
    return iterator_chunks

In [47]:
test = generate_combination_iterators(action_capture_camera_design_dict, num_combinations=10, num_chunks=2)

first_chunk = [*next(test)]
print(first_chunk)

second_chunk = list(next(test))
print(second_chunk)

third_chunk = list(next(test))
print(third_chunk)

10
Selecting random number of combinations: 10
Creating 2 chunks of size 5 each.
[('14mm', '230k', '3840×2400', '12 / 4', '9', '5', '10', '10', '2', '45000'), ('8mm', '1230k', '3840×2400', '12 / 4', '4', '16', '17', '9', '7', '6000'), ('12mm', '230k', '1920×1080', '8 / 3', '12', '7', '17', '4', '4', '50000'), ('9mm', '1040k', '4096×2160', '8 / 3', '11', '6', '16', '8', '1', '12000'), ('11mm', '920k', '2704×1520', '8 / 3', '6', '15', '17', '3', '5', '24000')]
[('13mm', '230k', '2704×2028', '4 / 3', '14', '13', '10', '4', '3', '45000'), ('13mm', '230k', '1920×1440', '12 / 4', '4', '5', '12', '8', '4', '16000'), ('11mm', '610k', '3840×2160', '8 / 3', '10', '6', '14', '7', '2', '16000'), ('14mm', '2360k', '3840×2400', '16 / 4', '13', '12', '6', '7', '4', '50000'), ('9mm', '920k', '1920×1080', '7 / 3', '9', '9', '8', '4', '3', '32000')]
[]


### d. Helper functions for uploading product design options to the website and capturing the results

In [None]:
def retrieve_PD_performance_stats(driver):
    """
    Retrieves performance statistics from the Product Design page.
    """
    pq_rating = driver.find_element(By.CSS_SELECTOR, ".ac-calc-area1.perf-quality td.align-middle strong").text
    
    

    return pq_rating

In [None]:
def scrape_PD_results(driver, product):
    if product == 'action_camera':
        pq_rating_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area1.perf-quality > div > table > tbody > tr > td.align-middle > div > span:nth-child(1) > strong'
        cf_cost_per_unit_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area2 > div > table > tbody > tr:nth-child(9) > td:nth-child(3) > div > span:nth-child(2) > strong'
        pa_cost_per_unit_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(1) > div.card.mb-2.ac-calc-area2 > div > table > tbody > tr:nth-child(15) > td:nth-child(3) > div > span:nth-child(2) > strong'

    elif product == 'drone':
        pq_rating_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(2) > div.card.mb-2.uav-calc-area1.perf-quality > div > table > tbody > tr > td.align-middle > div > span:nth-child(1) > strong'
        cf_cost_per_unit_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(2) > div.card.mb-2.uav-calc-area2 > div > table > tbody > tr:nth-child(9) > td:nth-child(3) > div > span:nth-child(2) > strong'
        pa_cost_per_unit_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > div.m-3 > glo-dec-product-design > div > div:nth-child(2) > div.card.mb-2.uav-calc-area2 > div > table > tbody > tr:nth-child(15) > td:nth-child(3) > div > span:nth-child(2) > strong'
    
    net_profit_css = 'body > glo-app > section > div > div:nth-child(2) > glo-decisions > div > glo-projected-performance-top > div > div > div.col-12.d-flex.justify-content-evenly.align-items-start > table:nth-child(3) > tbody > tr:nth-child(2) > td:nth-child(2)'

    try:
        # Wait for the page to load
        WebDriverWait(driver, 1).until(EC.presence_of_element_located((By.CSS_SELECTOR, net_profit_css)))

        pq_rating = driver.find_element(By.CSS_SELECTOR, pq_rating_css).text
        cf_cost_per_unit = driver.find_element(By.CSS_SELECTOR, cf_cost_per_unit_css).text
        pa_cost_per_unit = driver.find_element(By.CSS_SELECTOR, pa_cost_per_unit_css).text
        net_profit = driver.find_element(By.CSS_SELECTOR, net_profit_css).text

    except Exception as e:
        logging.error(f"Error retrieving Performance Statistics for{product}: {e}")
        pq_rating = None
        cf_cost_per_unit = None
        pa_cost_per_unit = None
        net_profit = None

    return pq_rating, cf_cost_per_unit, pa_cost_per_unit, net_profit

# Use selenium to change parameters in the product design page
# Each set of parameters is a row in the DataFrame
# For each row, we will:
# 1. Set the parameters
# 2. Read the PQ Rating and PA Cost per Unit
# 3. Save the results to a new row in the dataframe

def set_design_parameters(iterator_chunk, driver, param_id):
    """
    Sets the design parameters for each row in the iterator_chunk and scrapes the results.
    """
    # Convert the iterator chunk to a list
    chunk = [*iterator_chunk]

    # List to store results for each row
    results = []

    # Iterate over each row in the chunk
    for _, row in tqdm(chunk.iterrows(), total=len(chunk), desc='Simulating Design Parameters'):
        try:
            # Set the design parameters
            for key, value in row.items():
                # Update the design parameters
                element = driver.find_element(By.ID, key)
                select = Select(element)
                select.select_by_visible_text(value)

            # Scrape the results
            pq_rating, cf_cost_per_unit, pa_cost_per_unit, net_profit = scrape_PD_results(driver, product)
            
            # Append the results as a dictionary
            results.append({
                'Performance / Quality Rating': pq_rating,
                'Component/Feature Cost per Unit': cf_cost_per_unit,
                'Production Cost per Unit': pa_cost_per_unit,
                'Net Profit': net_profit
            })
        except Exception as e:
            logging.error("Error during scraping: %s", e)
            # Append a row with NaN values in case of an error
            results.append({
                'Performance / Quality Rating': None,
                'Component/Feature Cost per Unit': None,
                'Production Cost per Unit': None,
                'Net Profit': None
            })

    # Create a DataFrame from the results and concatenate it with the original chunk
    results_df = pd.DataFrame(results, index=chunk.index)
    return pd.concat([chunk, results_df], axis=1)

## Random Options Sampling

### for UAV Drone

In [None]:
strat_A_rand_drone_options_df = create_n_random_combinations(strategy_A_drone_year_6_dict, num_combinations=2000)
strat_A_rand_drone_options_df.head()

In [None]:
# Example usage for small set of options
strat_A_rand_camera_options_df_results = scrape_pq_and_cost_mini_parallel_threads(strat_A_rand_drone_options_df, 'drone', chunk_size=200)

In [None]:
strat_A_rand_camera_options_df_results

In [None]:
# Calculate statistics of interest
def calculate_result_ratios(results_df):
    """
    Calculates the Performance / Quality Rating Efficiency and Fixed Cost Ratio based on the provided results DataFrame.
    """
    # Change the data type of the columns to float
    # Deal with commas in the data
    result_columns = ['Performance / Quality Rating', 'Component/Feature Cost per Unit', 'Production Cost per Unit', 'Net Profit']
    results_df[result_columns] = results_df[result_columns].replace(',', '', regex=True)
    results_df[result_columns] = results_df[result_columns].astype(float)

    # Calculate the Performance / Quality Rating Efficiency and Fixed Cost Ratio
    results_df['Rating_Efficiency'] = results_df['Performance / Quality Rating'] / results_df['Production Cost per Unit']
    results_df['Fixed Cost Ratio'] = results_df['Component/Feature Cost per Unit'] / results_df['Production Cost per Unit']

    return results_df

In [None]:
strat_A_rand_camera_options_df_results = calculate_result_ratios(strat_A_rand_camera_options_df_results)

strat_A_rand_camera_options_df_results.sort_values(by='Rating_Efficiency', ascending=False).head(20)

## Generate All Options

### Small Option Space - Less than 10 million

In [None]:
# Create a list of all possible design combinations for the action-capture camera
def generate_dataframe_from_design_dict(design_dict, mode):
    """
    Generates a DataFrame with all possible design combinations based on the given design dictionary.
    """
    if mode == 'polars':
        # Define the schema for the Polar DataFrame
        schema = {key[1]: str for key in design_dict.keys()}

        # Create a list of all possible design combinations (use generator for memory efficiency)
        design_combinations = itertools.product(*design_dict.values())

        # Create a polars DataFrame with the design combinations
        df = pl.DataFrame(design_combinations, schema=schema)

        return df
    elif mode == 'pandas':
        # Define columns for the pandas DataFrame
        columns = [key[1] for key in design_dict.keys()]

        # Create a list of all possible design combinations (use generator for memory efficiency)
        design_combinations = itertools.product(*design_dict.values())

        # Create a pandas DataFrame with the design combinations
        df = pd.DataFrame(design_combinations, columns=columns)

        return df

In [None]:
strat_A_camera_options_df = generate_dataframe_from_design_dict(strategy_A_camera_year_6_dict, mode='pandas')
strat_A_drone_options_df = generate_dataframe_from_design_dict(strategy_A_drone_year_6_dict, mode='pandas')

In [None]:
strat_A_camera_options_df

### Large Option Space - Greater than 10 million

In [None]:
def generate_parquet_from_design_dict(design_dict, product, output_file, npartitions=1000):
    """
    Generates a Parquet file containing all combinations of design options from the given design dictionary.

    Parameters:
    - design_dict (dict): Dictionary where keys are tuples (column_name, element_id) and values are lists of options.
    - product (str): Name of the product (used to locate the directory).
    - output_file (str): Path to the output Parquet file.
    - npartitions (int): Number of partitions for the Dask Bag.
    """
    # Define save directory
    save_dir = f"product_design_query/{product}/" + output_file

    # Extract column names (using the first element of each key) and lists of values
    cols = [key[0] for key in design_dict.keys()]
    values_lists = list(design_dict.values())

    # Create a lazy iterator for the Cartesian product of all design options
    all_combinations = itertools.product(*values_lists)
    print(all_combinations)

    # Build a Dask Bag from the iterator
    bag = db.from_sequence(all_combinations, npartitions=npartitions)
    print("Dask bag created")

    # Convert the Bag to a Dask DataFrame with the proper column names
    ddf = bag.to_dataframe(columns=cols)
    # print first 5 rows
    print(ddf.head())

    # Save the lazy Dask DataFrame as a Parquet file
    ddf.to_parquet(save_dir, write_index=False)

# Example usage
generate_parquet_from_design_dict(uav_drone_design_dict, 'drone', "all_combinations.parquet", npartitions=163)

In [None]:
generate_parquet_from_design_dict(action_capture_camera_design_dict, 'camera', "all_combinations.parquet", npartitions=5369)

In [None]:
driver = create_edge_driver()
login_to_globus(driver)
go_to_decision_page(driver, 'product-design')
driver.quit()

In [None]:
def generate_chunks(value_lists, columns, chunk_size):
    """
    Generator function that yields DataFrame chunks of the Cartesian product.
    """
    # Create an iterator for all possible combinations
    iterator = itertools.product(*value_lists)
    while True:
        # Grab the next chunk_size combinations
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            break
        yield pd.DataFrame(chunk, columns=columns)

def generate_design_combinations(product, design_dir_name, design_dict, chunk_size=1000000):
    """
    Returns a list of dictionaries, where each dictionary represents a unique design combination.
    """
    # Extract column names (first element of each key tuple) and value lists
    columns = [key[1] for key in design_dict.keys()]
    value_lists = list(design_dict.values())

    # Wrap each chunk with dask.delayed so Dask can build a lazy graph
    delayed_dfs = [delayed(chunk_df) for chunk_df in tqdm(generate_chunks(value_lists, columns, chunk_size))]

    # Create a Dask DataFrame from the delayed chunks
    ddf = dd.from_delayed(delayed_dfs)

    # Write the Dask DataFrame to a Parquet file.
    # This will write the data in partitions (chunks) to the '{product}/{design_dir_name}.parquet' directory.
    ddf.to_parquet(f'product_design_query/{product}/{design_dir_name}.parquet', write_index=False)

    print(f'Finished generating design combinations for {product} using {design_dir_name}.')

In [None]:
generate_design_combinations('drone', 'all_combinations', uav_drone_design_dict)

## Analyze Different Product Design Options

In [None]:
# read in first 5 rows of the generated parquet file
ddf = dd.read_parquet('product_design_query/drone/all_combinations.parquet')
print(ddf.head())

In [None]:


# Create a function to scrape the performance quality rating and production cost per unit for each design combination
def scrape_performance_quality_and_cost(product, design_dir_name):
    """
    Scrapes the performance quality rating and production cost per unit for each design combination.
    """
    # Load the Dask DataFrame from the Parquet file
    ddf = dd.read_parquet(f'product_design_query/{product}/{design_dir_name}.parquet')

    # List to store updated delayed partitions
    updated_partitions = []

    # Iterate over each chunk of the Dask DataFrame
    for i, delayed_obj in tqdm(enumerate(ddf.to_delayed()), desc='Processing Chunks', total=ddf.npartitions):
        try:
            # Create a new Edge WebDriver instance for each chunk
            driver = create_edge_driver()

            # Login to the Glo-Bus website
            login_to_globus(driver)

            # Navigate to the product design page
            go_to_decision_page(driver, 'product-design')

            # Compute the partition to get a pandas DataFrame
            df_partition = delayed_obj.compute()

            # Replace chunk with updated version
            updated_df = set_design_parameters(df_partition, driver)

            # Wrap the updated DataFrame back into a delayed object
            updated_delayed = delayed(updated_df)
            updated_partitions.append(updated_delayed)
            
        except Exception as e:
            logging.error(f"Error processing chunk {i}: {e}")
        finally:
            # Quit the WebDriver instance
            driver.quit()
            logging.info("WebDriver instance closed.")

    # Reconstruct a Dask DataFrame from the list of delayed partitions
    updated_ddf = dd.from_delayed(updated_partitions)

    # Write the updated Dask DataFrame to a new Parquet file
    updated_ddf.to_parquet(f'product_design_query/{product}/{design_dir_name}_results.parquet', write_index=False)
    logging.info(f"Finished scraping performance quality and cost for {product} using {design_dir_name}.")

### Simulate Action Capture Camera

In [None]:
def scrape_pq_and_cost_mini(df, product):
    try: 
        # Create a new Edge WebDriver instance for each chunk
        driver = create_edge_driver()

        # Login to the Glo-Bus website
        login_to_globus(driver)

        # Navigate to the product design page
        go_to_decision_page(driver, 'product-design')

        # Iterate over each row in the Polars DataFrame
        updated_df = set_design_parameters(df, driver, product)
    except Exception as e:
        logging.error(f"Error while scraping {product}: {e}")
    finally:
        # Quit the WebDriver instance
        driver.quit()
        logging.info("WebDriver instance closed.")

    return updated_df

## Multithread Processing

In [None]:
def process_chunk_thread(chunk_df, product, driver):
    """
    Process a chunk of the DataFrame using a thread.
    Each thread creates its own WebDriver instance.
    """
    try:
        go_to_decision_page(driver, 'product-design')
        updated_chunk = set_design_parameters(chunk_df, driver, product)
    except Exception as e:
        logging.error(f"Error processing chunk for {product}: {e}")
        updated_chunk = chunk_df
    return updated_chunk

In [None]:
import concurrent.futures
from contextlib import contextmanager

@contextmanager
def scrape_pq_and_cost_mini_parallel_threads(df, product, chunk_size=1000):
    """
    Splits the Polars DataFrame into chunks and processes each chunk in parallel using threads.
    This avoids the pickling issues common with multiprocessing in Jupyter notebooks.
    """
    num_rows = df.shape[0]
    # Split DataFrame into chunks
    chunks = [df[i:i+chunk_size] for i in range(0, num_rows, chunk_size)]
    
    try:
        # Create a new Edge WebDriver instance for each thread
        driver = create_edge_driver()

        # Login to the Glo-Bus website
        login_to_globus(driver)

        updated_chunks = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(process_chunk_thread, chunk, product, driver) for chunk in chunks]
            for future in concurrent.futures.as_completed(futures):
                updated_chunks.append(future.result())
    except Exception as e:
        logging.error(f"Error while scraping {product} via Multi-thread: {e}")
    finally:
        # Quit the WebDriver instance
        driver.quit()
        logging.info("WebDriver instance closed.")
    
    updated_df = pd.concat(updated_chunks)
    return updated_df

In [None]:
# Example usage for small set of options
strat_A_drone_options_df_results = scrape_pq_and_cost_mini(strat_A_drone_options_df, 'drone')

### Simulate UAV Drone

In [None]:
import itertools
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import gc

# Set a chunk size that suits your system's memory. Adjust as necessary.
def generate_chunks(value_lists, columns, chunk_size):
    """
    Generator function that yields DataFrame chunks of the Cartesian product.
    """
    # Create an iterator for all possible combinations
    iterator = itertools.product(*value_lists)
    while True:
        # Grab the next chunk_size combinations
        chunk = list(itertools.islice(iterator, chunk_size))
        if not chunk:
            break
        yield pd.DataFrame(chunk, columns=columns)

def generate_design_combinations_v2(product, design_dict, chunk_size=10000000):

    # Extract column names (from the first element of each key) and lists of options
    columns = [key[0] for key in design_dict.keys()]
    value_lists = list(design_dict.values())

    # Process each chunk one at a time
    for i, df_chunk in tqdm(enumerate(generate_chunks(value_lists, columns, chunk_size))):
        # Convert the chunk to a PyArrow Table
        table = pa.Table.from_pandas(df_chunk)
        # Write the table to a Parquet file (each chunk in its own file)
        pq.write_table(table, f"product_design_query/{product}/all_combinations/chunk_{i}.parquet")
        
        # Clear the DataFrame from memory and trigger garbage collection
        del df_chunk
        gc.collect()
        
        # print(f"Chunk {i} written and memory cleared.")

In [None]:
generate_design_combinations_v2('camera', action_capture_camera_design_dict)

In [None]:
driver = create_edge_driver()
login_to_globus(driver)
go_to_decision_page(driver, 'product-design')
driver.quit()

In [None]:
# Read in the Parquet file as a Dask DataFrame
ddf = dd.read_parquet('product_design_query/drone/all_combinations.parquet')

# Choose only first 100 rows for testing
ddf_test = ddf.head(100)

In [None]:
def click_resume_button():
    # Resume 버튼 클릭
    resume_button_xpath = '/html/body/glo-app/section/glo-keep-alive/div/div/div/div[3]/button'
    if driver.find_element(By.XPATH, resume_button_xpath).is_displayed():
        driver.find_element(By.XPATH, resume_button_xpath).click()
    else:
        pass

In [None]:
click_resume_button()

In [None]:
driver.quit()