In [14]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value, timeout=10):
    """
    Wait for an element to be clickable and click it.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )
        element.click()
    except Exception as e:
        print(f"Element not found or could not be clicked: {str(e)}")

def get_select_elements(driver, by, value, timeout=10):
    """
    Wait for and return all elements matching the selector.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((by, value))
        )
        return elements
    except Exception as e:
        print(f"Elements not found: {str(e)}")
        return []

def get_designs(designs_list, available_designs):
    """
    Filter and return the selected designs from the available designs list.
    """
    selected_designs = []
    for design in available_designs:
        if design.text in designs_list:
            selected_designs.append(design)
    return selected_designs

def collect_table_info(driver, div_class):
    """
    Collect both table headers and data based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

# Configuration
website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
my_designs = ['350']
folder = 'Alliance'

if not os.path.exists(folder):
    os.mkdir(folder)

try:
    # Initialize WebDriver
    driver = setup_driver(driver_path)
    driver.get(website)

    # Accept cookies
    wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

    # Select brand
    select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
    brands_button = select_buttons[2]
    brands_button.click()
    brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    brands[0].click()

    # Wait for the loader to disappear
    WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

    # Select design
    design_button = select_buttons[3]
    design_button.click()
    available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    selected_designs = get_designs(my_designs, available_designs)
    if selected_designs:
        selected_designs[0].click()

    # Click search button
    wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

    # Collect data for both mm and inch tables
    international_table_info = collect_table_info(driver, "data_in_mm")
    usa_table_info = collect_table_info(driver, "data_in_inch")

finally:
    time.sleep(5)
    driver.quit()
    print("All data has been collected and the browser is closed.")


All data has been collected and the browser is closed.


In [11]:
international_table_info

{'Table_1': {'headers': {0: {0: 'Size',
    1: 'Rim',
    2: 'Unloaded Dimension',
    3: 'Loaded Static radius\nmm',
    4: 'Rolling Circumference\nmm',
    5: 'Load Index\nPR\nSymbols',
    6: 'Inflation Pressure\nbar',
    7: 'Recommended Load'},
   1: {0: 'Speed'},
   2: {0: 'Not high and sustained torque;Road transport'},
   4: {0: 'SW\nmm',
    1: 'OD\nmm',
    2: '10\nkmph',
    3: '20\nkmph',
    4: '25\nkmph',
    5: '30\nkmph',
    6: '40\nkmph',
    7: '50\nkmph',
    8: '65\nkmph'}},
  'data': [{0: '300/95R52 (12.4R52)',
    1: 'W10X52 W9X52',
    2: '310',
    3: '1890',
    4: '887',
    5: '5781',
    6: '156D\n159A8',
    7: '0.8',
    8: '2100',
    9: '1720',
    10: '1660',
    11: '1610',
    12: '1530',
    13: '1470',
    14: '1400'},
   {0: '1.2',
    1: '2700',
    2: '2210',
    3: '2130',
    4: '2070',
    5: '1970',
    6: '1890',
    7: '1800'},
   {0: '1.6',
    1: '3270',
    2: '2680',
    3: '2580',
    4: '2510',
    5: '2390',
    6: '2290',
    7: '2

In [70]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import os

def setup_driver(driver_path):
    """
    Set up the Chrome WebDriver.
    """
    service = Service(executable_path=driver_path)
    return webdriver.Chrome(service=service)

def wait_and_click(driver, by, value, timeout=10):
    """
    Wait for an element to be clickable and click it.
    """
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((by, value))
        )
        element.click()
    except Exception as e:
        print(f"Element not found or could not be clicked: {str(e)}")

def get_select_elements(driver, by, value, timeout=10):
    """
    Wait for and return all elements matching the selector.
    """
    try:
        elements = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((by, value))
        )
        return elements
    except Exception as e:
        print(f"Elements not found: {str(e)}")
        return []

def get_designs(designs_list, available_designs):
    """
    Filter and return the selected designs from the available designs list.
    """
    selected_designs = []
    for design in available_designs:
        if design.text in designs_list:
            selected_designs.append(design)
    return selected_designs


def collect_table_info(driver, div_class):
    """
    Collect both table headers and data based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_info = {}
        for i, table in enumerate(tables):
            header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
            headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
            headers_text = {
                index: {i: th.text for i, th in enumerate(th_list)}
                for index, th_list in headers_dict.items()
            }
            
            table_data_rows = table.find_elements(By.TAG_NAME, 'tbody')[0].find_elements(By.TAG_NAME, 'tr')
            data_rows = [{i: td.text for i, td in enumerate(row.find_elements(By.TAG_NAME, 'td'))} for row in table_data_rows]
            
            table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                    "data": data_rows
                }
        
        return table_info
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

def collect_filtered_data(driver, div_class):
    """
    Collect filtered data from tables based on the specified div class.
    """
    try:
        data_div = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, f'//div[@class="{div_class} "]'))
        )
        tables = data_div.find_elements(By.XPATH, './/table[@id="tblResultView"]')
        if div_class == 'data_in_inch':
            driver.execute_script("arguments[0].style.display = 'block';", data_div)
        
        table_data = []
        
        xpath_expression = f'//tbody/tr[./td[@style="color:#a30e13 !important"]]'
        table_unit = {'data_in_mm': 'tables_mm', 'data_in_inch': 'tables_inch'}

        if table_unit[div_class] == 'tables_mm':

            table_info = {}

            print('\nfitered rows in mm:\n')
            for i, table in enumerate(tables):

                header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
                headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
                headers_text = {
                    index: {i: th.text for i, th in enumerate(th_list)}
                    for index, th_list in headers_dict.items()
                }

                table_data_in_mm = table.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')

                filtered_data = {}
                for index, tr in enumerate(table_data_in_mm):
                    filter_list = tr.find_elements(By.XPATH, xpath_expression)
                    for idx, ftr in enumerate(filter_list):
                        filtered_data[idx] = ftr.text

                data = filtered_data[i]
                table_data.append({i: data})

                table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                }
                
                print(f'Table_{i+1} Data Collected')

        else:
            table_info = {}

            print('\nfitered rows in inch:\n')

            for i, table in enumerate(tables):

                header_rows = table.find_element(By.TAG_NAME, 'thead').find_elements(By.TAG_NAME, 'tr')
                headers_dict = {index: row.find_elements(By.TAG_NAME, 'th') for index, row in enumerate(header_rows) if index != 3}
                headers_text = {
                    index: {i: th.text for i, th in enumerate(th_list)}
                    for index, th_list in headers_dict.items()
                }

                table_data_in_inch = table.find_element(By.TAG_NAME, 'tbody').find_elements(By.TAG_NAME, 'tr')

                filtered_data = {}
                for index, tr in enumerate(table_data_in_inch, start=2):
                    filter_list = tr.find_elements(By.XPATH, xpath_expression)
                    for idx, ftr in enumerate(filter_list):
                        filtered_data[idx] = ftr.text

                data = filtered_data[i+2]
                table_data.append({i: data})

                table_info[f"Table_{i+1}"] = {
                    "headers": headers_text,
                }
                
                print(f'Table_{i+1} Data Collected')

        processed_data = [
            {
                i: {j: txt for j, txt in enumerate(td.split(' '))}
                for i, td in table.items()
            }
            for table in table_data
        ]

        for idx, dta in enumerate(processed_data):
            table_info[f'Data'] = dta
        
        return table_info
    
    
    except Exception as e:
        print(f"An error occurred while fetching tables: {str(e)}")
        return []

# Configuration
website = "https://yokohama-atg.com/usa/tire-selector-yokohama-off-highway-tires/"
driver_path = "C:\\Users\\cheta\\Downloads\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"  # Change according to your PC's file path
my_designs = ['350']
folder = 'Alliance'

if not os.path.exists(folder):
    os.mkdir(folder)

try:
    # Initialize WebDriver
    driver = setup_driver(driver_path)
    driver.get(website)

    # Accept cookies
    wait_and_click(driver, By.XPATH, '//span[@data-cookie-set="accept"]')

    # Select brand
    select_buttons = get_select_elements(driver, By.XPATH, '//span[@class="select2-arrow"]')
    brands_button = select_buttons[2]
    brands_button.click()
    brands = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    brands[0].click()

    # Wait for the loader to disappear
    WebDriverWait(driver, 10).until(EC.invisibility_of_element((By.XPATH, '//div[@class="loader-wrapper"]')))

    # Select design
    design_button = select_buttons[3]
    design_button.click()
    available_designs = get_select_elements(driver, By.XPATH, '//li[@class="select2-results-dept-0 select2-result select2-result-selectable"]')
    selected_designs = get_designs(my_designs, available_designs)
    if selected_designs:
        selected_designs[0].click()

    # Click search button
    wait_and_click(driver, By.XPATH, '//a[@title="Search"]')

    # Collect filtered data for both mm and inch tables
    international_table_data = collect_filtered_data(driver, "data_in_mm")
    usa_table_data = collect_filtered_data(driver, "data_in_inch")

finally:
    time.sleep(5)
    driver.quit()
    print("All data has been collected and the browser is closed.")



fitered rows in mm:

Table_1 Data Collected
Table_2 Data Collected

fitered rows in inch:

Table_1 Data Collected
Table_2 Data Collected
All data has been collected and the browser is closed.


In [71]:
international_table_data

{'Table_1': {'headers': {0: {0: 'Size',
    1: 'Rim',
    2: 'Unloaded Dimension',
    3: 'Loaded Static radius\nmm',
    4: 'Rolling Circumference\nmm',
    5: 'Load Index\nPR\nSymbols',
    6: 'Inflation Pressure\nbar',
    7: 'Recommended Load'},
   1: {0: 'Speed'},
   2: {0: 'Not high and sustained torque;Road transport'},
   4: {0: 'SW\nmm',
    1: 'OD\nmm',
    2: '10\nkmph',
    3: '20\nkmph',
    4: '25\nkmph',
    5: '30\nkmph',
    6: '40\nkmph',
    7: '50\nkmph',
    8: '65\nkmph'}}},
 'Table_2': {'headers': {0: {0: 'Size',
    1: 'Rim',
    2: 'Unloaded Dimension',
    3: 'Loaded Static radius\nmm',
    4: 'Rolling Circumference\nmm',
    5: 'Load Index\nPR\nSymbols',
    6: 'Inflation Pressure\nbar',
    7: 'Recommended Load'},
   1: {0: 'Speed'},
   2: {0: 'CYCLIC OPERATION'},
   4: {0: 'SW\nmm', 1: 'OD\nmm', 2: '10\nkmph', 3: '15\nkmph'}}},
 'Data': {1: {0: '6', 1: '6800', 2: '6200'}}}

In [72]:
usa_table_data

{'Table_1': {'headers': {0: {0: 'Size',
    1: 'Rim',
    2: 'Unloaded Dimension',
    3: 'Loaded Static radius\ninch',
    4: 'Rolling Circumference\ninch',
    5: 'Load Index\nPR\nSymbols',
    6: 'Inflation Pressure\npsi',
    7: 'Recommended Load'},
   1: {0: 'Speed'},
   2: {0: 'Not high and sustained torque;Road transport'},
   4: {0: 'SW\ninch',
    1: 'OD\ninch',
    2: '6\nmph',
    3: '12\nmph',
    4: '16\nmph',
    5: '19\nmph',
    6: '25\nmph',
    7: '31\nmph',
    8: '40\nmph'}}},
 'Table_2': {'headers': {0: {0: 'Size',
    1: 'Rim',
    2: 'Unloaded Dimension',
    3: 'Loaded Static radius\ninch',
    4: 'Rolling Circumference\ninch',
    5: 'Load Index\nPR\nSymbols',
    6: 'Inflation Pressure\npsi',
    7: 'Recommended Load'},
   1: {0: 'Speed'},
   2: {0: 'CYCLIC OPERATION'},
   4: {0: 'SW\ninch', 1: 'OD\ninch', 2: '6\nmph', 3: '9\nmph'}}},
 'Data': {1: {0: '87', 1: '14990', 2: '13670'}}}

In [46]:
# processed_data = [
#     {
#         i: {j: txt for j, txt in enumerate(td.split(' '))}
#         for i, td in table.items()
#     }
#     for table in international_table_data
# ]

In [47]:
processed_data

[{0: {0: '4.8',
   1: '6000',
   2: '4920',
   3: '4740',
   4: '4600',
   5: '4380',
   6: '4200',
   7: '4000'}},
 {1: {0: '6', 1: '6800', 2: '6200'}}]

In [40]:
usa_table_data

[{0: '70 13230 10850 10450 10140 9660 9260 8820'}, {1: '87 14990 13670'}]

In [33]:
for i in [0, 1]:
    dict = {i: v for i, v in enumerate(table_data[i].split(' '))}
    print(dict)

{0: '4.8', 1: '6000', 2: '4920', 3: '4740', 4: '4600', 5: '4380', 6: '4200', 7: '4000'}
{0: '6', 1: '6800', 2: '6200'}


In [2]:
international_table_data

[{'Table_1': '4.8 6000 4920 4740 4600 4380 4200 4000'},
 {'Table_2': '6 6800 6200'}]

In [3]:
usa_table_data

[{'Table_1': '70 13230 10850 10450 10140 9660 9260 8820'},
 {'Table_2': '87 14990 13670'}]